In [1]:
import torch
from thop import profile
import numpy as np

In [2]:
from painn_flops import PaiNN

In [6]:
from schnetpack.nn.radial import GaussianRBF
from schnetpack.nn.cutoff import CosineCutoff

In [36]:
data = np.load('tob9.npz')
z = data["z"]
z = torch.from_numpy(z)
z

tensor([20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 14, 14, 14, 14, 14, 14, 14, 14,
        14, 14, 14, 14,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  1,  1,  1,  1], dtype=torch.uint8)

In [38]:
from schnetpack.data import AtomsDataModule
import schnetpack.transform as trn
from schnetpack.datasets import MD17



tob9_data = AtomsDataModule(
    datapath = "./tob9.db",
    #molecule='tob9',
    batch_size=10,
    num_train=100,
    num_val=50,
    transforms=[
        trn.ASENeighborList(cutoff=4.),
        trn.RemoveOffsets("energy", remove_mean=True, remove_atomrefs=False),
        trn.CastTo32()
    ],
    num_workers=1,
    pin_memory=True, # set to false, when not using a GPU
)
tob9_data.prepare_data()
tob9_data.setup()

properties = tob9_data.dataset[0]
print('Loaded properties:\n', *['{:s}\n'.format(i) for i in properties.keys()])


  0%|                                                                                                            | 0/10 [00:00<?, ?it/s][A
 10%|██████████                                                                                          | 1/10 [00:00<00:01,  8.99it/s][A
100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 35.53it/s][A

Loaded properties:
 _idx
 energy
 forces
 _n_atoms
 _atomic_numbers
 _positions
 _cell
 _pbc






In [54]:
from torchvision.models import resnet50
from thop import profile
model = resnet50()
input = torch.randn(1, 3, 224, 224)
macs, params = profile(model, inputs=(input, ))
macs,params

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.


(4133742592.0, 25557032.0)

In [50]:
model = PaiNN(n_atom_basis=128,n_interactions=3,radial_basis=GaussianRBF(n_rbf = 20, cutoff = 4.0),cutoff_fn=CosineCutoff(cutoff = 4.0))
input = {'z': properties["_atomic_numbers"], 'rij': torch.randn(500,3), 'idxi': torch.randint(0,61,(1,500))[0], 'idxj': torch.randint(0,61,(1,500))[0]}
macs, params = profile(model, inputs=(input, ))
macs,params

[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.


(0.0, 0)

In [14]:
from ase import Atoms
import numpy as np
from schnetpack.data import ASEAtomsData

# load atoms from npz file. Here, we only parse the first 10 molecules
data = np.load('./md17_uracil.npz')

numbers = data["z"]
atoms_list = []
property_list = []
for positions, energies, forces in zip(data["R"], data["E"], data["F"]):
    ats = Atoms(positions=positions, numbers=numbers)
    properties = {'energy': energies, 'forces': forces}
    property_list.append(properties)
    atoms_list.append(ats)

#%rm './ethanol.db'
new_dataset = ASEAtomsData.create(
    './uracil.db',
    distance_unit='Ang',
    property_unit_dict={'energy':'kcal/mol', 'forces':'kcal/mol/Ang'}
)
new_dataset.add_systems(property_list, atoms_list)

AtomsDataError: Dataset already exists: ./uracil.db

In [15]:
import torch
import torchmetrics
import schnetpack as spk
import schnetpack.transform as trn
import pytorch_lightning as pl
import os
import matplotlib.pyplot as plt
import numpy as np

forcetut = './forcetut'
if not os.path.exists(forcetut):
    os.makedirs(forcetut)

In [16]:
from schnetpack.datasets import MD17

ethanol_data = MD17(
    os.path.join(forcetut,'u.db'),
    molecule='uracil',
    batch_size=10,
    num_train=1000,
    num_val=1000,
    transforms=[
        trn.ASENeighborList(cutoff=5.),
        trn.RemoveOffsets(MD17.energy, remove_mean=True, remove_atomrefs=False),
        trn.CastTo32()
    ],
    num_workers=1,
    pin_memory=True, # set to false, when not using a GPU
)
ethanol_data.prepare_data()
ethanol_data.setup()

100%|█████████████████████████████████████████████████████| 10/10 [00:00<00:00, 47.64it/s]


In [17]:
cutoff = 5.
n_atom_basis = 30

pairwise_distance = spk.atomistic.PairwiseDistances() # calculates pairwise distances between atoms
radial_basis = spk.nn.GaussianRBF(n_rbf=20, cutoff=cutoff)
schnet = spk.representation.SchNet(
    n_atom_basis=n_atom_basis, n_interactions=3,
    radial_basis=radial_basis,
    cutoff_fn=spk.nn.CosineCutoff(cutoff)
)

In [18]:
pred_energy = spk.atomistic.Atomwise(n_in=n_atom_basis, output_key=MD17.energy)
pred_forces = spk.atomistic.Forces(energy_key=MD17.energy, force_key=MD17.forces)

In [19]:
nnpot = spk.model.NeuralNetworkPotential(
    representation=schnet,
    input_modules=[pairwise_distance],
    output_modules=[pred_energy, pred_forces],
    postprocessors=[
        trn.CastTo64(),
        trn.AddOffsets(MD17.energy, add_mean=True, add_atomrefs=False)
    ]
)

In [20]:
output_energy = spk.task.ModelOutput(
    name=MD17.energy,
    loss_fn=torch.nn.MSELoss(),
    loss_weight=0.01,
    metrics={
        "MAE": torchmetrics.MeanAbsoluteError()
    }
)

output_forces = spk.task.ModelOutput(
    name=MD17.forces,
    loss_fn=torch.nn.MSELoss(),
    loss_weight=0.99,
    metrics={
        "MAE": torchmetrics.MeanAbsoluteError()
    }
)

In [21]:
task = spk.task.AtomisticTask(
    model=nnpot,
    outputs=[output_energy, output_forces],
    optimizer_cls=torch.optim.AdamW,
    optimizer_args={"lr": 1e-4}
)

/home/zkm/.local/lib/python3.8/site-packages/pytorch_lightning/utilities/parsing.py:199: Attribute 'model' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['model'])`.


In [22]:
logger = pl.loggers.TensorBoardLogger(save_dir=forcetut)
callbacks = [
    spk.train.ModelCheckpoint(
        model_path=os.path.join(forcetut, "best_inference_model"),
        save_top_k=1,
        monitor="val_loss"
    )
]

trainer = pl.Trainer(
    callbacks=callbacks,
    logger=logger,
    default_root_dir=forcetut,
    max_epochs=5, # for testing, we restrict the number of epochs
)
trainer.fit(task, datamodule=ethanol_data)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
2024-04-01 18:00:34.631035: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type                   | Params
---------------------------------------------------
0 | model   | NeuralNetworkPotential | 16.4 K
1 | outputs | ModuleList             | 0     
---------------------------------------------------
16.4 K    Trainable params
0         Non-trainable params
16.4 K    Total params
0.066     Total estimated model params size (MB)


Sanity Checking: |                                                  | 0/? [00:00<?, ?it/s]

/home/zkm/.local/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=103` in the `DataLoader` to improve performance.
/home/zkm/.local/lib/python3.8/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 10. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
/home/zkm/.local/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=103` in the `DataLoader` to improve performance.
/home/zkm/.local/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (10) is smaller 

Training: |                                                         | 0/? [00:00<?, ?it/s]

Validation: |                                                       | 0/? [00:00<?, ?it/s]

Validation: |                                                       | 0/? [00:00<?, ?it/s]

Validation: |                                                       | 0/? [00:00<?, ?it/s]

Validation: |                                                       | 0/? [00:00<?, ?it/s]

Validation: |                                                       | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=5` reached.


In [13]:
from typing import Dict
import torch
import torch.nn as nn
import schnetpack as spk
import torchmetrics
import schnetpack.transform as trn
import pytorch_lightning as pl
import os
import matplotlib.pyplot as plt
import numpy as np
from schnetpack.datasets import MD17
from schnetpack.model import AtomisticModel
import pytorch_lightning as pl

forcetut = './forcetut'
if not os.path.exists(forcetut):
    os.makedirs(forcetut)

ethanol_data = MD17(
    os.path.join(forcetut,'u.db'),
    molecule='uracil',
    batch_size=10,
    num_train=1000,
    num_val=1000,
    transforms=[
        trn.ASENeighborList(cutoff=5.),
        trn.RemoveOffsets(MD17.energy, remove_mean=True, remove_atomrefs=False),
        trn.CastTo32()
    ],
    num_workers=1,
    pin_memory=True, # set to false, when not using a GPU
)
ethanol_data.prepare_data()
ethanol_data.setup()

inputs = ethanol_data

pairwise_distance = spk.atomistic.PairwiseDistances()
input_modules = nn.ModuleList([pairwise_distance])
inputs = AtomisticModel.initialize_derivatives(inputs)
for m in input_modules:
    inputs = m(inputs)
inputs = self.representation(inputs)
postprocessors=[
    trn.CastTo64(),
    trn.AddOffsets(MD17.energy, add_mean=True, add_atomrefs=False)
]
inputs = self.postprocess(inputs)
#Dict[str,torch.Tensor]

100%|█████████████████████████████████████████████████████| 10/10 [00:00<00:00, 52.58it/s]


TypeError: initialize_derivatives() missing 1 required positional argument: 'inputs'