In [191]:
from pyanitools import anidataloader
data = anidataloader("ani_gdb_s01.h5")
data_iter = data.__iter__()

In [192]:
mols = next(data_iter)
# Extract the data
P = mols['path']
X = mols['coordinates']
E = mols['energies'] #energies are in hartree
S = mols['species']
sm = mols['smiles']

# Print the data
print("Path:   ", P)
print("  Smiles:      ","".join(sm))
print("  Symbols:     ", S)
print("  Coordinates: ", X.shape)
print("  Energies:    ", E.shape, "\n")


Path:    /gdb11_s01/gdb11_s01-0
  Smiles:       [H]C([H])([H])[H]
  Symbols:      ['C', 'H', 'H', 'H', 'H']
  Coordinates:  (5400, 5, 3)
  Energies:     (5400,) 



In [193]:
data_iter = data.__iter__()
count = 0
count_conf = 0
for mol in data_iter:
    count+=1
    count_conf += len(mol['energies'])
print(count)
print(count_conf)

3
10800


In [194]:
data.cleanup()

In [195]:
import numpy as np


def calc_f_C(Rij, RC):
    """Calculate the local atomic environment approximation f_c. """
    f_C_value = 0.5 * np.cos(np.pi * Rij / RC) + 0.5
    # Make f_C(0)=0 to make sure the sum in distance conversion function 
    # and radial conversion function can run with j=i
    indicator = ((Rij <= RC) & (Rij != 0)).astype(float) 
    return f_C_value * indicator
    

def radial_component(Rijs, eta, Rs, RC=5.2):
    # Rijs is a 1d array, all other parameters are scalars
    f_C_values = calc_f_C(Rijs, RC)
    individual_components = np.exp(-eta * (Rijs - Rs) ** 2) * f_C_values
    return np.sum(individual_components)

def angular_component(Rij_vectors, Rik_vectors, zeta, theta_s, eta, Rs, RC=3.5):
    # Rij_vectors and Rik_vectors are 2d arrays with shape (n_atoms, 3), all other parameters are scalars
    # calculate theta_ijk values from vector operations
    dot_products = Rij_vectors.dot(Rik_vectors.T)
    Rij_norms = np.linalg.norm(Rij_vectors, axis=-1)
    Rik_norms = np.linalg.norm(Rik_vectors, axis=-1)
    norms = Rij_norms.reshape((-1, 1)).dot(Rik_norms.reshape((1, -1)))
    cos_values = np.clip(dot_products / (norms + 1e-8), -1, 1)
    theta_ijks = np.arccos(cos_values)
    theta_ijk_filter = (theta_ijks != 0).astype(float)
    mean_dists = (Rij_norms.reshape((-1, 1)) + Rik_norms.reshape((1, -1))) / 2
    f_C_values_Rij = calc_f_C(Rij_norms, RC)
    f_C_values_Rik = calc_f_C(Rik_norms, RC)
    f_C_values = f_C_values_Rij.reshape((-1, 1)).dot(f_C_values_Rik.reshape((1, -1)))
    individual_components = (1 + np.cos(theta_ijks - theta_s)) ** zeta * \
        np.exp(-eta * (mean_dists - Rs) ** 2) * f_C_values * theta_ijk_filter
    return 2 ** (1 - zeta) * np.sum(individual_components)

def calc_aev(atom_types, coords, i_index):
    # atom_types are np.array of ints
    relative_coordinates = coords - coords[i_index]
    nearby_atom_indicator = np.linalg.norm(relative_coordinates, axis=-1) < 5.3
    relative_coordinates = relative_coordinates[nearby_atom_indicator]
    atom_types = atom_types[nearby_atom_indicator]
    radial_aev = np.array([radial_component(np.linalg.norm(relative_coordinates[atom_types == atom], 
                                                           axis=-1), eta, Rs) \
                           for atom in [0, 1, 2, 3] for eta in [16] \
                           for Rs in [0.900000,1.168750,1.437500,1.706250,1.975000,2.243750,2.51250,2.781250,3.050000,\
                                   3.318750,3.587500,3.856250,4.125000,4.39375,4.662500,4.931250]])
    angular_aev = np.array([angular_component(relative_coordinates[atom_types == atom_j], 
                                              relative_coordinates[atom_types == atom_k],\
                                             zeta, theta_s, eta, Rs) \
                            for atom_j in [0, 1, 2, 3] for atom_k in range(atom_j, 4) for zeta in [32] \
                            for theta_s in [0.19634954,0.58904862,0.9817477,1.3744468,1.7671459,2.1598449,2.552544,2.945243]\
                            for eta in [8] for Rs in [0.900000,1.550000,2.200000,2.850000]])
    print(len(radial_aev), len(angular_aev))
    return np.concatenate([radial_aev, angular_aev])

        

In [196]:
import numpy as np
import matplotlib.pyplot as plt
r_ij = np.linspace(0,10,500)
rc = 5.2
# plt.plot(r_ij,[calc_f_C(r,rc)for r in r_ij]);


In [197]:
mapping={"H":0, "C":1, "N":2, "O":3}
elements= np.array([mapping[atom] for atom in S])
elements


array([1, 0, 0, 0, 0])

In [198]:
from functools import wraps
from time import time

def timing(f):
    @wraps(f)
    def wrap(*args, **kw):
        ts = time()
        result = f(*args, **kw)
        te = time()
        print('func:%r  took: %2.4f sec' % (f.__name__,  te-ts))
        return result
    return wrap


In [199]:
from torch.optim import SGD, Adam
import torch.nn.functional as F
import random
from tqdm import tqdm
import math
import torch
import torch.nn as nn
architecture = {"input_size": 1, "hidden1": 128, "hidden2": 128, "hidden3": 64, "output_size": 64} 
class ANI(nn.Module):
    def __init__(self):
        super(ANI, self).__init__()
        self.sub_nets = nn.ModuleDict({
            'C': ANI_sub(architecture),
            'H': ANI_sub(architecture),
            'N': ANI_sub(architecture),
            'O': ANI_sub(architecture)
        })
        # self.network = create_network(input_dim)

    def forward(self, aevs, atom_types):
        atomic_energies = []
        for atom_type, sub in self.sub_nets.items():
            atom = (atom_types == atom_type).unsqueeze(-1)
            atom_aev = aevs * atom
            atomic_energies[atom_type] = sub(atom_aev)
        # total_energies = torch.sum(atomic_energies,dim=...)
        total_energies = torch.stack(list(atomic_energies.values())).sum(dim=0)
        return total_energies


class ANI_sub(nn.Module):
    def __init__(self, architecture):
        super().__init__()
        self.fc1 = nn.Linear(architecture['input_size'], architecture['hidden1'])
        self.act1 = nn.Tanh()  
        self.fc2 = nn.Linear(architecture['hidden1'], architecture['hidden2'])
        self.act2 = nn.Tanh() 
        self.fc3 = nn.Linear(architecture['hidden2'], architecture['hidden3'])
        self.act3 = nn.Tanh() 
        self.fc4 = nn.Linear(architecture['hidden3'], architecture['output_size'])

        

    def forward(self, aev):
        x = self.fc1(aev)
        x = self.act1(x)
        x = self.fc2(x)
        x = self.act2(x)
        x = self.fc3(x)
        x = self.act3(x)
        x = self.fc4(x)
        atomic_energy = x.squeeze(dim=1)
        return atomic_energy
      


# def create_network(input_dim):
#     return nn.Sequential(
#         nn.Linear(input_dim, 128),
#         nn.SiLU(),
#         nn.Linear(128, 128),
#         nn.SiLU(),
#         nn.Linear(128, 64),
#         nn.SiLU(),
#         nn.Linear(64, 1)
#     )

# element_networks = nn.ModuleDict({
#     'H': create_network(aev_dim),
#     'C': create_network(aev_dim),
#     'N': create_network(aev_dim),
#     'O': create_network(aev_dim)
# })


In [189]:
elements

array([1, 0, 0, 0, 0])

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
Caevs = [] 
Haevs = [] 
Naevs = [] 
Oaevs = [] 

# for i in range(len(S)): 
#     aev = calc_aev(elements, X[i], i)
#     # scaled_aev = scaler.fit_transform(aev) 
#     aevs.append(aev)

aevs = []
for coords in X:
    for atom_index in range(len(elements)):
        aev = calc_aev(elements, coords, atom_index)
        aevs.append(aev)

all_aevs = np.array(aevs)




64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320
64 320

In [249]:
len(all_aevs)

27000

# Training

In [282]:
len(aevs)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
all_aevs = scaler.fit_transform(all_aevs)

y=E
scaler_y = StandardScaler()
# y = scaler_y.fit_transform(y.reshape(-1, 1)).flatten()
y = scaler_y.fit_transform(y.reshape(-1,1)).flatten()

C = all_aevs[0:5400]
H1 = all_aevs[5400:10800]
H2 = all_aevs[10800:16200]
H3 = all_aevs[16200:21600]
H4 = all_aevs[21600:27000]
aevs_norm = np.array([C, H1, H2, H3, H4])



(5, 5400, 384)

In [341]:
from torch.optim import SGD, Adam
import torch.nn.functional as F
import random
from tqdm import tqdm
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import math 

def create_chunks(complete_list, chunk_size=None, num_chunks=None):
    '''
    Cut a list into multiple chunks, each having chunk_size (the last chunk might be less than chunk_size)
    or having a total of num_chunk chunks
    '''
    chunks = []
    if num_chunks is None:
        num_chunks = math.ceil(len(complete_list) / chunk_size)
    elif chunk_size is None:
        chunk_size = math.ceil(len(complete_list) / num_chunks)
    for i in range(num_chunks):
        chunks.append(complete_list[i * chunk_size: (i + 1) * chunk_size])
    return chunks

class Trainer():
    def __init__(self, model, optimizer_type, learning_rate, epoch, batch_size,\
                 input_transform=lambda x: x, max_tries=10, lr_decay=0.5, lr_min=1e-5):
        """ The class for training the model
        model: nn.Module
            A pytorch model
        optimizer_type: 'adam' or 'sgd'
        learning_rate: float
        epoch: int
        batch_size: int
        input_transform: func
        max_tries: int, # of epochs with no improvement, then reduce lr 
        lr_decay: float, factor to reduce lr by 
        lr_min: float, minimum learning rate threshold
        """
        self.model = model
        if optimizer_type == "sgd":
            self.optimizer = SGD(model.parameters(), learning_rate,momentum=0.9)
        elif optimizer_type == "adam":
            self.optimizer = Adam(model.parameters(), learning_rate, betas=(0.9, 0.999)) 
        self.learning_rate = learning_rate
        self.epoch = epoch
        self.batch_size = batch_size
        self.input_transform = input_transform
        self.max_tries = max_tries 
        self.lr_decay = lr_decay
        self.lr_min = lr_min 
        
    def input_transform(self, inputs): 
        return torch.tensor(inputs, dtype=torch.float)
    
    @staticmethod
    def rmse(predictions, targets):
        return np.sqrt(((predictions - targets) ** 2).mean())

    @timing
    def train(self, aevs,outputs, val_aevs, val_outputs, learning_rate, early_stop=False, l2=False, silent=False):
        """ train self.model with specified arguments
        inputs: np.array, The shape of input_transform(input) should be (ndata,nfeatures)
        outputs: np.array shape (ndata,)
        val_nputs: np.array, The shape of input_transform(val_input) should be (ndata,nfeatures)
        val_outputs: np.array shape (ndata,)
        early_stop: bool
        l2: bool
        silent: bool. Controls whether or not to print the train and val error during training
        @return
        a dictionary of arrays with train and val losses and accuracies
        """
        ### convert data to tensor of correct shape and type here ###
        
        inputs = torch.tensor(aevs, dtype=torch.float)
        outputs = torch.tensor(outputs, dtype=torch.float32)  # Adjust the data type if needed
        val_inputs = torch.tensor(val_aevs, dtype=torch.float)
        val_outputs = torch.tensor(val_outputs, dtype=torch.float32)  # Adjust the data type if needed


        losses = []
        accuracies = []
        val_losses = []
        val_accuracies = []
        weights = self.model.state_dict()
        lowest_val_loss = np.inf
        no_improvement = 0 # count the number of epochs without improvement in loss fn
        
        
        for n_epoch in tqdm(range(self.epoch), leave=False):
            self.model.train()
            batch_indices = list(range(inputs.shape[0]))
            random.shuffle(batch_indices)
            batch_indices = create_chunks(batch_indices, chunk_size=self.batch_size)
            epoch_loss = 0
            epoch_acc = 0
            for batch in batch_indices:
                batch_importance = len(batch) / len(outputs) # ratio of this batch 
                batch_input = inputs[batch]
                batch_output = outputs[batch]
                ### make prediction and compute loss with loss function of your choice on this batch ###
                batch_predictions = self.model(batch_input) 
                loss = nn.MSELoss()(batch_predictions, batch_output)
            
                if l2:
                    ### Compute the loss with L2 regularization ###
                    l2_norm = sum([p.pow(2.0).sum() for p in self.model.parameters()])
                    loss = loss + 1e-5 * l2_norm
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.detach().cpu().item() * batch_importance
                # acc = torch.sum(torch.argmax(batch_predictions, \
                #                 axis=-1)==batch_output) / len(batch_predictions)
            
                
            val_loss, val_acc = self.evaluate(val_inputs, val_outputs,print_acc=False)
     
            if val_loss < lowest_val_loss:
                lowest_val_loss = val_loss
                weights = self.model.state_dict()
            else: 
                no_improvement+=1 
            if no_improvement >= self.max_tries:  # reduce learning rate and reset count (new try) 
                learning_rate *= self.lr_decay 
                learning_rate= max(learning_rate, self.lr_min)
                self.optimizer.param_groups[0]['lr'] = learning_rate
                no_improvement = 0
            if n_epoch % 10 ==0 and not silent: 
                print("Epoch %d/%d - Loss: %.3f - Acc: %.3f" % (n_epoch + 1, self.epoch, epoch_loss, epoch_acc))
                print("              Val_loss: %.3f - Val_acc: %.3f" % (val_loss, val_acc))
                
            losses.append(epoch_loss)
            accuracies.append(epoch_acc)
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
           

        if early_stop:
            self.model.load_state_dict(weights)    

        return {"losses": losses, "accuracies": accuracies\
                , "val_losses": val_losses, "val_accuracies": val_accuracies}
        
    def evaluate(self, inputs, outputs, print_acc=True):
        """ evaluate model on provided input and output
        inputs: np.array, The shape of input_transform(input) should be (ndata,nfeatures)
        outputs: np.array shape (ndata,)
        print_acc: bool
        
        @return
        losses: float
        rmse: float (rmse) 
        """
        inputs = self.input_transform(inputs)
        outputs = torch.tensor(outputs, dtype=torch.float32)
        # if torch.is_tensor(inputs):
        #     inputs = self.input_transform(inputs)
        # else:
        #     inputs = self.input_transform(torch.tensor(inputs, dtype=torch.float))
        #     outputs = torch.tensor(outputs, dtype=torch.int64)
        self.model.eval()
        batch_indices = list(range(inputs.shape[0]))
        batch_indices = create_chunks(batch_indices, chunk_size=self.batch_size)
        rmse = 0
        losses = 0
        for batch in batch_indices:
            batch_importance = len(batch) / len(outputs)
            batch_input = inputs[batch]
            batch_output = outputs[batch]
            with torch.no_grad():
                batch_predictions = self.model(batch_input)
                loss = nn.MSELoss()(batch_predictions, batch_output)
            batch_rmse = self.rmse(batch_predictions, batch_output)
            losses += loss.detach().cpu().item() * batch_importance
            rmse += batch_rmse * batch_importance

        if print_acc:
            print("rmse: %.3f" % acc)
        return losses, rmse

In [342]:
from torch.optim import SGD, Adam
import torch.nn.functional as F
import random
from tqdm import tqdm
import math
import torch
import torch.nn as nn
architecture = {"input_size": 1, "hidden1": 128, "hidden2": 128, "hidden3": 64, "output_size": 1} 

class ANI_single(nn.Module):
    def __init__(self, input_dim):
        super(ANI_single, self).__init__()
        self.fc1 = nn.Linear(input_dim, architecture['hidden1'])
        self.act1 = nn.Tanh()  
        self.fc2 = nn.Linear(architecture['hidden1'], architecture['hidden2'])
        self.act2 = nn.Tanh() 
        self.fc3 = nn.Linear(architecture['hidden2'], architecture['hidden3'])
        self.act3 = nn.Tanh() 
        self.fc4 = nn.Linear(architecture['hidden3'], 1)

    def forward(self, aev):
        x = self.fc1(aev)
        x = self.act1(x)
        x = self.fc2(x)
        x = self.act2(x)
        x = self.fc3(x)
        x = self.act3(x)
        x = self.fc4(x)
        atomic_energy = x.squeeze(dim=1)
        return atomic_energy
 


In [343]:
from sklearn.model_selection import train_test_split
aevs_train, aevs_val, outputs_train, outputs_val = train_test_split(aevs_norm[0], y, test_size=0.2, random_state=42)
# aevs_val, aevs_test, atom_types_val, atom_types_test, outputs_val, outputs_test = train_test_split(aevs_temp, atom_types_temp,\
#                                                                                         outputs_temp, test_size=0.5, random_state=42)

In [344]:
model = ANI_single(384)
optimizer_type = 'adam'
learning_rate = 0.001 
epoch = 100
batch_size= 128 
atom_types = torch.tensor(elements)
trainer = Trainer(model, optimizer_type, learning_rate, epoch, batch_size, input_transform=lambda x: x,)

results = trainer.train(aevs_train, outputs_train, aevs_val, outputs_val, learning_rate)
# trainer.train(ch4_aev, elements, target1, ch4_aev, elements, target1, early_stop=False, l2=False, silent=False)
# ch4_aev

  outputs = torch.tensor(outputs, dtype=torch.float32)
  2%|█▍                                                                       | 2/100 [00:00<00:19,  5.10it/s]

Epoch 1/100 - Loss: 1.000 - Acc: 0.000
              Val_loss: 1.043 - Val_acc: 1.020


 12%|████████▋                                                               | 12/100 [00:01<00:12,  6.80it/s]

Epoch 11/100 - Loss: 0.989 - Acc: 0.000
              Val_loss: 1.042 - Val_acc: 1.019


 22%|███████████████▊                                                        | 22/100 [00:03<00:12,  6.31it/s]

Epoch 21/100 - Loss: 0.987 - Acc: 0.000
              Val_loss: 1.039 - Val_acc: 1.018


 32%|███████████████████████                                                 | 32/100 [00:04<00:10,  6.56it/s]

Epoch 31/100 - Loss: 0.984 - Acc: 0.000
              Val_loss: 1.040 - Val_acc: 1.018


 42%|██████████████████████████████▏                                         | 42/100 [00:06<00:08,  6.65it/s]

Epoch 41/100 - Loss: 0.980 - Acc: 0.000
              Val_loss: 1.041 - Val_acc: 1.019


 52%|█████████████████████████████████████▍                                  | 52/100 [00:07<00:07,  6.61it/s]

Epoch 51/100 - Loss: 0.976 - Acc: 0.000
              Val_loss: 1.042 - Val_acc: 1.019


 62%|████████████████████████████████████████████▋                           | 62/100 [00:09<00:05,  6.45it/s]

Epoch 61/100 - Loss: 0.974 - Acc: 0.000
              Val_loss: 1.043 - Val_acc: 1.020


 72%|███████████████████████████████████████████████████▊                    | 72/100 [00:11<00:04,  6.18it/s]

Epoch 71/100 - Loss: 0.973 - Acc: 0.000
              Val_loss: 1.043 - Val_acc: 1.020


 82%|███████████████████████████████████████████████████████████             | 82/100 [00:13<00:02,  6.23it/s]

Epoch 81/100 - Loss: 0.973 - Acc: 0.000
              Val_loss: 1.044 - Val_acc: 1.020


 92%|██████████████████████████████████████████████████████████████████▏     | 92/100 [00:14<00:01,  6.27it/s]

Epoch 91/100 - Loss: 0.972 - Acc: 0.000
              Val_loss: 1.044 - Val_acc: 1.020


                                                                                                              

func:'train'  took: 16.0758 sec




In [345]:
results

{'losses': [1.0004607169716448,
  0.9924012007536713,
  0.9934316891211051,
  0.9932869368129309,
  0.9934909392286231,
  0.9920124654416685,
  0.9919049501419068,
  0.9916314800580344,
  0.9921633618849294,
  0.9914212248943471,
  0.9894713891877068,
  0.9902360086087828,
  0.990579400680683,
  0.9915535043787074,
  0.9892594778979269,
  0.9873946666717531,
  0.9883883083308184,
  0.9873246978830408,
  0.9872134204264041,
  0.9881896222079243,
  0.9872870109699392,
  0.9878647998527245,
  0.986583711924376,
  0.9864731386855795,
  0.984368313241888,
  0.9842044728773612,
  0.9842677142884997,
  0.9846564769744873,
  0.9842637322567126,
  0.9830840565540175,
  0.9838252133793302,
  0.9833377414279515,
  0.9822029074033105,
  0.9828763948546515,
  0.9807972391446433,
  0.9809441681261417,
  0.980044068672039,
  0.9802890804078845,
  0.9795752278080693,
  0.9794278427406594,
  0.9797456251250372,
  0.9792726922918251,
  0.9785927807843242,
  0.9784196036833305,
  0.9770847400029502,
  0.

In [None]:
import torchani
import torch

Rcr = 5.2
Rca = 3.5
EtaR = torch.tensor([16], dtype=torch.float)
ShfR = torch.tensor([0.900000,1.168750,1.437500,1.706250,1.975000,2.243750,2.51250,2.781250,3.050000,\
                            3.318750,3.587500,3.856250,4.125000,4.39375,4.662500,4.931250], dtype=torch.float)
EtaA= torch.tensor([8], dtype=torch.float)
Zeta = torch.tensor([32], dtype=torch.float)
ShfA = torch.tensor([0.900000,1.550000,2.200000,2.850000], dtype=torch.float)
ShfZ = torch.tensor([0.19634954,0.58904862,0.9817477,1.3744468,1.7671459,2.1598449,2.552544,2.945243], 
                    dtype=torch.float)

species_order = ['H', 'C', 'N', 'O']
num_species = len(species_order)
aev_computer = torchani.AEVComputer(Rcr, Rca, EtaR, 
                                    ShfR, EtaA, Zeta, ShfA, ShfZ, num_species)
energy_shifter = torchani.utils.EnergyShifter(None)

In [346]:
len(aev)

384

In [122]:
aev_dim = len(aev)

H_network = torch.nn.Sequential(
    torch.nn.Linear(aev_dim, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 64),
    torch.nn.Tanh(),
    torch.nn.Linear(64, 1)
)

C_network = torch.nn.Sequential(
    torch.nn.Linear(aev_dim, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 64),
    torch.nn.Tanh(),
    torch.nn.Linear(64, 1)
)

N_network = torch.nn.Sequential(
    torch.nn.Linear(aev_dim, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 64),
    torch.nn.Tanh(),
    torch.nn.Linear(64, 1)
)

O_network = torch.nn.Sequential(
    torch.nn.Linear(aev_dim, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 128),
    torch.nn.Tanh(),
    torch.nn.Linear(128, 64),
    torch.nn.Tanh(),
    torch.nn.Linear(64, 1)
)

nn = torchani.ANIModel([H_network, C_network, N_network, O_network])

In [45]:
import torch
import torchani
import os
import math
import torch.utils.tensorboard
import tqdm
import pickle

try:
    path = os.path.dirname(os.path.realpath(__file__))
except NameError:
    path = os.getcwd()
dspath = os.path.join(path, 'ani_gdb_s01.h5')
batch_size = 2560

pickled_dataset_path = 'dataset.pkl'

# We pickle the dataset after loading to ensure we use the same validation set
# each time we restart training, otherwise we risk mixing the validation and
# training sets on each restart.
if os.path.isfile(pickled_dataset_path):
    print(f'Unpickling preprocessed dataset found in {pickled_dataset_path}')
    with open(pickled_dataset_path, 'rb') as f:
        dataset = pickle.load(f)
    training = dataset['training'].collate(batch_size).cache()
    validation = dataset['validation'].collate(batch_size).cache()
    energy_shifter.self_energies = dataset['self_energies'].to(device)
else:
    print(f'Processing dataset in {dspath}')
    training, validation = torchani.data.load(dspath)\
                                        .subtract_self_energies(energy_shifter, species_order)\
                                        .species_to_indices(species_order)\
                                        .shuffle()\
                                        .split(0.8, None)
    with open(pickled_dataset_path, 'wb') as f:
        pickle.dump({'training': training,
                     'validation': validation,
                     'self_energies': energy_shifter.self_energies.cpu()}, f)
    training = training.collate(batch_size).cache()
    validation = validation.collate(batch_size).cache()

Unpickling preprocessed dataset found in dataset.pkl


NameError: name 'device' is not defined

In [33]:
def init_params(m):
    if isinstance(m, torch.nn.Linear):
        torch.nn.init.kaiming_normal_(m.weight, a=1.0)
        torch.nn.init.zeros_(m.bias)


nn.apply(init_params)

ANIModel(
  (0): Sequential(
    (0): Linear(in_features=384, out_features=160, bias=True)
    (1): Identity()
    (2): Linear(in_features=160, out_features=128, bias=True)
    (3): Identity()
    (4): Linear(in_features=128, out_features=96, bias=True)
    (5): Identity()
    (6): Linear(in_features=96, out_features=1, bias=True)
  )
  (1): Sequential(
    (0): Linear(in_features=384, out_features=144, bias=True)
    (1): Identity()
    (2): Linear(in_features=144, out_features=112, bias=True)
    (3): Identity()
    (4): Linear(in_features=112, out_features=96, bias=True)
    (5): Identity()
    (6): Linear(in_features=96, out_features=1, bias=True)
  )
  (2): Sequential(
    (0): Linear(in_features=384, out_features=128, bias=True)
    (1): Identity()
    (2): Linear(in_features=128, out_features=112, bias=True)
    (3): Identity()
    (4): Linear(in_features=112, out_features=96, bias=True)
    (5): Identity()
    (6): Linear(in_features=96, out_features=1, bias=True)
  )
  (3): Se

In [63]:
E.min(), E.max()

(-40.4995983468, -40.4397148239)