## Model file
Defines and returns the model architecture. `build_model` takes a dataframe and a `param` object that carries the input type ('Chemprop', 'Morgan', or 'All' for both), the hidden-layer sizes (`hidden`), and the model type ('Arrhenius' or 'Point').

## Import libraries

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [4]:
def _hidden_stack(hidden, p, batch_norm):
    """Stack the hidden-to-hidden layers as an ``nn.Sequential``.

    For each consecutive pair of widths in ``hidden`` the stack holds
    Dropout -> Linear -> (BatchNorm1d) -> ReLU.  The Linear bias is dropped
    when batch norm follows it, since BatchNorm1d carries its own shift term.
    """
    layers = []
    for n_in, n_out in zip(hidden[:-1], hidden[1:]):
        layers.append(nn.Dropout(p=p))
        layers.append(nn.Linear(n_in, n_out, bias=not batch_norm))
        if batch_norm:
            layers.append(nn.BatchNorm1d(n_out))
        layers.append(nn.ReLU())
    return nn.Sequential(*layers)


class _FingerprintTrunk(nn.Module):
    """Shared trunk: input dropout -> fc1 (-> bn) -> ReLU -> hidden stack.

    Attribute names (``drop_layer``, ``fc1``, ``bn``, ``body``) are kept
    stable so that ``state_dict`` keys do not change.
    """

    def __init__(self, n_in, hidden, p, batch_norm):
        super().__init__()
        self.batch_norm = bool(batch_norm)
        self.drop_layer = nn.Dropout(p=p)
        self.fc1 = nn.Linear(n_in, hidden[0], bias=not self.batch_norm)
        if self.batch_norm:
            self.bn = nn.BatchNorm1d(hidden[0])
        self.body = _hidden_stack(hidden, p, self.batch_norm)

    def _trunk(self, x):
        """Run ``x`` through every layer that precedes the output heads."""
        x = self.fc1(self.drop_layer(x))
        if self.batch_norm:
            x = self.bn(x)
        return self.body(F.relu_(x))


class _PointModel(_FingerprintTrunk):
    """Single-head network; the input carries one extra temperature feature."""

    def __init__(self, n_input, hidden, p, batch_norm, n_out):
        # Add one input for Temperature.
        super().__init__(n_input + 1, hidden, p, batch_norm)
        self.fck = nn.Linear(hidden[-1], n_out)

    def forward(self, x):
        return self.fck(self._trunk(x))


class _ArrheniusModel(_FingerprintTrunk):
    """Three-head network returning ``(A, n, B)``.

    NOTE(review): the heads are presumably the coefficients of an
    Arrhenius-type expression evaluated by the caller -- confirm against
    the training code.
    """

    def __init__(self, n_input, hidden, p, batch_norm, n_out):
        super().__init__(n_input, hidden, p, batch_norm)
        self.fcA = nn.Linear(hidden[-1], n_out)
        self.fcn = nn.Linear(hidden[-1], n_out)
        self.fcB = nn.Linear(hidden[-1], n_out)

    def forward(self, x):
        x = self._trunk(x)
        return self.fcA(x), self.fcn(x), self.fcB(x)


def build_model(df, param):
    """Build the network described by ``param`` and move it to the device.

    Args:
        df: DataFrame whose columns include ``"fp_0"`` .. ``"fp_1399"`` and
            ``0`` .. ``2047``; only the column positions are read, to size
            the input layer.
        param: Object with attributes ``input_type`` ('All', 'Chemprop' or
            'Morgan'), ``model_type`` ('Point' or 'Arrhenius'), ``dropout``,
            ``batch_norm``, ``hidden`` (sequence of layer widths) and
            ``target`` ('All' gives 4 outputs per head, anything else 1).

    Returns:
        An ``nn.Module`` on CUDA when available, otherwise on CPU.  The
        'Point' model returns one tensor from ``forward``; the 'Arrhenius'
        model returns a tuple ``(A, n, B)``.

    Raises:
        ValueError: If ``input_type`` or ``model_type`` is not supported, or
            ``hidden`` is empty.
        KeyError: If the expected fingerprint columns are missing from ``df``.
    """
    # Extract length of fingerprints from the column positions.
    n_cp = (df.columns.get_loc("fp_1399") - df.columns.get_loc("fp_0")) + 1
    # NOTE(review): unlike n_cp there is no "+1" here, so this is one less
    # than the number of columns from 0 through 2047 inclusive.  Left as-is
    # because the data pipeline may rely on it -- confirm before changing.
    n_morgan = df.columns.get_loc(2047) - df.columns.get_loc(0)

    # Determine input size.
    input_sizes = {
        'All': n_cp + n_morgan,
        'Chemprop': n_cp,
        'Morgan': n_morgan,
    }
    if param.input_type not in input_sizes:
        raise ValueError(f"Input type not supported: {param.input_type!r}")
    n_input = input_sizes[param.input_type]

    # Pick the architecture; fail early instead of crashing later on an
    # undefined name.
    architectures = {'Point': _PointModel, 'Arrhenius': _ArrheniusModel}
    if param.model_type not in architectures:
        raise ValueError(f"Model type not supported: {param.model_type!r}")
    if len(param.hidden) == 0:
        raise ValueError("param.hidden must contain at least one layer width")

    n_out = 4 if param.target == 'All' else 1
    net = architectures[param.model_type](
        n_input, param.hidden, param.dropout, param.batch_norm, n_out
    )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.device_count() > 1:
        print("Using", torch.cuda.device_count(), "GPUs")
        # NOTE(review): taking ``.module`` discards the DataParallel wrapper,
        # so the returned network is the plain module.  Kept unchanged so the
        # return type callers see stays the same.
        net = nn.DataParallel(net).module
    net.to(device)

    return net

## Wrapper function to define the model, criterion, optimizer, and scheduler

In [5]:
def model_initialize(df,param):
    """Create the network together with its loss, optimizer and LR schedule.

    Args:
        df: DataFrame forwarded unchanged to ``build_model``.
        param: Settings object forwarded to ``build_model``; additionally
            ``lr``, ``scheduler_step`` and ``scheduler_gamma`` are read here.

    Returns:
        Tuple ``(model, criterion, optimizer, scheduler)`` where the
        criterion is ``nn.MSELoss``, the optimizer is Adam over all model
        parameters and the scheduler is a ``StepLR`` bound to that optimizer.
    """
    net = build_model(df, param)
    loss_fn = nn.MSELoss()
    adam = torch.optim.Adam(net.parameters(), lr=param.lr)
    step_schedule = torch.optim.lr_scheduler.StepLR(
        adam,
        step_size=param.scheduler_step,
        gamma=param.scheduler_gamma,
    )
    return net, loss_fn, adam, step_schedule