# Prepare the Training and Testing Data Sets

First, we load the data sets using the `load_data_from_df` function and then build the data loaders with `construct_loader`; both are imported from `soltrannet/data_utils.py`. This notebook assumes you are running it from the root of this GitHub repository; the first code cell then changes the current working directory to the `soltrannet` directory.

In [1]:
import os
import pandas as pd
import torch
import numpy as np
import time

# The relative data paths used later (Training_Data/..., Testing_Data/...) are resolved from this directory.
# NOTE(review): not idempotent -- re-running this cell without restarting the kernel
# attempts to change into a nested 'soltrannet' directory (or raises if none exists).
os.chdir('soltrannet')

from soltrannet.data_utils import load_data_from_df, construct_loader

Specifying the data files and setting the random seed

In [2]:
# Input CSVs, given relative to the current working directory.
trainfile, testfile = (
    'Training_Data/train_subset1_1000.csv',
    'Testing_Data/test_subset1_1000.csv',
)

# Seed PyTorch and NumPy with one shared value so that runs are repeatable.
SEED = 420
torch.manual_seed(SEED)
np.random.seed(SEED)

Loading the Training & Testing Data

In [3]:
batch_size = 8

# Both splits are featurized with exactly the same options.
featurizer_kwargs = dict(add_dummy_node=True, one_hot_formal_charge=True)

# Training split: featurize the CSV, then wrap it in a batched loader.
trainX, trainy = load_data_from_df(trainfile, **featurizer_kwargs)
data_loader = construct_loader(trainX, trainy, batch_size)

# Test split: same pipeline, same batch size.
testX, testy = load_data_from_df(testfile, **featurizer_kwargs)
testdata_loader = construct_loader(testX, testy, batch_size)

# Prepare Model

The model parameters used below are the default values used for the SolTranNet architecture.

In [4]:
from transformer import make_model

In [5]:
# Width of the per-atom feature vector, read from the first training sample
# (trainX[0][0] is presumably that sample's node-feature matrix -- confirm in data_utils).
# It depends on the featurization used; to match the SolTranNet architecture
# d_atom should equal 28.
first_sample_features = trainX[0][0]
d_atom = first_sample_features.shape[1]

# Keyword arguments forwarded to make_model.
model_params = dict(
    d_atom=d_atom,
    d_model=8,
    N=8,
    h=2,
    N_dense=1,
    lambda_attention=0.33,
    leaky_relu_slope=0.0,
    dense_output_nonlinearity='relu',
    dropout=0.1,
    aggregation_type='mean',
)


In [6]:
# Build the network from the hyperparameters collected in model_params.
model = make_model(**model_params)

In [7]:
# Switch the model to training mode; the module tree it returns is displayed as the cell output.
model.train()

GraphTransformer(
  (encoder): Encoder(
    (layers): ModuleList(
      (0): EncoderLayer(
        (self_attn): MultiHeadedAttention(
          (linears): ModuleList(
            (0): Linear(in_features=8, out_features=8, bias=True)
            (1): Linear(in_features=8, out_features=8, bias=True)
            (2): Linear(in_features=8, out_features=8, bias=True)
            (3): Linear(in_features=8, out_features=8, bias=True)
          )
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (feed_forward): PositionwiseFeedForward(
          (linears): ModuleList(
            (0): Linear(in_features=8, out_features=8, bias=True)
          )
          (dropout): ModuleList(
            (0): Dropout(p=0.1, inplace=False)
          )
        )
        (sublayer): ModuleList(
          (0): SublayerConnection(
            (norm): LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): SublayerConnection(
            (norm): LayerNorm()
   

In [8]:
# Loss function: mean squared error, averaged over all elements (reduction='mean').

criterion = torch.nn.MSELoss(reduction='mean')

In [9]:
# Optimizer: SGD over all model parameters with learning rate 1e-4, momentum 0.9 and no weight decay.

optimizer=torch.optim.SGD(model.parameters(),lr=1e-4,momentum=0.9,weight_decay=0)

# Run Training/Evaluation Loop

In [None]:
# Train with one SGD step per batch, evaluate on the test set every `eval_every`
# steps, and print the training-set RMSE / R2 at the end of every epoch.
n_epochs = 250     # number of passes over the training set
eval_every = 100   # run a test-set evaluation every this many optimizer steps

iteration = 0
for epoch in range(n_epochs):
    # Plain lists are used for accumulation: list.extend is O(1) amortised,
    # whereas np.append copies the whole array on every call.
    epoch_preds = []
    epoch_gold = []
    for batch in data_loader:
        iteration += 1
        optimizer.zero_grad()
        adjacency_matrix, node_features, y = batch
        # True for every row that has at least one non-zero feature
        # (presumably real atoms as opposed to padding -- confirm in data_utils).
        batch_mask = torch.sum(torch.abs(node_features), dim=-1) != 0
        y_pred = model(node_features, batch_mask, adjacency_matrix, None)

        # accumulate this epoch's training targets and predictions
        epoch_gold.extend(y.detach().flatten().tolist())
        epoch_preds.extend(y_pred.detach().flatten().tolist())

        loss = criterion(y_pred, y)
        loss.backward()

        # gradient clipping: cap the total gradient norm at 2
        torch.nn.utils.clip_grad_norm_(model.parameters(), 2)

        optimizer.step()

        if iteration % eval_every == 0:
            # periodic test-set evaluation
            model.eval()
            gold = []
            preds = []

            # No gradients are needed for evaluation; skipping autograd
            # bookkeeping saves memory and time without changing the outputs.
            with torch.no_grad():
                for t_batch in testdata_loader:
                    t_adjacency_matrix, t_node_features, t_y = t_batch
                    gold.extend(t_y.flatten().tolist())
                    t_batch_mask = torch.sum(torch.abs(t_node_features), dim=-1) != 0
                    t_y_pred = model(t_node_features, t_batch_mask, t_adjacency_matrix, None)
                    preds.extend(t_y_pred.flatten().tolist())

            gold = np.array(gold)
            preds = np.array(preds)
            test_rmse = np.sqrt(np.mean((preds - gold) ** 2))
            # squared Pearson correlation between predictions and targets
            test_r2 = np.corrcoef(preds, gold)[0][1] ** 2
            model.train()

    # end of 1 epoch -- time to log the stats
    epoch_preds = np.array(epoch_preds)
    epoch_gold = np.array(epoch_gold)
    train_rmse = np.sqrt(np.mean((epoch_preds - epoch_gold) ** 2))
    train_r2 = np.corrcoef(epoch_preds, epoch_gold)[0][1] ** 2

    print('----------------------------------')
    print(f'Epoch: {epoch}')
    print(f'Training RMSE: {train_rmse}')
    print(f'Training R2: {train_r2}')
    # test_rmse / test_r2 hold the most recent periodic evaluation; they only
    # exist once `eval_every` steps have run. Uncomment to log them as well.
    #print(f'Test RMSE: {test_rmse}')
    #print(f'Test R2: {test_r2}')

  return [FloatTensor(features) for features in (adjacency_list, features_list, labels)]


----------------------------------
Epoch: 0
Training RMSE: 3.415679183746601
Training R2: 0.013341350056293303
----------------------------------
Epoch: 1
Training RMSE: 3.03908591082807
Training R2: 0.00836737764127391
----------------------------------
Epoch: 2
Training RMSE: 2.733333304425917
Training R2: 0.0011816092076617016
----------------------------------
Epoch: 3
Training RMSE: 2.5372603917640246
Training R2: 0.004344696458977883
----------------------------------
Epoch: 4
Training RMSE: 2.4169266163391314
Training R2: 0.06742326016621747
----------------------------------
Epoch: 5
Training RMSE: 2.3517834217042255
Training R2: 0.06520078662950021
----------------------------------
Epoch: 6
Training RMSE: 2.3058599319046267
Training R2: 0.1069868615021578
----------------------------------
Epoch: 7
Training RMSE: 2.2737914960187595
Training R2: 0.13074032526970367
----------------------------------
Epoch: 8
Training RMSE: 2.2345569283864144
Training R2: 0.1618056291313299
---

----------------------------------
Epoch: 74
Training RMSE: 1.7540733755968383
Training R2: 0.4645372150626944
----------------------------------
Epoch: 75
Training RMSE: 1.755717562840668
Training R2: 0.4613963842991761
----------------------------------
Epoch: 76
Training RMSE: 1.7641794779562618
Training R2: 0.4605647081482907
----------------------------------
Epoch: 77
Training RMSE: 1.745176546292241
Training R2: 0.4671765849870097
----------------------------------
Epoch: 78
Training RMSE: 1.7474497588086813
Training R2: 0.4677159909588535
----------------------------------
Epoch: 79
Training RMSE: 1.7327971382831668
Training R2: 0.4766527900605835
----------------------------------
Epoch: 80
Training RMSE: 1.7215498795920037
Training R2: 0.4813616381107064
----------------------------------
Epoch: 81
Training RMSE: 1.7432827384887337
Training R2: 0.47053560891972335
----------------------------------
Epoch: 82
Training RMSE: 1.7368639439663884
Training R2: 0.47025253333390815
-

In [None]:
# Save only the learned parameters (the state_dict), not the model object itself,
# to the file 'trained.model' in the current working directory.
torch.save(model.state_dict(),'trained.model')

In [None]:
# Score the trained model on the training set and time every batch.
model.eval()

gold = []
preds = []
train_times = []   # one (tload, tpred) pair per batch, in seconds

t0 = time.time()
t1 = t0            # start of the first batch fetch

# Inference only: no autograd graph is needed.
with torch.no_grad():
    for batch in data_loader:
        # The loader produces the batch *before* the loop body runs, so the
        # fetch time has to be measured from the end of the previous iteration
        # (for the first batch this also includes creating the iterator).
        tload = time.time() - t1
        adjacency_matrix, node_features, y = batch
        gold.extend(y.flatten().tolist())
        batch_mask = torch.sum(torch.abs(node_features), dim=-1) != 0
        y_pred = model(node_features, batch_mask, adjacency_matrix, None)
        # cumulative time for this batch: fetch + forward pass
        tpred = time.time() - t1
        preds.extend(y_pred.flatten().tolist())

        train_times.append((tload, tpred))
        t1 = time.time()

# Later cells expect flat float arrays.
gold = np.array(gold)
preds = np.array(preds)

ttime = time.time() - t0
print('Overall Time: ', ttime)

In [None]:
import matplotlib.pyplot as plt

# Summary metrics for the training-set predictions.
# NOTE: "R2" here is the squared Pearson correlation, not the coefficient of determination.
r2 = np.corrcoef(preds, gold)[0][1] ** 2
rmse = np.sqrt(np.mean((preds - gold) ** 2))

# File name without its directory; unlike a fixed-offset slice this keeps
# working if the data directory is renamed or moved.
train_name = os.path.basename(trainfile)

# Predicted-vs-actual scatter with a least-squares line through the points.
fig, ax = plt.subplots()
ax.scatter(gold, preds)
z = np.polyfit(gold, preds, 1)
p = np.poly1d(z)
ax.plot(gold, p(gold), color='r')
ax.set_xlim([-10, 2])
ax.set_ylim([-10, 2])
ax.set_ylabel('Predictions')
ax.set_xlabel('Actual')
ax.set_title('%s without Quantum Data' % train_name)
ax.text(-8, -8, r'RMSE=%s' % rmse)
ax.text(-8, -9, r'R^2=%s' % r2)
fig.savefig('Without_QData_%s.png' % train_name)
plt.show()

# Compare the mean target with the mean prediction.
goldavg = np.mean(gold)
print(goldavg)
predavg = np.mean(preds)
print(predavg)
print('Train RMSE:', rmse)
print('Train R2 :', r2)

In [None]:
# Evaluate the test set: time every batch, then plot predicted vs. actual values.

# Set evaluation mode explicitly so this cell does not depend on an earlier
# cell having left the model in that state.
model.eval()

gold = []
preds = []
test_times = []    # one (tload, tpred) pair per batch, in seconds

t0 = time.time()
t1 = t0            # start of the first batch fetch

# Inference only: no autograd graph is needed.
with torch.no_grad():
    for batch in testdata_loader:
        # The loader produces the batch *before* the loop body runs, so the
        # fetch time has to be measured from the end of the previous iteration
        # (for the first batch this also includes creating the iterator).
        tload = time.time() - t1
        adjacency_matrix, node_features, y = batch
        gold.extend(y.flatten().tolist())
        batch_mask = torch.sum(torch.abs(node_features), dim=-1) != 0
        y_pred = model(node_features, batch_mask, adjacency_matrix, None)
        # cumulative time for this batch: fetch + forward pass
        tpred = time.time() - t1
        preds.extend(y_pred.flatten().tolist())

        test_times.append((tload, tpred))
        t1 = time.time()

gold = np.array(gold)
preds = np.array(preds)

ttime = time.time() - t0
print('Overall Time: ', ttime)

# NOTE: "R2" here is the squared Pearson correlation, not the coefficient of determination.
r2 = np.corrcoef(preds, gold)[0][1] ** 2
rmse = np.sqrt(np.mean((preds - gold) ** 2))

# File name without its directory; unlike a fixed-offset slice this keeps
# working if the data directory is renamed or moved.
test_name = os.path.basename(testfile)

# Predicted-vs-actual scatter with a least-squares line through the points.
fig, ax = plt.subplots()
ax.scatter(gold, preds)
z = np.polyfit(gold, preds, 1)
p = np.poly1d(z)
ax.plot(gold, p(gold), color='r')
ax.set_xlim([-10, 2])
ax.set_ylim([-10, 2])
ax.set_ylabel('Predictions')
ax.set_xlabel('Actual')
ax.set_title('%s without Quantum Data' % test_name)
ax.text(-8, -8, r'RMSE=%s' % rmse)
ax.text(-8, -9, r'R^2=%s' % r2)
fig.savefig('Without_QData_%s.png' % test_name)
plt.show()
average = np.mean(gold)

print('Test RMSE:', rmse)
print('Test R2  :', r2)

Try using Captum to see what is happening in the model