In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm, trange
import torch

import matplotlib.pyplot as plt

from dGbyG.utils.custom_tools import rapid_process_result, rapid_linear_reg
from dGbyG.network.Dataset import Train_Dataset
from dGbyG.network.GNNetwork import MP_network
from dGbyG.network.trainer import Model
from dGbyG.config import train_data_path, package_path

In [2]:
# Load the training table and derive, for every row, an uncertainty estimate
# (`Scale`) and a loss weight (`weight`) that the training cell consumes.
TrainingData_df = pd.read_csv(train_data_path)

# Mean of the 'std' column; used as the fallback uncertainty for rows
# that have no SEM, and as the per-observation spread in the formula below.
mean_std = TrainingData_df.loc[:, 'std'].mean()

n_values = TrainingData_df.loc[:, 'n'].to_numpy()
sem_values = TrainingData_df.loc[:, 'SEM'].to_numpy()
has_sem = ~np.isnan(sem_values)

# Rows without an SEM fall back to `mean_std`; rows with one combine the SEM
# with mean_std**2 / n in quadrature. Only the rows that have an SEM are
# evaluated, so `n` is never read for the fallback rows.
Scale = np.full(len(TrainingData_df), mean_std, dtype=float)
Scale[has_sem] = (sem_values[has_sem]**2 + mean_std**2 / n_values[has_sem])**0.5

equation = TrainingData_df.loc[:, 'reaction']
standard_dG_prime = TrainingData_df.loc[:, 'standard_dg_prime']

# NOTE(review): by operator precedence this is (1/Scale)/median(Scale), i.e.
# 1/(Scale*median(Scale)). If the intent was to normalise so that the median
# weight is 1, it would be median(Scale)/Scale instead -- confirm before changing,
# since it rescales the training loss.
weight = 1/Scale/np.median(Scale)

Train the network

In [None]:
# Train 100 networks. Each one is fitted to a dataset built from all rows of the
# training table, with the targets perturbed by Gaussian noise scaled per row by
# `Scale`, and its weights are saved to <package_path>/network/best_model_params/<n>.pt.
# NOTE(review): no RNG seed is set in the visible cells, so the noise draws
# are not reproducible between runs unless seeding happens elsewhere -- confirm.

# Create the output directory up front: torch.save does not create missing parent
# directories, so without this the save would fail only after the first (long)
# training run had already completed.
params_dir = os.path.join(package_path, 'network', 'best_model_params')
os.makedirs(params_dir, exist_ok=True)

for n in range(100):
    # Run label; it is not referenced again within this cell.
    name = '10_fold_cross_validation_with_random_dG_'+str(n)

    # Fresh noisy realisation of the targets: one standard-normal draw per row,
    # multiplied by that row's uncertainty.
    dG = standard_dG_prime + np.random.randn(standard_dG_prime.shape[0]) * Scale
    TrainSet = Train_Dataset(equations=equation, dGs=dG, weights=weight)

    # Atom/bond feature widths are read off the first sample of the dataset.
    network = MP_network(atom_dim=TrainSet[0].x.size(1), bond_dim=TrainSet[0].edge_attr.size(1), emb_dim=300, num_layer=2)
    model = Model()
    model.network = network

    # Positional arguments are presumably epochs, learning rate and weight decay
    # -- TODO confirm against Model.train.
    loss_history, Result_df, i = model.train(TrainSet, 9000, 1e-4, 1e-6)
    torch.save(model.network.state_dict(), os.path.join(params_dir, str(n)+'.pt'))
