In [6]:
from pathlib import Path
import pandas as pd
from sklearn.model_selection import KFold, train_test_split
from teachopencadd.utils import seed_everything


# Single seed shared by the whole notebook.
SEED = 22
# NOTE(review): presumably seeds the random number generators used downstream;
# confirm the exact coverage against `teachopencadd.utils.seed_everything`.
seed_everything(SEED)

In [7]:
# Anchor all paths on the notebook location. `_dh` is supplied by IPython
# (its directory history); the most recent entry is used as the base directory.
HERE = Path(_dh[-1])
# Input and cached files live in the `data` folder below that directory.
DATA = HERE.joinpath("data")

## Add PyG graph objects to the DataFrame
Each molecule is featurized as a graph, with features attached to every node (atom) and every edge (bond).
* Node features: atomic number, chirality, degree, formal charge, number of hydrogens, number of radical electrons, hybridization, aromaticity, is in ring
* Edge features: bond type, stereochemistry, bond conjugation


In [11]:
# Load the compounds together with their molecular graphs. The pickle acts as a
# cache: if it cannot be loaded, the graphs are rebuilt from the CSV and the
# cache is rewritten.
graph_cache = DATA / "BACE_compounds_part3.pkl"
try:
    # NOTE(security): unpickling can execute arbitrary code; only load a cache
    # file that this notebook (or a trusted source) produced.
    compound_df = pd.read_pickle(graph_cache)
except Exception as err:
    # `Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate; the reason for the rebuild is reported
    # rather than silently discarded.
    print(f"Could not load {graph_cache.name} ({type(err).__name__}: {err}); rebuilding from CSV.")
    # Imported lazily: only needed when the graphs have to be recomputed.
    from torch_geometric.utils import from_smiles

    compound_df = pd.read_csv(DATA / "BACE_compounds_part3.csv", index_col=0)
    # One graph object per compound, built from its SMILES string.
    compound_df["graph"] = [from_smiles(smiles) for smiles in compound_df["smiles"]]
    compound_df.to_pickle(graph_cache)
print("Shape of dataframe : ", compound_df.shape)

Shape of dataframe :  (4823, 7)


In [4]:
# Print the size first and end the cell on the preview: a notebook only renders
# the value of a cell's last expression, so a `head()` call placed before the
# `print` would be computed and then thrown away.
print(f"DataFrame shape: {compound_df.shape}")
compound_df.head()

DataFrame shape: (4823, 7)


In [None]:
# GAT: out_channels=64, num_layers=5, lr=1e-3, batch=64, concat, extra fc layer 256 nodes, relu
from gnn_utils.training import nn_training_and_validation

# 5-fold cross-validation over the compounds. `random_state=SEED` pins the
# shuffle so that re-running this cell reproduces the same folds.
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)

# `KFold.split` is a generator of (train_index, test_index) pairs with positional
# indices. It must be iterated directly: subscripting it (`...[0]`) raises
# TypeError, and a single pair could not be unpacked by this loop anyway.
for train_index, test_index in kf.split(compound_df):
    print('new training')
    train_rows = compound_df.iloc[train_index]
    test_rows = compound_df.iloc[test_index]

    train_x = train_rows.graph.to_list()
    train_y = train_rows.pIC50.to_list()
    test_x = test_rows.graph.to_list()
    test_y = test_rows.pIC50.to_list()
    # Order passed to the training helper: features first, then targets.
    splits = [train_x, test_x, train_y, test_y]

    # NOTE(review): `GNN` is overwritten on every fold, so only the result of
    # the last fold remains available after the loop.
    GNN = nn_training_and_validation(splits=splits, name='GNN')

new training
Epoch: 001, Train MSE: 1.3315, Test MSE: 1.4157
Epoch: 002, Train MSE: 1.1282, Test MSE: 1.1382
Epoch: 003, Train MSE: 1.0970, Test MSE: 1.1491
Epoch: 004, Train MSE: 0.9250, Test MSE: 0.9510
Epoch: 005, Train MSE: 0.9883, Test MSE: 1.0074
Epoch: 006, Train MSE: 1.2552, Test MSE: 1.2800
Epoch: 007, Train MSE: 1.0768, Test MSE: 1.0856
Epoch: 008, Train MSE: 0.8876, Test MSE: 0.9576
Epoch: 009, Train MSE: 0.6670, Test MSE: 0.7154
Epoch: 010, Train MSE: 1.3655, Test MSE: 1.3900
Epoch: 011, Train MSE: 1.3434, Test MSE: 1.3753
Epoch: 012, Train MSE: 1.2269, Test MSE: 1.2468
Epoch: 013, Train MSE: 1.0537, Test MSE: 1.0923
Epoch: 014, Train MSE: 1.5216, Test MSE: 1.5610
Epoch: 015, Train MSE: 1.6098, Test MSE: 1.7254
Epoch: 016, Train MSE: 1.2969, Test MSE: 1.3990
Epoch: 017, Train MSE: 1.5932, Test MSE: 1.6026
Epoch: 018, Train MSE: 1.2575, Test MSE: 1.2625
Epoch: 019, Train MSE: 1.0317, Test MSE: 1.1268
Epoch: 020, Train MSE: 2.0802, Test MSE: 2.0691
Epoch: 021, Train MSE: 2.28