In [1]:
from model.getdata import smiles2graph
from model.CL_model_vas_info import GNNModelWithNewLoss
import pandas as pd

In [2]:
# Quick look at the raw descriptor table: keep the SMILES column and parse
# each whitespace-separated SMR_VSA cell into a list of floats.
df = pd.read_csv("./data/vsa.csv")
smiles_list = df["SMILES"].tolist()
smr_vsa_list = [
    [float(token) for token in row.split()]
    for row in df["SMR_VSA"]
]

In [3]:
def read_vsa_data(vsa_file):
    """Load SMILES strings and their VSA descriptor vectors from a CSV file.

    The CSV must contain the columns ``SMILES``, ``SMR_VSA``, ``SlogP_VSA``
    and ``PEOE_VSA``. Each descriptor cell is expected to hold
    whitespace-separated numbers, optionally wrapped in square brackets
    (e.g. ``"[1.0 2.5 0.0]"``).

    Args:
        vsa_file: Path (or anything ``pandas.read_csv`` accepts) of the CSV.

    Returns:
        A ``(smiles, properties)`` tuple where ``smiles`` is the list of
        values from the ``SMILES`` column and ``properties`` is a list with
        one ``(smr, slogp, peoe)`` tuple per row; each element of the tuple
        is a list of floats. A cell that is missing or cannot be parsed
        yields an empty list for that descriptor.
    """
    df = pd.read_csv(vsa_file)

    def parse_vsa(s):
        """Parse one descriptor cell into a list of floats ([] on failure)."""
        try:
            return [float(token) for token in s.strip('[]').split()]
        except (AttributeError, TypeError, ValueError):
            # AttributeError/TypeError: the cell is not a string (e.g. NaN for
            # a missing value); ValueError: a token is not a valid number.
            # Deliberately narrower than a bare ``except`` so that
            # KeyboardInterrupt and genuine bugs are not silently swallowed.
            return []

    smr_arrays = df["SMR_VSA"].apply(parse_vsa).tolist()
    slogp_arrays = df["SlogP_VSA"].apply(parse_vsa).tolist()
    peoe_arrays = df["PEOE_VSA"].apply(parse_vsa).tolist()

    # One (SMR, SlogP, PEOE) tuple per molecule, in row order.
    properties = list(zip(smr_arrays, slogp_arrays, peoe_arrays))

    return df["SMILES"].tolist(), properties

# Load the SMILES strings and the per-molecule (SMR_VSA, SlogP_VSA, PEOE_VSA)
# descriptor tuples that the graph-building cell below consumes.
x_smiles, properties = read_vsa_data("./data/vsa.csv")


In [4]:
# Convert every SMILES string into a graph object, passing the descriptor
# tuples along through the `properties` keyword.
data_list = smiles2graph(x_smiles, properties=properties)

In [5]:
# Display the first converted graph to sanity-check its fields and tensor shapes.
data_list[0]

Data(x=[21, 133], edge_index=[2, 44], edge_attr=[44, 14], global_features=[1, 8], smiles='Cc1cccc(C2=CCN(C(=O)NCCCC#N)CC2)c1', property_0=[1, 10], property_1=[1, 10], property_2=[1, 14])

In [6]:
# Print the second dimension (feature width) of the first graph's node,
# edge and global feature tensors, in that order.
print(*(
    getattr(data_list[0], field).shape[1]
    for field in ("x", "edge_attr", "global_features")
))

133 14 8


In [None]:
import torch
from torch_geometric.data import DataLoader
# Prefer the GPU when one is available. `devices` stays a one-element list
# because the model cells index it as devices[0].
devices = ["cuda"] if torch.cuda.is_available() else ["cpu"]

# Model for property_index=0; feature widths are read from the first graph.
# NOTE(review): num_global_features is 0 even though the graphs carry a
# `global_features` tensor — presumably intentional; confirm.
# NOTE(review): cov_num is not passed here, so the class default applies;
# the "3" in the save path presumably mirrors that default — confirm.
model1 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=0,
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=0,
    save_path="premodels_new_og/3/0",
).to(devices[0])

In [8]:
# Train model1 on the full graph list using train_model's default settings.
model1.train_model(data_list)



Training will be saved to: premodels_new/0


Training:   0%|          | 0/300 [00:00<?, ?it/s]

Baseline Loss: 5.3143 | Actual Loss: 5.2730


Training:   0%|          | 1/300 [00:06<34:24,  6.90s/it]

Baseline Loss: 3.9720 | Actual Loss: 3.9277
Epoch 1/300: Train Loss: 5.2730, Val Loss: 3.9277
New best validation loss: 3.9277
Baseline Loss: 5.3143 | Actual Loss: 5.2710


Training:   1%|          | 2/300 [00:10<24:54,  5.02s/it]

Baseline Loss: 3.9720 | Actual Loss: 3.9123
Epoch 2/300: Train Loss: 5.2710, Val Loss: 3.9123
New best validation loss: 3.9123


Training:   1%|          | 2/300 [00:11<27:48,  5.60s/it]


KeyboardInterrupt: 

In [None]:
# Model for property_index=1; same hyperparameters as the property_index=0
# run, moved to the device chosen in `devices`.
# NOTE(review): cov_num is left at the class default; the "3" in the save
# path presumably mirrors that default — confirm.
model2 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=0,
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=1,
    save_path="premodels_new_og/3/1",
).to(devices[0])

In [12]:
# Train model2 on the full graph list using train_model's default settings.
model2.train_model(data_list)

Training will be saved to: premodels_new/1


Training:   0%|          | 0/300 [00:00<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Model for property_index=2; same hyperparameters as the other two
# default-cov_num runs, moved to the device chosen in `devices`.
# NOTE(review): cov_num is left at the class default; the "3" in the save
# path presumably mirrors that default — confirm.
model3 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=0,
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=2,
    save_path='premodels_new_og/3/2',
).to(devices[0])

In [None]:
# Train model3 on the full graph list using train_model's default settings.
model3.train_model(data_list)

In [None]:
# Re-select the device so this cell does not depend on an earlier model cell.
devices = ["cuda"] if torch.cuda.is_available() else ["cpu"]

# cov_num=6 variant for property_index=0, saved under premodels_new_og/6/0.
# NOTE(review): cov_num presumably sets the number of graph-convolution
# layers — confirm against GNNModelWithNewLoss.
model4 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=0,
    cov_num=6,
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=0,
    save_path="premodels_new_og/6/0",
).to(devices[0])

# Train on the full graph list with train_model's default settings.
model4.train_model(data_list)

In [None]:
# Re-select the device so this cell does not depend on an earlier model cell.
devices = ["cuda"] if torch.cuda.is_available() else ["cpu"]

# cov_num=6 variant for property_index=1, saved under premodels_new_og/6/1.
# NOTE(review): cov_num presumably sets the number of graph-convolution
# layers — confirm against GNNModelWithNewLoss.
model5 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=0,
    cov_num=6,
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=1,
    save_path="premodels_new_og/6/1",
).to(devices[0])

# Train on the full graph list with train_model's default settings.
model5.train_model(data_list)

In [None]:
# Re-select the device so this cell does not depend on an earlier model cell.
devices = ["cuda"] if torch.cuda.is_available() else ["cpu"]

# cov_num=6 variant for property_index=2, saved under premodels_new_og/6/2.
# NOTE(review): cov_num presumably sets the number of graph-convolution
# layers — confirm against GNNModelWithNewLoss.
model6 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=0,
    cov_num=6,
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=2,
    save_path="premodels_new_og/6/2",
).to(devices[0])

# Train on the full graph list with train_model's default settings.
model6.train_model(data_list)

In [None]:
# Re-select the device so this cell does not depend on an earlier model cell.
devices = ["cuda"] if torch.cuda.is_available() else ["cpu"]

# cov_num=9 variant for property_index=0, saved under premodels_new_og/9/0.
# NOTE(review): cov_num presumably sets the number of graph-convolution
# layers — confirm against GNNModelWithNewLoss.
model7 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=0,
    cov_num=9,
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=0,
    save_path="premodels_new_og/9/0",
).to(devices[0])

# Train on the full graph list with train_model's default settings.
model7.train_model(data_list)

In [None]:
# Re-select the device so this cell does not depend on an earlier model cell.
devices = ["cuda"] if torch.cuda.is_available() else ["cpu"]

# cov_num=9 variant for property_index=1, saved under premodels_new_og/9/1.
# NOTE(review): cov_num presumably sets the number of graph-convolution
# layers — confirm against GNNModelWithNewLoss.
model8 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=0,
    cov_num=9,
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=1,
    save_path="premodels_new_og/9/1",
).to(devices[0])

# Train on the full graph list with train_model's default settings.
model8.train_model(data_list)

In [None]:
# Select the compute device; kept as a one-element list because the model
# construction below indexes it as devices[0].
devices = ["cuda" if torch.cuda.is_available() else "cpu"]

# cov_num=9 variant for property_index=2.
# The save directory follows the <cov_num>/<property_index> layout used by
# every other run in this notebook (3/0 ... 9/1). It previously read
# 'premodels_new_og/9/9', which did not match property_index=2.
# NOTE(review): if checkpoints were already written to .../9/9, move them or
# update any loader that expects the old path.
# NOTE(review): cov_num presumably sets the number of graph-convolution
# layers — confirm against GNNModelWithNewLoss.
model9 = GNNModelWithNewLoss(
        num_node_features=data_list[0].x.shape[1],
        num_edge_features=data_list[0].edge_attr.shape[1],
        num_global_features=0,
        cov_num=9,
        hidden_dim=512,
        dropout_rate=0.1,
        property_index=2,
        save_path='premodels_new_og/9/2'
    ).to(devices[0])

# Train on the full graph list with train_model's default settings.
model9.train_model(
    data_list,
)