In [1]:
from model.featurisation import smiles2graph
from model.CL_model_vas_info import GNNModelWithNewLoss
import pandas as pd

In [2]:
# Load the VSA descriptor table and pull out the SMILES column together with
# the SMR_VSA column parsed into lists of floats.
df = pd.read_csv("./data/vsa_zinc.csv")
smiles_list = df["SMILES"].tolist()
# Each SMR_VSA cell is parsed as whitespace-separated numbers. Surrounding
# square brackets are stripped first so a bracketed cell ("[1.0 2.0]") parses
# the same way as a bare one, consistent with parse_vsa in read_vsa_data.
smr_vsa_list = [
    [float(token) for token in row.strip("[]").split()]
    for row in df["SMR_VSA"]
]

In [3]:
def read_vsa_data(vsa_file):
    """Load SMILES strings and their three VSA descriptor vectors from a CSV.

    The CSV must contain the columns ``SMILES``, ``SMR_VSA``, ``SlogP_VSA``
    and ``PEOE_VSA``. Each descriptor cell is parsed as whitespace-separated
    numbers, optionally wrapped in square brackets.

    Args:
        vsa_file: Path or buffer accepted by ``pandas.read_csv``.

    Returns:
        A ``(smiles, properties)`` tuple. ``smiles`` is the list of values
        from the ``SMILES`` column. ``properties`` has one entry per row, each
        a ``(smr, slogp, peoe)`` tuple of float lists. A cell that cannot be
        parsed (missing value, non-numeric token) yields an empty list.

    Raises:
        KeyError: If one of the required columns is absent.
    """
    df = pd.read_csv(vsa_file)

    def parse_vsa(s):
        """Parse one descriptor cell into a list of floats ([] on failure)."""
        try:
            return [float(token) for token in s.strip('[]').split()]
        except (AttributeError, TypeError, ValueError):
            # AttributeError/TypeError: the cell is not a string (e.g. NaN for
            # a missing value). ValueError: a token is not a valid float.
            # Anything else (notably KeyboardInterrupt) is left to propagate
            # instead of being silently swallowed.
            return []

    smr_arrays = df["SMR_VSA"].apply(parse_vsa).tolist()
    slogp_arrays = df["SlogP_VSA"].apply(parse_vsa).tolist()
    peoe_arrays = df["PEOE_VSA"].apply(parse_vsa).tolist()

    # Row-wise grouping: properties[i] == (smr_i, slogp_i, peoe_i).
    properties = list(zip(smr_arrays, slogp_arrays, peoe_arrays))

    return df["SMILES"].tolist(), properties

# Load the SMILES strings and the per-molecule (SMR, SlogP, PEOE) VSA tuples.
x_smiles, properties = read_vsa_data(vsa_file="./data/vsa_zinc.csv")


In [4]:
# Featurise the SMILES strings with smiles2graph, passing the VSA tuples as
# `properties`; no labels (y) or cluster assignments are supplied.
data_list = smiles2graph(
    x_smiles,
    y=None,
    cluster=None,
    properties=properties,
    test=False,
)

In [5]:
# Display the first featurised entry as a sanity check on its fields.
data_list[0]

Data(x=[18, 79], edge_index=[2, 34], edge_attr=[34, 10], global_features=[5], smiles='CCN(CCSC)C(=O)N[C@@](C)(CC)C(F)(F)F', property_0=[1, 10], property_1=[1, 10], property_2=[1, 14])

In [6]:
import torch
from torch_geometric.data import DataLoader

# One-element list naming the compute device; it is indexed as devices[0]
# wherever a model is moved to the device.
if torch.cuda.is_available():
    devices = ["cuda"]
else:
    devices = ["cpu"]

# Model configured with property_index=0. The feature dimensions are read
# from the first entry of data_list.
model1 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=data_list[0].global_features.shape[0],
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=0,
    save_path='premodels/0',
)
model1 = model1.to(devices[0])

In [7]:
# Train the property_index=0 model on the full list; the call's return value
# is displayed as the cell output.
model1.train_model(data_list)

Training will be saved to: premodels/0


Training:   0%|          | 0/3 [00:00<?, ?it/s]

Baseline Loss: 3.9900 | Actual Loss: 3.8966
Baseline Loss: 4.0111 | Actual Loss: 3.8768
Baseline Loss: 3.9934 | Actual Loss: 3.8059
Baseline Loss: 3.9886 | Actual Loss: 3.7719
Baseline Loss: 3.9855 | Actual Loss: 3.7346
Baseline Loss: 3.9850 | Actual Loss: 3.6100
Baseline Loss: 3.9927 | Actual Loss: 3.5457
Baseline Loss: 3.9897 | Actual Loss: 3.4984
Baseline Loss: 3.9857 | Actual Loss: 3.3824
Baseline Loss: 3.9918 | Actual Loss: 3.2798
Baseline Loss: 4.0025 | Actual Loss: 3.2197
Baseline Loss: 3.9931 | Actual Loss: 3.1580
Baseline Loss: 4.0081 | Actual Loss: 3.0899
Baseline Loss: 3.9917 | Actual Loss: 2.9656
Baseline Loss: 3.9879 | Actual Loss: 2.8691
Baseline Loss: 3.9928 | Actual Loss: 2.8979
Baseline Loss: 3.9910 | Actual Loss: 2.7448
Baseline Loss: 3.9875 | Actual Loss: 2.6911
Baseline Loss: 3.9906 | Actual Loss: 2.7290
Baseline Loss: 3.9956 | Actual Loss: 2.6683
Baseline Loss: 3.9784 | Actual Loss: 2.5829
Baseline Loss: 4.0173 | Actual Loss: 2.5558
Baseline Loss: 3.9904 | Actual L

Training:  33%|███▎      | 1/3 [01:04<02:09, 64.78s/it]

Baseline Loss: 3.9883 | Actual Loss: 1.2356
Baseline Loss: 3.5831 | Actual Loss: 0.8012
Epoch 1/3: Train Loss: 1.6124, Val Loss: 1.0862
New best validation loss: 1.0862
Baseline Loss: 3.9966 | Actual Loss: 1.1888
Baseline Loss: 3.9836 | Actual Loss: 1.0763
Baseline Loss: 4.0025 | Actual Loss: 1.0624
Baseline Loss: 4.0163 | Actual Loss: 1.0634
Baseline Loss: 3.9981 | Actual Loss: 1.0083
Baseline Loss: 3.9958 | Actual Loss: 1.2110
Baseline Loss: 3.9956 | Actual Loss: 0.9449
Baseline Loss: 4.0033 | Actual Loss: 1.0934
Baseline Loss: 4.0018 | Actual Loss: 1.0809
Baseline Loss: 3.9884 | Actual Loss: 1.1880
Baseline Loss: 3.9878 | Actual Loss: 1.1006
Baseline Loss: 3.9849 | Actual Loss: 1.0676
Baseline Loss: 3.9928 | Actual Loss: 1.1794
Baseline Loss: 4.0030 | Actual Loss: 1.1087
Baseline Loss: 3.9868 | Actual Loss: 1.1529
Baseline Loss: 3.9908 | Actual Loss: 1.0478
Baseline Loss: 3.9916 | Actual Loss: 1.0154
Baseline Loss: 3.9782 | Actual Loss: 1.0772
Baseline Loss: 4.0033 | Actual Loss: 1.

Training:  67%|██████▋   | 2/3 [02:09<01:04, 64.78s/it]

Baseline Loss: 3.9912 | Actual Loss: 0.8792
Baseline Loss: 3.9905 | Actual Loss: 0.8795
Baseline Loss: 3.9939 | Actual Loss: 0.8440
Baseline Loss: 3.9772 | Actual Loss: 0.6947
Baseline Loss: 3.9975 | Actual Loss: 0.7136
Baseline Loss: 3.9960 | Actual Loss: 0.9464
Baseline Loss: 3.9938 | Actual Loss: 0.7931
Baseline Loss: 4.0046 | Actual Loss: 0.8522
Baseline Loss: 3.9802 | Actual Loss: 0.8493
Baseline Loss: 3.9812 | Actual Loss: 0.6789
Baseline Loss: 3.9741 | Actual Loss: 0.7098
Baseline Loss: 3.9965 | Actual Loss: 0.8904
Baseline Loss: 3.9978 | Actual Loss: 0.8112
Baseline Loss: 3.9928 | Actual Loss: 0.8344
Baseline Loss: 3.9873 | Actual Loss: 0.7423
Baseline Loss: 4.0043 | Actual Loss: 0.8450
Baseline Loss: 3.9938 | Actual Loss: 0.6874
Baseline Loss: 3.9966 | Actual Loss: 0.8625
Baseline Loss: 3.9898 | Actual Loss: 0.7211
Baseline Loss: 4.0061 | Actual Loss: 0.8228
Baseline Loss: 3.9834 | Actual Loss: 0.6829
Baseline Loss: 3.9992 | Actual Loss: 0.9278
Baseline Loss: 3.9805 | Actual L

Training: 100%|██████████| 3/3 [03:14<00:00, 64.82s/it]

Baseline Loss: 3.9945 | Actual Loss: 0.5839
Baseline Loss: 3.9983 | Actual Loss: 0.7042
Baseline Loss: 3.9883 | Actual Loss: 0.8921
Baseline Loss: 3.5831 | Actual Loss: 0.5209
Epoch 3/3: Train Loss: 0.7570, Val Loss: 0.7321
New best validation loss: 0.7321





0.7320608977152376

In [8]:
# Model configured with property_index=1. The feature dimensions are read
# from the first entry of data_list.
model2 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=data_list[0].global_features.shape[0],
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=1,
    save_path='premodels/1',
)
model2 = model2.to(devices[0])

In [9]:
# Train the property_index=1 model on the full list; the call's return value
# is displayed as the cell output.
model2.train_model(data_list)

Training will be saved to: premodels/1


Training:   0%|          | 0/3 [00:00<?, ?it/s]

Baseline Loss: 3.9588 | Actual Loss: 3.9063
Baseline Loss: 3.9675 | Actual Loss: 3.8730
Baseline Loss: 3.9793 | Actual Loss: 3.8810
Baseline Loss: 3.9670 | Actual Loss: 3.8077
Baseline Loss: 3.9787 | Actual Loss: 3.7897
Baseline Loss: 3.9660 | Actual Loss: 3.7748
Baseline Loss: 3.9619 | Actual Loss: 3.6860
Baseline Loss: 3.9611 | Actual Loss: 3.6598
Baseline Loss: 3.9745 | Actual Loss: 3.5613
Baseline Loss: 3.9638 | Actual Loss: 3.5209
Baseline Loss: 3.9646 | Actual Loss: 3.4723
Baseline Loss: 3.9688 | Actual Loss: 3.3772
Baseline Loss: 3.9701 | Actual Loss: 3.2625
Baseline Loss: 3.9647 | Actual Loss: 3.2289
Baseline Loss: 3.9628 | Actual Loss: 3.0895
Baseline Loss: 3.9686 | Actual Loss: 3.0214
Baseline Loss: 3.9741 | Actual Loss: 3.0337
Baseline Loss: 3.9595 | Actual Loss: 2.7843
Baseline Loss: 3.9660 | Actual Loss: 2.9178
Baseline Loss: 3.9594 | Actual Loss: 2.8378
Baseline Loss: 3.9674 | Actual Loss: 2.9498
Baseline Loss: 3.9682 | Actual Loss: 2.7642
Baseline Loss: 3.9611 | Actual L

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7eb2276f61d0>>
Traceback (most recent call last):
  File "/home/easter/.conda/envs/chemprop/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


Baseline Loss: 3.9620 | Actual Loss: 1.8115
Baseline Loss: 3.9698 | Actual Loss: 1.8193
Baseline Loss: 3.9602 | Actual Loss: 1.8784
Baseline Loss: 3.9712 | Actual Loss: 2.1057
Baseline Loss: 3.9670 | Actual Loss: 1.7226
Baseline Loss: 3.9642 | Actual Loss: 1.9192
Baseline Loss: 3.9639 | Actual Loss: 1.6757
Baseline Loss: 3.9704 | Actual Loss: 1.7897
Baseline Loss: 3.9665 | Actual Loss: 1.6847
Baseline Loss: 3.9662 | Actual Loss: 1.9597
Baseline Loss: 3.9611 | Actual Loss: 2.2273
Baseline Loss: 3.9807 | Actual Loss: 2.0144
Baseline Loss: 3.9556 | Actual Loss: 1.7658
Baseline Loss: 3.9597 | Actual Loss: 1.7103
Baseline Loss: 3.9687 | Actual Loss: 1.8281
Baseline Loss: 3.9644 | Actual Loss: 1.7979
Baseline Loss: 3.9632 | Actual Loss: 1.7707
Baseline Loss: 3.9750 | Actual Loss: 1.8115
Baseline Loss: 3.9614 | Actual Loss: 1.6298
Baseline Loss: 3.9665 | Actual Loss: 1.6517
Baseline Loss: 3.9695 | Actual Loss: 1.5969
Baseline Loss: 3.9655 | Actual Loss: 1.7977
Baseline Loss: 3.9641 | Actual L

In [None]:
# Model configured with property_index=2. The feature dimensions are read
# from the first entry of data_list.
model3 = GNNModelWithNewLoss(
    num_node_features=data_list[0].x.shape[1],
    num_edge_features=data_list[0].edge_attr.shape[1],
    num_global_features=data_list[0].global_features.shape[0],
    hidden_dim=512,
    dropout_rate=0.1,
    property_index=2,
    save_path="premodels/2",
)
model3 = model3.to(devices[0])

In [None]:
# Train the property_index=2 model on the full list; the call's return value
# is displayed as the cell output.
model3.train_model(data_list)