In [1]:
!pip install numpy==1.24.4



In [4]:
import pickle
import random
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import dgl
import dgl.nn.pytorch as dglnn
import dgl.function as dglfn
from collections import deque
from tqdm.auto import tqdm
from IPython.display import clear_output
import time

from dataset import SepDataset, collate_fn
from model import ModelNew
import metrics

seed = np.random.randint(2021, 2022) ##random

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

torch.manual_seed(seed)
np.random.seed(seed)

In [5]:
def timeSince(since):
    now = time.time()
    s = now - since
    return now, s

In [6]:
def train(model, train_loader_compound, criterion, optimizer,epoch,device):
    model.train()
    tbar = tqdm(train_loader_compound, total=len(train_loader_compound))
    losses = []
    t = time.time()
    for i, data in enumerate(tbar):
        data0 = [i.to(device) for i in data[0]]
        ga, gr, gi, aff = data0
        vina = data[1]
        y_pred = model(ga,gr,gi,vina).squeeze()
        y_true = aff.float().squeeze()

        assert y_pred.shape == y_true.shape
        loss = criterion(y_pred,y_true).cuda()
        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
        optimizer.zero_grad()
        losses.append(loss.item())
#         tbar.set_description(f'epoch {epoch+1} loss {np.mean(losses[-10:]):.4f} grad {grad_norm:.4f}')

    m_losses=np.mean(losses)

    return m_losses

In [7]:
def valid(model, valid_loader_compound, criterion,device):
    model.eval()
    losses = []
    outputs = []
    targets = []
    tbar = tqdm(valid_loader_compound, total=len(valid_loader_compound))
    for i, data in enumerate(tbar):
        data0 = [i.to(device) for i in data[0]]
        ga, gr, gi, aff = data0
        vina = data[1]
        with torch.no_grad():
            y_pred = model(ga,gr,gi,vina).squeeze()
        y_true = aff.float().squeeze()
        assert y_pred.shape == y_true.shape
        loss = criterion(y_pred,y_true).cuda()
        losses.append(loss.item())
        outputs.append(y_pred.cpu().detach().numpy().reshape(-1))
        targets.append(y_true.cpu().detach().numpy().reshape(-1))
    targets = np.concatenate(targets).reshape(-1)
    outputs = np.concatenate(outputs).reshape(-1)

    evaluation = {
        'c_index': metrics.c_index(targets, outputs),
        'RMSE': metrics.RMSE(targets, outputs),
        'MAE': metrics.MAE(targets, outputs),
        'SD': metrics.SD(targets, outputs),
        'CORR': metrics.CORR(targets, outputs),}
    ml=np.mean(losses)

    return ml, evaluation

In [72]:
def main():

    number_of_complexes = 423
    
    F=open(r'train_val_test_comp.pkl','rb')
    content=pickle.load(F)
    F.close()
    vina_list= []
    graphs = dgl.load_graphs('graphs_arr.bin')[0] # index 0 for graph, index 1 for labels, which we didn't put in the graph
    labels = pd.read_csv('labels.csv')
    vina_terms=open(r'Vina_terms.pkl','rb')
    vina=pickle.load(vina_terms)
    vina_terms.close()
    # print(vina)
    for i in range(number_of_complexes):
        if labels.id[i] in vina.keys():
            vina_list.append(vina[labels.id[i]])
    # print(vina_list)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    compound_train = content[0]
    compound_valid = content[1]
    compound_test = content[2]
    # print(compound_test)

    train_dataset_compound = SepDataset([graphs[i] for i in compound_train], [vina_list[i] for i in compound_train], [labels.id[i] for i in compound_train], [labels.koff[i] for i in compound_train], ['a_conn','r_conn', 'int_l'])
    valid_dataset_compound = SepDataset([graphs[i] for i in compound_valid], [vina_list[i] for i in compound_valid], [labels.id[i] for i in compound_valid], [labels.koff[i] for i in compound_valid], ['a_conn','r_conn', 'int_l'])
    test_dataset_compound  = SepDataset([graphs[i] for i in compound_test],  [vina_list[i] for i in compound_test],  [labels.id[i] for i in compound_test],  [labels.koff[i] for i in compound_test],  ['a_conn','r_conn', 'int_l'])
        
    train_loader_compound = DataLoader(train_dataset_compound, batch_size=8, shuffle=True, num_workers=0, collate_fn=collate_fn,pin_memory=False,drop_last=False,)
    valid_loader_compound = DataLoader(valid_dataset_compound, batch_size=8, shuffle=False, num_workers=0, collate_fn=collate_fn)
    test_loader_compound = DataLoader(test_dataset_compound, batch_size=8, shuffle=False, num_workers=0, collate_fn=collate_fn)
    print(train_loader_compound)

    model = ModelNew()
    model = model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), 1.2e-4, weight_decay=1e-6)   ### (model.parameters(), 1e-3, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=40, eta_min=1e-6)
    criterion = torch.nn.MSELoss()

    n_epoch = 80
    best_R = 0.0
    for epoch in range(n_epoch):
        ll = train(model, train_loader_compound, criterion, optimizer,epoch,device)
        if epoch%1==0:
            l,evaluation = valid(model, valid_loader_compound, criterion,device)
            l_, evaluation_ = valid(model, test_loader_compound, criterion,device)
            print(f'epoch {epoch+1} train_loss {ll:.5f} valid_loss {l:.5f}')
            clear_output()
            if evaluation_['CORR']>best_R:
                best_R= evaluation_['CORR']
                torch.save({'model': model.state_dict()}, '../model/model.pth')
        scheduler.step()

In [71]:
! pip show dgl

Name: dgl
Version: 1.1.2.post1
Summary: Deep Graph Library
Home-page: https://github.com/dmlc/dgl
Author: 
Author-email: 
License: APACHE
Location: /Users/azamat/anaconda3/lib/python3.11/site-packages
Requires: networkx, numpy, psutil, requests, scipy, tqdm
Required-by: 


In [70]:
if __name__ == "__main__":
    main()

<torch.utils.data.dataloader.DataLoader object at 0x1808cff10>


  0%|          | 0/38 [00:00<?, ?it/s]

DGLError: [22:31:16] /tmp/dgl_src/src/runtime/c_runtime_api.cc:82: Check failed: allow_missing: Device API cuda is not enabled. Please install the cuda version of dgl.
Stack trace:
  [bt] (0) 1   libdgl.dylib                        0x000000016f747aef dmlc::LogMessageFatal::~LogMessageFatal() + 111
  [bt] (1) 2   libdgl.dylib                        0x000000016fb381a3 dgl::runtime::DeviceAPIManager::GetAPI(std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char>>, bool) + 371
  [bt] (2) 3   libdgl.dylib                        0x000000016fb37f81 dgl::runtime::DeviceAPIManager::GetAPI(int, bool) + 353
  [bt] (3) 4   libdgl.dylib                        0x000000016fb49fe3 dgl::runtime::NDArray::Empty(std::__1::vector<long long, std::__1::allocator<long long>>, DGLDataType, DGLContext) + 403
  [bt] (4) 5   libdgl.dylib                        0x000000016f70e02d dgl::runtime::NDArray::CopyTo(DGLContext const&) const + 253
  [bt] (5) 6   libdgl.dylib                        0x000000016fcdbed3 dgl::aten::COOMatrix::CopyTo(DGLContext const&) const + 179
  [bt] (6) 7   libdgl.dylib                        0x000000016fcc61ad dgl::UnitGraph::COO::CopyTo(DGLContext const&) const + 253
  [bt] (7) 8   libdgl.dylib                        0x000000016fcc5931 dgl::UnitGraph::CopyTo(std::__1::shared_ptr<dgl::BaseHeteroGraph>, DGLContext const&) + 497
  [bt] (8) 9   libdgl.dylib                        0x000000016fb9b02b dgl::HeteroGraph::CopyTo(std::__1::shared_ptr<dgl::BaseHeteroGraph>, DGLContext const&) + 379

