In [2]:
import numpy as np
import torch
from torch.utils.data import DataLoader,Dataset,random_split
from torchvision.transforms import transforms
import pandas as pd
import pickle
import sqlite3
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.base import BaseEstimator, TransformerMixin
import os
from IPython.display import clear_output

In [3]:
class VectorizerApply(BaseEstimator,TransformerMixin):
    def __init__(self,vectorizer_type) -> None:
        super().__init__()
        self.vectoryzer_type = vectorizer_type
        self.Vectorizers_ = None
        self.dataset_columns_ = None
        self.id_col_ = None
        self.name_col_ = None
        self.index_col_ = None

    def fit(self,X:pd.DataFrame,y=None,**fit_params):
        self.Vectorizers_ = []
        self.id_col_ = fit_params['id']
        self.name_col_ = fit_params['name']
        self.index_col_ = fit_params['index']        
        X = X.drop([fit_params['id'],fit_params['index'],fit_params['name']],axis=1)
        self.dataset_columns_ = X.columns
        for column in self.dataset_columns_:
            X[column + 'ـseparate'] = X[column].apply(lambda x: x.replace('|',' '))
            vectorizer = self.vectoryzer_type()
            vectorizer.fit(X[column + 'ـseparate']).toarray()
            self.Vectorizers_.append(vectorizer)
        return self
    
    def transform(self,X:pd.DataFrame) -> pd.DataFrame:
        result_columns = [self.id_col_,self.name_col_]
        result_data = X[[self.id_col_,self.name_col_]].values
        X = X.drop([self.id_col_,self.index_col_,self.name_col_],axis=1)
        for i,column in enumerate(self.dataset_columns_):
            X[column + 'ـseparate'] = X[column].apply(lambda x: x.replace('|',' '))
            vectorizer = self.Vectorizers_[i]
            data_matrix = vectorizer.transform(X[column + 'ـseparate']).toarray()
            result_columns = result_columns + list(vectorizer.get_feature_names_out())
            data_matrix[data_matrix > 1] = 1
            result_data = np.concatenate((result_data,data_matrix),axis=1)
        result_df = pd.DataFrame(data=result_data,columns=result_columns)
        return result_df

    def fit_transform(self, X:pd.DataFrame, y=None, **fit_params) -> pd.DataFrame:
        self.Vectorizers_ = []
        self.id_col_ = fit_params['id']
        self.name_col_ = fit_params['name']
        self.index_col_ = fit_params['index']        
        result_columns = [fit_params['id'],fit_params['name']]
        result_data = X[[fit_params['id'],fit_params['name']]].values
        X = X.drop([fit_params['id'],fit_params['index'],fit_params['name']],axis=1)
        self.dataset_columns_ = X.columns
        for column in self.dataset_columns_:
            X[column + 'ـseparate'] = X[column].apply(lambda x: x.replace('|',' '))
            vectorizer = self.vectoryzer_type()
            data_matrix = vectorizer.fit_transform(X[column + 'ـseparate']).toarray()
            result_columns = result_columns + list(vectorizer.get_feature_names_out())
            data_matrix[data_matrix > 1] = 1
            result_data = np.concatenate((result_data,data_matrix),axis=1)
            self.Vectorizers_.append(vectorizer)
        result_df = pd.DataFrame(data=result_data,columns=result_columns)
        return result_df   

In [4]:
class Label_creator(BaseEstimator,TransformerMixin):
    def __init__(self) -> None:
        super().__init__()
        self.interaction_list_ = None
        self.index_col_ = None
        self.mechanism_col_ = None
        self.action_col_ = None

    def fit(self,X:pd.DataFrame,y=None,**fit_params):
        self.index_col_ = fit_params['index']
        self.mechanism_col_= fit_params['mechanism']
        self.action_col_ = fit_params['action']
        self.interaction_list_ = list(set(X[self.mechanism_col_] + ' '+ X[self.action_col_]))

    def transform(self,X:pd.DataFrame) -> pd.DataFrame:
        X = X.drop([self.index_col_],axis=1)
        X['interaction'] = X[self.mechanism_col_] + ' ' + X[self.action_col_]
        X['interaction_numaber'] = list(map(lambda x : self.interaction_list_.index(x),X[self.mechanism_col_] + ' ' + X[self.action_col_]))
        X = X.drop([self.mechanism_col_,self.action_col_],axis=1)
        return X

    def fit_transform(self, X:pd.DataFrame, y=None, **fit_params) -> pd.DataFrame:
        self.index_col_ = fit_params['index']
        self.mechanism_col_= fit_params['mechanism']
        self.action_col_ = fit_params['action']
        self.interaction_list_ = list(set(X[self.mechanism_col_] + ' '+ X[self.action_col_]))
        X = X.drop([self.index_col_],axis=1)
        X['interaction'] = X[self.mechanism_col_] + ' ' + X[self.action_col_]
        X['interaction_numaber'] = list(map(lambda x : self.interaction_list_.index(x),X[self.mechanism_col_] + ' ' + X[self.action_col_]))
        X = X.drop([self.mechanism_col_,self.action_col_],axis=1)
        return X
        
        
        

In [5]:
class DrugDataset(Dataset):

    def __init__(self,X,y,**kwargs) -> None:
        super().__init__(**kwargs)
        self.pair_drugs = []
        self.classes = [item[1] for item in sorted(set(zip(y['interaction_numaber'],y['interaction'])))]
        self.class_to_idx = dict(set(zip(y['interaction'],y['interaction_numaber'])))
        self.data = None
        self.targets = None
        final_X = []
        final_y = []
        for i in range(len(y)):
            first_drug = y.loc[i,'drugA']
            second_drug = y.loc[i,'drugB']
            first_drug_vec = X[X['name']==first_drug].values[0]
            second_drug_vec = X[X['name']== second_drug].values[0]
            final_X.append(np.int32(np.vstack((first_drug_vec[2:],second_drug_vec[2:]))))
            final_y.append(y.loc[i,'interaction_numaber'])
            self.pair_drugs.append((first_drug,second_drug))
            final_X.append(np.int32(np.vstack((second_drug_vec[2:],first_drug_vec[2:]))))
            final_y.append(y.loc[i,'interaction_numaber'])
            self.pair_drugs.append((second_drug,first_drug))
            if os.name == 'nt':
                if i%100 == 0:
                    _ = os.system('cls')
                    print(f'Process: {(i/len(y))*100} %')
            else:
                if i%100 == 0:
                    _ = os.system('clear')
                    print(f'Process: {(i/len(y))*100} %')
            # if i % 100:
            #     clear_output(wait=True)
            #     print(f'Process: {(i/len(y))*100} %')
        self.data = torch.Tensor(final_X)
        self.targets = torch.Tensor(final_y).to(dtype=torch.int32)

    def __len__(self) -> int:
        return len(self.targets)
    
    def __getitem__(self, index):
        return self.data[index],self.targets[index]

# dataset 1

In [34]:
with open('./DS1/df.pkl','rb') as f:
    dataset = pickle.load(file=f)
dataset

Unnamed: 0,index,id,side,target,enzyme,pathway,smile,name
0,0,DB01296,C1096328|C0162830|C1611725|C0541767|C0242973|C...,P14780|Q00653|P01375|P01579|P33673,P33261|P05181,hsa:4318|hsa:4791|hsa:7124|hsa:3458,9|10|14|18|19|20|178|181|283|284|285|286|299|3...,Glucosamine
1,1,DB09230,C0015371|C0949040|C0856054|C0231926|C1608969|C...,Q02641,P08684,hsa:782,9|10|11|12|13|14|15|16|18|19|20|129|131|132|17...,Azelnidipine
2,2,DB05812,C0341697|C0035232|C0855476|C0162119|C1142166|C...,P05093,P08684|Q06520|P10635|P10632|P05177|P33261|P11712,hsa:1586,9|10|11|12|14|18|143|147|178|179|182|183|184|1...,Abiraterone
3,3,DB01195,C1737214|C0015376|C0576091|C1536116|C0679254|C...,Q14524|P35499|Q12809,P10635|P11712,hsa:6331|hsa:6329|hsa:3757,9|10|11|12|14|15|18|19|23|24|25|178|180|181|18...,Flecainide
4,4,DB00201,C0423602|C0239557|C0031924|C0947912|C0600125|C...,P30542|P29274|Q07343|P21817|BE0004922|P78527|O...,P20815|P05177|P24462|P08684|P05181|P10632|P117...,hsa:134|hsa:135|hsa:5142|hsa:6261|hsa:5591|hsa...,9|10|11|14|15|16|18|19|143|148|149|178|183|184...,Caffeine
...,...,...,...,...,...,...,...,...
567,567,DB01587,C0155867|C0341697|C0231341|C0853557|C0159060|C...,P30536|P14867|P18505|Q8N1C3|O14764|P78334,P08684,hsa:706|hsa:2554|hsa:2560|hsa:2565|hsa:2563|hs...,9|10|11|12|14|15|18|19|37|178|182|183|184|185|...,Ketazolam
568,568,DB00448,C0241148|C0040440|C1096403|C0011253|C0001416|C...,P20648|P10636,P33261|P11712|P08684|P04798|P05177|Q16678|P332...,hsa:495|hsa:4137,9|10|11|12|14|15|18|19|23|24|33|143|148|149|17...,Lansoprazole
569,569,DB00559,C0030283|C0856054|C0853557|C0426597|C0026636|C...,P25101|P24530,P08684|P11712,hsa:1909|hsa:1910,9|10|11|12|14|15|16|18|19|20|33|178|182|183|18...,Bosentan
570,570,DB04953,C0238097|C1095952|C0558401|C0341217|C0154446|C...,O43526|O43525|P56696|Q9NR82,P22309|P35503|P22310|O60656|P11509|P11245,hsa:3785|hsa:3786|hsa:9132|hsa:56479,9|10|11|12|14|15|18|19|23|178|182|185|189|283|...,Ezogabine


In [35]:
vec = VectorizerApply(CountVectorizer)
X = vec.fit_transform(dataset,id='id',index='index',name='name')
X

Unnamed: 0,id,name,c0000727,c0000729,c0000733,c0000734,c0000735,c0000772,c0000809,c0000810,...,840,842,845,847,860,861,863,866,93,95
0,DB01296,Glucosamine,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,DB09230,Azelnidipine,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,DB05812,Abiraterone,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
3,DB01195,Flecainide,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,DB00201,Caffeine,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567,DB01587,Ketazolam,1,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
568,DB00448,Lansoprazole,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
569,DB00559,Bosentan,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
570,DB04953,Ezogabine,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [36]:
connection = sqlite3.connect('./DS1/event.db')
extraction = pd.read_sql('select * from extraction;', connection)
extraction

Unnamed: 0,index,mechanism,action,drugA,drugB
0,0,The risk or severity of adverse effects,increase,Abemaciclib,Amiodarone
1,1,The serum concentration,decrease,Abemaciclib,Apalutamide
2,2,The serum concentration,increase,Abemaciclib,Aprepitant
3,3,The metabolism,decrease,Abemaciclib,Atomoxetine
4,4,The metabolism,decrease,Abemaciclib,Bortezomib
...,...,...,...,...,...
37259,37259,The serum concentration,increase,Nefazodone,Netupitant
37260,37260,The metabolism,decrease,Nefazodone,Nicardipine
37261,37261,The serum concentration,increase,Neratinib,Netupitant
37262,37262,The serum concentration,increase,Netupitant,Nicardipine


In [37]:
label_creator = Label_creator()
y = label_creator.fit_transform(extraction,index='index',mechanism='mechanism',action='action')
y

Unnamed: 0,drugA,drugB,interaction,interaction_numaber
0,Abemaciclib,Amiodarone,The risk or severity of adverse effects increase,4
1,Abemaciclib,Apalutamide,The serum concentration decrease,6
2,Abemaciclib,Aprepitant,The serum concentration increase,24
3,Abemaciclib,Atomoxetine,The metabolism decrease,63
4,Abemaciclib,Bortezomib,The metabolism decrease,63
...,...,...,...,...
37259,Nefazodone,Netupitant,The serum concentration increase,24
37260,Nefazodone,Nicardipine,The metabolism decrease,63
37261,Neratinib,Netupitant,The serum concentration increase,24
37262,Netupitant,Nicardipine,The serum concentration increase,24


In [38]:
drug_dataset = DrugDataset(X,y)

Process: 0.0 %
Process: 0.2683555173894375 %
Process: 0.536711034778875 %
Process: 0.8050665521683126 %
Process: 1.07342206955775 %
Process: 1.3417775869471877 %
Process: 1.6101331043366252 %
Process: 1.8784886217260626 %
Process: 2.1468441391155 %
Process: 2.415199656504938 %
Process: 2.6835551738943755 %
Process: 2.9519106912838127 %
Process: 3.2202662086732503 %
Process: 3.488621726062688 %
Process: 3.756977243452125 %
Process: 4.025332760841563 %
Process: 4.293688278231 %
Process: 4.562043795620438 %
Process: 4.830399313009876 %
Process: 5.098754830399313 %
Process: 5.367110347788751 %
Process: 5.635465865178188 %
Process: 5.903821382567625 %
Process: 6.172176899957063 %
Process: 6.440532417346501 %
Process: 6.708887934735938 %
Process: 6.977243452125376 %
Process: 7.2455989695148135 %
Process: 7.51395448690425 %
Process: 7.782310004293688 %
Process: 8.050665521683126 %
Process: 8.319021039072563 %
Process: 8.587376556462 %
Process: 8.855732073851438 %
Process: 9.124087591240876 %


In [39]:
# save dataset
if not os.path.exists('./saved_dataset/'):
    os.makedirs('./saved_dataset/')

with open('./saved_dataset/drug_dataset.pickle','wb') as f:
    pickle.dump(drug_dataset,f)

In [53]:
# split dataset to train and validation section
train_size = int(0.8 * len(drug_dataset))
val_size = len(drug_dataset) - train_size

train_dataset , val_dataset = random_split(drug_dataset,lengths=[train_size,val_size])


In [60]:
# create data loader for train loader and val loader

train_loader = DataLoader(train_dataset,batch_size=16,shuffle=True)

val_loader = DataLoader(val_dataset,batch_size=16)

In [20]:
# Deep learning module

class CPSP1(torch.nn.Module):

    def __init__(self,in_channels,conv_mid_channel,out_dim,*args, **kwargs) -> None:
        super(CPSP1,self).__init__(*args, **kwargs)
        self.conv = torch.nn.Conv2d(in_channels=in_channels, out_channels=conv_mid_channel,kernel_size=(2,3),stride=2)
        self.bn1 = torch.nn.BatchNorm2d(conv_mid_channel)
        self.conv1_1 = torch.nn.Conv2d(in_channels=conv_mid_channel,out_channels=1, kernel_size=1)
        self.bn2 = torch.nn.BatchNorm2d(1)
        self.fc1 = torch.nn.Linear(6448, 2048)
        self.fc2 = torch.nn.Linear(2048, 1024)
        self.fc3 = torch.nn.Linear(1024, out_dim)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.conv(x)
        x = torch.nn.functional.selu(x)
        x = self.bn1(x)
        x = self.conv1_1(x)
        x = torch.nn.functional.selu(x)
        x = self.bn2(x)
        x = x.view(-1,int(x.nelement() / x.shape[0]))
        x = self.fc1(x)
        x = torch.nn.functional.selu(x)
        x = self.fc2(x)
        x = torch.nn.functional.selu(x)
        x = self.fc3(x)
        return x

In [135]:
# create model and train loop
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = CPSP1(1,3,65).to(device=device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.0001,momentum=0.9)

N_epochs = 100

pre_val_loss = np.inf

for epoch in range(N_epochs):
    # Training
    train_loss = 0.0
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.type(torch.LongTensor).to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    # Validation
    val_loss = 0.0
    model.eval()
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.type(torch.LongTensor).to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        val_loss += loss.item()
    if val_loss >= pre_val_loss:
        print("Epoch: {} Train Loss: {} Val Loss: {}".format(epoch,
                                                         train_loss/len(train_loader),
                                                         val_loss/len(val_loader)))
        break
    else:
        pre_val_loss = val_loss
        print("Epoch: {} Train Loss: {} Val Loss: {}".format(epoch,
                                                         train_loss/len(train_loader),
                                                         val_loss/len(val_loader)))
    
    

Epoch: 0 Train Loss: 1.3171025156047218 Val Loss: 0.8245241704033168
Epoch: 1 Train Loss: 0.7067390859191015 Val Loss: 0.622211436393307
Epoch: 2 Train Loss: 0.5587394935607927 Val Loss: 0.5486049201307507
Epoch: 3 Train Loss: 0.47253441559723053 Val Loss: 0.4700812046603967
Epoch: 4 Train Loss: 0.4036195465101932 Val Loss: 0.4319542506150102
Epoch: 5 Train Loss: 0.34331930259521026 Val Loss: 0.3640920912303318
Epoch: 6 Train Loss: 0.28537669621342016 Val Loss: 0.3297004930059477
Epoch: 7 Train Loss: 0.234158639757346 Val Loss: 0.27890224731427343
Epoch: 8 Train Loss: 0.1875623720596855 Val Loss: 0.23901614176445188
Epoch: 9 Train Loss: 0.14708658441204528 Val Loss: 0.2167630347936882
Epoch: 10 Train Loss: 0.11261952902146158 Val Loss: 0.1890209762643335
Epoch: 11 Train Loss: 0.0878823318116133 Val Loss: 0.15990529475781604
Epoch: 12 Train Loss: 0.06397854240051702 Val Loss: 0.1433683533352388
Epoch: 13 Train Loss: 0.04877701456346373 Val Loss: 0.12246843920710852
Epoch: 14 Train Loss:

In [136]:
# Define the file path to save the model
PATH = "./saved_dataset/model.pt"
# Save the model state dictionary
torch.save(model.state_dict(), PATH)

In [140]:
correct = 0
total = 0

# Set the model to evaluation mode
model.eval()

# Disable gradient computations
with torch.no_grad():
    for inputs, labels in train_loader:
        # Move inputs and labels to the device (e.g. GPU) if available
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass to get model predictions
        outputs = model(inputs)

        # Get the predicted class for each input
        _, predicted = torch.max(outputs.data, 1)

        # Count the number of correct predictions
        correct += (predicted == labels).sum().item()

        # Count the total number of inputs
        total += labels.size(0)

# Compute the accuracy
accuracy = correct / total
print('trian Accuracy: {:.2%}'.format(accuracy))

trian Accuracy: 99.27%


In [139]:
correct = 0
total = 0

# Set the model to evaluation mode
model.eval()

# Disable gradient computations
with torch.no_grad():
    for inputs, labels in val_loader:
        # Move inputs and labels to the device (e.g. GPU) if available
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass to get model predictions
        outputs = model(inputs)

        # Get the predicted class for each input
        _, predicted = torch.max(outputs.data, 1)

        # Count the number of correct predictions
        correct += (predicted == labels).sum().item()

        # Count the total number of inputs
        total += labels.size(0)

# Compute the accuracy
accuracy = correct / total
print('validation Accuracy: {:.2%}'.format(accuracy))

validation Accuracy: 96.59%


# dataset 2

In [6]:
dataset = pd.read_csv('./DS2/drug_information_1258.csv')
dataset

Unnamed: 0,index,id,target,enzyme,smile,name
0,0,DB00006,P00734,P05164,1|41|79|80|108|117|140|143|173|193|197|242|269...,Bivalirudin
1,1,DB00035,P30518|P37288|P47901,P23219|P35354,1|53|80|115|117|140|143|173|193|197|242|253|30...,Desmopressin
2,2,DB00091,P49069|Q96LZ3|P62937|P30405,P20815|P08684|P33261|P10635,1|5|19|38|47|80|101|115|126|132|186|208|219|22...,Cyclosporine
3,3,DB00115,Q99707|P22033|Q9UBK8|Q8IVH4|Q9Y4U1|P42898,Q96EY8|Q05599,1|35|41|45|49|75|80|84|106|140|188|192|194|197...,Cyanocobalamin
4,4,DB00118,Q14749|P17707|P31153|P35520|Q00266|P21964|Q8N1...,P05181|P19623,1|75|80|194|209|348|362|378|454|457|489|577|61...,Ademetionine
...,...,...,...,...,...,...
1253,1253,DB15444,P13569,P08684|P20815|P05177|P20813|P10632|P11712|P332...,40|45|59|67|73|80|92|114|119|130|202|295|307|3...,Elexacaftor
1254,1254,DB15488,Q92847|P10275,P21964,13|25|31|80|104|170|176|193|222|310|315|489|55...,Echinacoside
1255,1255,DB15566,P04150,P08684,51|80|84|147|182|240|314|315|373|408|493|504|5...,Prednisoloneacetate
1256,1256,DB15598,P05106|P49281,P19224,166|253|271|314|650|656|677|715|751|787|835|84...,Ferricmaltol


In [7]:
vec = VectorizerApply(CountVectorizer)
X = vec.fit_transform(dataset,id='id',index='index',name='name')
X

Unnamed: 0,id,name,a0a024r8i1,a0a0e1r3h3,a0a0h2xj39,a0a0t9az62,a0a143zzk9,a0a144a2g5,a2qlk4,a5x5y0,...,99,990,991,992,993,994,996,997,998,999
0,DB00006,Bivalirudin,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,DB00035,Desmopressin,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,DB00091,Cyclosporine,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,DB00115,Cyanocobalamin,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,DB00118,Ademetionine,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1253,DB15444,Elexacaftor,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1254,DB15488,Echinacoside,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1255,DB15566,Prednisoloneacetate,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1256,DB15598,Ferricmaltol,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
extraction = pd.read_csv('./DS2/drug_interaction.csv')
extraction = extraction.drop('Unnamed: 0',axis=1)
extraction

Unnamed: 0,index,mechanism,action,drugA,drugB
0,0,the anticoagulant activities,increase,Apixaban,Bivalirudin
1,1,the anticoagulant activities,increase,Dabigatranetexilate,Bivalirudin
2,2,The risk or severity of bleeding and hemorrhage,increase,Dasatinib,Bivalirudin
3,4,the anticoagulant activities,increase,Bivalirudin,Rivaroxaban
4,6,The risk or severity of bleeding and hemorrhage,increase,Tipranavir,Bivalirudin
...,...,...,...,...,...
161765,323530,The metabolism,decrease,Zanubrutinib,Curcuminsulfate
161766,323531,The metabolism,increase,Elexacaftor,Betamethasonephosphate
161767,323535,The serum concentration,increase,Ubrogepant,Ripretinib
161768,323536,The metabolism,decrease,Avapritinib,Voxelotor


In [9]:
label_creator = Label_creator()
y = label_creator.fit_transform(extraction,index='index',mechanism='mechanism',action='action')
y

Unnamed: 0,drugA,drugB,interaction,interaction_numaber
0,Apixaban,Bivalirudin,the anticoagulant activities increase,30
1,Dabigatranetexilate,Bivalirudin,the anticoagulant activities increase,30
2,Dasatinib,Bivalirudin,The risk or severity of bleeding and hemorrhag...,1
3,Bivalirudin,Rivaroxaban,the anticoagulant activities increase,30
4,Tipranavir,Bivalirudin,The risk or severity of bleeding and hemorrhag...,1
...,...,...,...,...
161765,Zanubrutinib,Curcuminsulfate,The metabolism decrease,73
161766,Elexacaftor,Betamethasonephosphate,The metabolism increase,75
161767,Ubrogepant,Ripretinib,The serum concentration increase,45
161768,Avapritinib,Voxelotor,The metabolism decrease,73


In [13]:
drug_dataset = DrugDataset(X,y)

Process: 0.0 %
Process: 0.06181615874389565 %
Process: 0.1236323174877913 %
Process: 0.18544847623168695 %
Process: 0.2472646349755826 %
Process: 0.3090807937194783 %
Process: 0.3708969524633739 %
Process: 0.4327131112072695 %
Process: 0.4945292699511652 %
Process: 0.5563454286950609 %
Process: 0.6181615874389566 %
Process: 0.6799777461828522 %
Process: 0.7417939049267478 %
Process: 0.8036100636706435 %
Process: 0.865426222414539 %
Process: 0.9272423811584348 %
Process: 0.9890585399023304 %
Process: 1.0508746986462263 %
Process: 1.1126908573901217 %
Process: 1.1745070161340174 %
Process: 1.236323174877913 %
Process: 1.2981393336218088 %
Process: 1.3599554923657045 %
Process: 1.4217716511096001 %
Process: 1.4835878098534956 %
Process: 1.5454039685973915 %
Process: 1.607220127341287 %
Process: 1.6690362860851828 %
Process: 1.730852444829078 %
Process: 1.792668603572974 %
Process: 1.8544847623168697 %
Process: 1.9163009210607655 %
Process: 1.9781170798046608 %
Process: 2.0399332385485565 

  self.data = torch.Tensor(final_X)


In [20]:
# load saved dataset
# with open('./saved_dataset/drug_dataset2.pickle','rb') as f:
#     drug_dataset = pickle.load(f)
#     drug_dataset
len(drug_dataset.classes)

100

In [14]:
# save dataset
if not os.path.exists('./saved_dataset/'):
    os.makedirs('./saved_dataset/')

with open('./saved_dataset/drug_dataset2.pickle','wb') as f:
    pickle.dump(drug_dataset,f)

In [11]:
# split dataset to train and validation section
train_size = int(0.8 * len(drug_dataset))
val_size = len(drug_dataset) - train_size

train_dataset , val_dataset = random_split(drug_dataset,lengths=[train_size,val_size])

In [12]:
# create data loader for train loader and val loader

train_loader = DataLoader(train_dataset,batch_size=16,shuffle=True)

val_loader = DataLoader(val_dataset,batch_size=16)

In [21]:
# Deep learning module

class CPSP2(torch.nn.Module):

    def __init__(self,in_channels,conv_mid_channel,out_dim,*args, **kwargs) -> None:
        super(CPSP2,self).__init__(*args, **kwargs)
        self.conv = torch.nn.Conv2d(in_channels=in_channels, out_channels=conv_mid_channel,kernel_size=(2,3),stride=2)
        self.bn1 = torch.nn.BatchNorm2d(conv_mid_channel)
        self.conv1_1 = torch.nn.Conv2d(in_channels=conv_mid_channel,out_channels=1, kernel_size=1)
        self.bn2 = torch.nn.BatchNorm2d(1)
        self.fc1 = torch.nn.Linear(1998, 2048)
        self.fc2 = torch.nn.Linear(2048, 1024)
        self.fc3 = torch.nn.Linear(1024, out_dim)

    def forward(self, x):
        x = x.unsqueeze(1)
        x = self.conv(x)
        x = torch.nn.functional.selu(x)
        x = self.bn1(x)
        x = self.conv1_1(x)
        x = torch.nn.functional.selu(x)
        x = self.bn2(x)
        x = x.view(-1,int(x.nelement() / x.shape[0]))
        x = self.fc1(x)
        x = torch.nn.functional.selu(x)
        x = self.fc2(x)
        x = torch.nn.functional.selu(x)
        x = self.fc3(x)
        return x

In [22]:
# create model and train loop
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = CPSP2(1,3,100).to(device=device)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(),lr=0.0001,momentum=0.9)

N_epochs = 100

pre_val_loss = np.inf

for epoch in range(N_epochs):
    # Training
    train_loss = 0.0
    model.train()
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.type(torch.LongTensor).to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    
    # Validation
    val_loss = 0.0
    model.eval()
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.type(torch.LongTensor).to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        val_loss += loss.item()
    if val_loss >= pre_val_loss:
        print("Epoch: {} Train Loss: {} Val Loss: {}".format(epoch,
                                                         train_loss/len(train_loader),
                                                         val_loss/len(val_loader)))
        break
    else:
        pre_val_loss = val_loss
        print("Epoch: {} Train Loss: {} Val Loss: {}".format(epoch,
                                                         train_loss/len(train_loader),
                                                         val_loss/len(val_loader)))
    

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
