# Drug-Drug Interaction using Deep Learning

* Upload the lab_resources and DDI_nn files to your Drive account:
  * Lab_resource: https://www.cs.upc.edu/~turmo/mud/lab/lab_resources.zip
  * DDI_nn code: https://www.cs.upc.edu/~turmo/mud/lab/07-DDI-nn.zip
* Before running the code, ensure that your Google Colab is set to use GPU:
  * Edit → Notebook Settings
* Mount your Drive disk unit:
  * Left-side menu → Files → Mount drive (the icon that looks like a folder with the Drive logo).


Define the paths to the data and utils in your Drive unit:

# Loading

In [14]:
pip install neptune



In [17]:
# Mount Google Drive under /content/drive; the paths configured further down
# all live below /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [18]:
# Folder holding the unpacked 07-DDI-nn archive on the mounted Drive.
# Every other location below is derived from it.
utilsdir = '/content/drive/MyDrive/07-DDI-nn'
evaluatordir = utilsdir + '/util'

# Dataset files (train / devel / test) stored next to the code.
trainfile = utilsdir + '/train.pck'
validationfile = utilsdir + '/devel.pck'
testfile = utilsdir + '/test.pck'

# Directory handed to the evaluator when scoring validation predictions.
validationdir = utilsdir + '/data/devel'

# Base name used when saving the model / indexes, and the predictions file.
modelname = 'model'
outfile = 'out.txt'

In [19]:
!pip install tensorflow-addons
import sys
sys.path.insert(1,utilsdir) # Path to the utils folder on your Google Drive disk
sys.path.insert(1,evaluatordir) # Path to the evaluator folder on your Google Drive disk



In [24]:

from contextlib import redirect_stdout

from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv1D, Flatten, Embedding, Dense, TimeDistributed, Dropout, Bidirectional, concatenate, Softmax
from tensorflow.keras.layers import Embedding, Dense, Dropout, Conv1D, MaxPool1D, Reshape, concatenate, Flatten, Bidirectional, LSTM

from codemaps import *
from dataset import *

# Fetch NLTK's 'punkt' package at runtime; when it is already installed NLTK
# just reports it as up-to-date.
# NOTE(review): presumably required for tokenization inside codemaps/dataset — confirm.
import nltk
nltk.download('punkt')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

# Build network

In [21]:
def build_network(codes) :
   """Build and compile the convolutional DDI classifier.

   The network has five parallel inputs, each a sequence of ``codes.maxlen``
   indices, in this order: words, suffixes, prefixes, lowercased words and
   PoS tags. Each input gets its own embedding (with dropout); the
   embeddings are concatenated and passed through two convolutional stages
   with max pooling, then flattened into a softmax layer with one unit per
   label.

   Parameters
   ----------
   codes : object exposing ``maxlen`` and the ``get_n_words``, ``get_n_sufs``,
      ``get_n_prefs``, ``get_n_lowers`` and ``get_n_labels`` getters.

   Returns
   -------
   A Keras ``Model`` compiled with the Adam optimizer, categorical
   cross-entropy loss and the accuracy metric. Callers must feed the five
   input arrays in the order listed above.
   """

   # sizes of the index vocabularies (embedding input dimensions)
   n_words = codes.get_n_words()
   max_len = codes.maxlen
   n_sufs = codes.get_n_sufs()
   n_prefs = codes.get_n_prefs()
   n_lowers = codes.get_n_lowers()
   # NOTE(review): the PoS vocabulary size is read from get_n_lowers(); this
   # looks like a copy-paste slip. Switch to a PoS-specific getter if
   # Codemaps provides one — confirm before changing.
   n_pos = codes.get_n_lowers()
   n_labels = codes.get_n_labels()

   # one input layer per feature sequence
   inptW = Input(shape=(max_len,))
   inptS = Input(shape=(max_len,))
   inptP = Input(shape=(max_len,))
   inptL = Input(shape=(max_len,))
   inptPo = Input(shape=(max_len,))
   # The model must be given its input tensors explicitly; the order here is
   # the order in which callers have to pass the encoded arrays.
   inputs = [inptW, inptS, inptP, inptL, inptPo]

   embW = Embedding(input_dim=n_words, output_dim=150,
                      input_length=max_len, mask_zero=False)(inptW)  # word embeddings

   embS = Embedding(input_dim=n_sufs, output_dim=50,
                      input_length=max_len, mask_zero=False)(inptS)  # suffix embeddings

   embP = Embedding(input_dim=n_prefs, output_dim=50,
                    input_length=max_len, mask_zero=False)(inptP) # prefix embeddings

   embL = Embedding(input_dim=n_lowers, output_dim=150,
                    input_length=max_len, mask_zero=False)(inptL) # lowercased-word embeddings

   embPo = Embedding(input_dim=n_pos, output_dim=150,
                    input_length=max_len, mask_zero=False)(inptPo) # PoS embeddings

   # light dropout on every embedding
   embW = Dropout(0.1)(embW)
   embS = Dropout(0.1)(embS)
   embP = Dropout(0.1)(embP)
   embL = Dropout(0.1)(embL)
   embPo = Dropout(0.1)(embPo)

   embeddings = concatenate([embW,embS,embP,embL, embPo])

   # first stage: two parallel convolutions (windows of 5 and 3 tokens)
   l1_conv1 = Conv1D(filters=30, kernel_size=5, strides=1, activation='relu', padding='same')(embeddings)
   l1_conv2 = Conv1D(filters=30, kernel_size=3, strides=1, activation='relu', padding='same')(embeddings)
   l1_conv = concatenate([l1_conv1, l1_conv2])
   l1_max = MaxPool1D(pool_size=2, strides=1)(l1_conv)

   # second stage: a single convolution over the pooled features
   l2_conv1 = Conv1D(filters=60, kernel_size=5, strides=1, activation='relu', padding='same')(l1_max)
   l2_max = MaxPool1D(pool_size=2, strides=1)(l2_conv1)
   flat = Flatten()(l2_max)
   flat = Dropout(0.1)(flat)

   # classification layer: one softmax unit per label
   out = Dense(n_labels, activation='softmax')(flat)

   model = Model(inputs, out)
   model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

   return model

In [22]:
# load train and validation data
traindata = Dataset(trainfile)
valdata = Dataset(validationfile)

# create indexes from training data
max_len = 150
suf_len = 5
codes = Codemaps(traindata, max_len)

# encode datasets
Xt = codes.encode_words(traindata)
Yt = codes.encode_labels(traindata)
Xv = codes.encode_words(valdata)
Yv = codes.encode_labels(valdata)

n_tags = codes.get_n_labels()
max_len = codes.maxlen

In [23]:
# Create the network from the index maps.
model = build_network(codes)
# Re-applies the same optimizer / loss / metric that build_network already sets.
model.compile(optimizer='adam' ,metrics=["accuracy"], loss="categorical_crossentropy")
# One (batch, max_len) shape per model input, derived from the model itself
# so the list cannot drift from the number of inputs the network declares.
model.build([(None,max_len)] * len(model.inputs))

# Print the architecture on stderr instead of stdout.
with redirect_stdout(sys.stderr) :
   model.summary()

AttributeError: 'Codemaps' object has no attribute 'get_n_sufs'

In [None]:
## --------- TRAINING -----------
## --
## -- Fits the model on the encoded training data, validating on the
## -- encoded validation data after every epoch, then saves the result.
## --

# Training settings gathered in one place.
fit_options = dict(batch_size=32, epochs=10, validation_data=(Xv, Yv), verbose=1)

# Anything Keras prints to stdout while fitting goes to stderr instead.
with redirect_stdout(sys.stderr) :
   model.fit(Xt, Yt, **fit_options)

# Save the trained model and the index maps under the same base name.
model.save(modelname)
codes.save(modelname)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Predict

In [None]:
#import sys
import evaluator

In [None]:
def output_interactions(data, preds, outfile) :
   """Write the predicted interactions to a text file.

   Pairs each example yielded by ``data.sentences()`` with the prediction
   at the same position in ``preds`` and writes one line per example whose
   predicted tag is not 'null', formatted as ``sid|e1|e2|tag``.

   Parameters
   ----------
   data : object whose ``sentences()`` yields mappings with the keys
      'sid', 'e1' and 'e2'.
   preds : iterable of predicted tags, aligned with ``data.sentences()``.
   outfile : path of the file to create; an existing file is overwritten.
   """
   # The context manager closes the file even if an example is malformed
   # and raises part-way through the loop.
   with open(outfile, 'w') as outf :
      for exmp,tag in zip(data.sentences(),preds) :
         if tag == 'null' :
            continue   # 'null' predictions are not reported
         print(exmp['sid'], exmp['e1'], exmp['e2'], tag, sep="|", file=outf)

In [None]:
## --------- Evaluator -----------
def evaluation(datadir,outfile) :
   """Score a predictions file with the lab's evaluator module.

   Thin wrapper that calls ``evaluator.evaluate`` with "DDI" as its first
   argument (presumably the task selector — confirm against the evaluator).

   datadir -- forwarded as the evaluator's second argument; this notebook
              passes the validation data directory
   outfile -- forwarded as the evaluator's third argument; this notebook
              passes the predictions file it has just written

   Returns None; whatever the evaluator returns is discarded.
   """
   evaluator.evaluate("DDI", datadir, outfile)


In [None]:
## --------- PREDICTION AND EVALUATION -----------
## --
## -- Runs the trained model on the validation data, writes the predicted
## -- interactions to `outfile` and scores that file with the evaluator.
## --

# Imported explicitly: without this, `np` below only resolves if one of the
# star-imports (codemaps / dataset) happens to expose it.
import numpy as np

# encode the validation examples and predict one label distribution per example
X = codes.encode_words(valdata)
Y = model.predict(X)
# keep the most probable label of each example, mapped back to its name
Y = [codes.idx2label(np.argmax(s)) for s in Y]

# write the predicted interactions
output_interactions(valdata, Y, outfile)

# evaluate
evaluation(validationdir,outfile)


                   tp	  fp	  fn	#pred	#exp	P	R	F1
------------------------------------------------------------------------------
advise             80	  48	  61	 128	 141	62.5%	56.7%	59.5%
effect            159	 120	 153	 279	 312	57.0%	51.0%	53.8%
int                14	   2	  14	  16	  28	87.5%	50.0%	63.6%
mechanism          79	  62	 182	 141	 261	56.0%	30.3%	39.3%
------------------------------------------------------------------------------
M.avg            -	-	-	-	-	65.8%	47.0%	54.1%
------------------------------------------------------------------------------
m.avg             332	 232	 410	 564	 742	58.9%	44.7%	50.8%
m.avg(no class)   382	 182	 360	 564	 742	67.7%	51.5%	58.5%
