# Drug Interactions Network Analysis
**Part 2 - Network Visualization**

___
### 1. Import dependencies

In [25]:
import json
import os
import re
import zipfile

import networkx as nx
import numpy as np
import pandas as pd
from pyvis.network import Network

from IPython.core.display import display, HTML
# Inject CSS that makes the notebook's .container element span the full page width
display(
    HTML("<style>.container { width:100% !important; }</style>")
)

___
### 2. Data preparation

In [26]:
# Extract every .zip archive found in data/ back into data/.
# Only .zip archives are handled here (no tar extraction happens in this cell).
# Sorting makes the extraction order the same on every run.
zip_files_list = sorted(name for name in os.listdir('data') if name.endswith('.zip'))

for file in zip_files_list:
    # os.path.join keeps the path construction platform-independent
    with zipfile.ZipFile(os.path.join('data', file), 'r') as zip_ref:
        zip_ref.extractall('data')

<IPython.core.display.Javascript object>

#### 2(a) Drug Interactions (DrugBank)

In [27]:
# Load the DB mapping (used below to map drug codes to names) from its JSON file
with open('data/DB_mapping.json', 'r') as mapping_file:
    db_mapping = json.loads(mapping_file.read())

In [28]:
# Load the raw DrugBank interaction pairs (tab-separated, no header row).
# NOTE(review): 'durgbank' looks like a typo but may be the real file name on
# disk - confirm before renaming.
df_db_int = pd.read_csv("data/ChCh-Miner_durgbank-chem-chem.tsv", sep='\t', header=None)
df_db_int.columns = ['drug_1_code', 'drug_2_code']

# Look up a name for each drug code via the DB mapping
df_db_int = df_db_int.assign(
    drug_1_name=df_db_int['drug_1_code'].map(db_mapping),
    drug_2_name=df_db_int['drug_2_code'].map(db_mapping),
)

new_cols = ['drug_1_code', 'drug_1_name', 'drug_2_code', 'drug_2_name']

# Drop rows where either drug's mapped name is the "revoked" placeholder,
# and put each name column next to its code column
revoked_labels = ['This record has been revoked']
has_revoked = (
    df_db_int['drug_1_name'].isin(revoked_labels)
    | df_db_int['drug_2_name'].isin(revoked_labels)
)

df_db_int = df_db_int.loc[~has_revoked, new_cols]
df_db_int.head()

Unnamed: 0,drug_1_code,drug_1_name,drug_2_code,drug_2_name
0,DB00862,Vardenafil,DB00966,Telmisartan
1,DB00575,Clonidine,DB00806,Pentoxifylline
2,DB01242,Clomipramine,DB08893,Mirabegron
3,DB01151,Desipramine,DB08883,Perampanel
4,DB01235,Levodopa,DB01275,Hydralazine


#### 2(b) Polypharmacy side effects

In [29]:
# Load the CID mapping (used below to map CID drug codes to names) from its JSON file
with open('data/CID_mapping.json', 'r') as mapping_file:
    cid_mapping = json.loads(mapping_file.read())

In [30]:
# Load the polypharmacy side-effect table and give its columns uniform names
df_poly_se = pd.read_csv("data/ChChSe-Decagon_polypharmacy.csv")
df_poly_se.columns = ['drug_1_code', 'drug_2_code', 'side_effect_code', 'side_effect_description']

# Look up a name for each drug code via the CID mapping
df_poly_se = df_poly_se.assign(
    drug_1_name=df_poly_se['drug_1_code'].map(cid_mapping),
    drug_2_name=df_poly_se['drug_2_code'].map(cid_mapping),
)

# Column order: each drug's code followed by its name, then the side effect
new_cols = [
    'drug_1_code', 'drug_1_name',
    'drug_2_code', 'drug_2_name',
    'side_effect_code', 'side_effect_description',
]

df_poly_se = df_poly_se.loc[:, new_cols]
df_poly_se.head()

Unnamed: 0,drug_1_code,drug_1_name,drug_2_code,drug_2_name,side_effect_code,side_effect_description
0,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0151714,hypermagnesemia
1,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0035344,retinopathy of prematurity
2,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0004144,atelectasis
3,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0002063,alkalosis
4,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0004604,Back Ache


#### 2(c) Monopharmacy side effects

In [31]:
# Import dataset (read once; the CSV was previously loaded twice in this cell)
df_mono_se = pd.read_csv("data/ChSe-Decagon_monopharmacy.csv")
df_mono_se.columns = ['drug_code', 'side_effect_code', 'side_effect_description']

# Perform code-name mapping via the CID mapping
df_mono_se['drug_name'] = df_mono_se['drug_code'].map(cid_mapping)

# Rearrange columns so the drug name sits right after its code
new_cols = ['drug_code', 'drug_name', 'side_effect_code', 'side_effect_description']

df_mono_se = df_mono_se[new_cols]
df_mono_se.head()

Unnamed: 0,drug_code,drug_name,side_effect_code,side_effect_description
0,CID003062316,Dasatinib,C1096328,central nervous system mass
1,CID003062316,Dasatinib,C0162830,Photosensitivity reaction
2,CID003062316,Dasatinib,C1611725,leukaemic infiltration brain
3,CID003062316,Dasatinib,C0541767,platelet adhesiveness abnormal
4,CID003062316,Dasatinib,C0242973,Ventricular dysfunction


### 3. Network Visualization

#### DrugBank Drug Interactions

In [32]:
# Give every interaction row the same weight (used as the edge value below)
df_db_int = df_db_int.assign(weight=0.1)
df_db_int.head()

Unnamed: 0,drug_1_code,drug_1_name,drug_2_code,drug_2_name,weight
0,DB00862,Vardenafil,DB00966,Telmisartan,0.1
1,DB00575,Clonidine,DB00806,Pentoxifylline,0.1
2,DB01242,Clomipramine,DB08893,Mirabegron,0.1
3,DB01151,Desipramine,DB08883,Perampanel,0.1
4,DB01235,Levodopa,DB01275,Hydralazine,0.1


In [45]:
# Build an interactive pyvis network from every DrugBank interaction pair.
# NOTE(review): the saved output of this cell is a KeyboardInterrupt, which
# suggests building/rendering the full graph is very slow - consider trying a
# subset of df_db_int first.
db_net = Network(height='500px', width='500px',
                 bgcolor='white', font_color='blue',
                 notebook=True)

# set the physics layout of the network
db_net.barnes_hut()

sources = df_db_int['drug_1_name']
targets = df_db_int['drug_2_name']
weights = df_db_int['weight']

# Register each drug exactly once, in order of first appearance, instead of
# calling add_node twice per edge row for drugs that are already present.
node_names = dict.fromkeys(name for pair in zip(sources, targets) for name in pair)
for name in node_names:
    db_net.add_node(name, name, title=name)

# One edge per interaction row; the weight column is passed as the edge value
for src, dst, w in zip(sources, targets, weights):
    db_net.add_edge(src, dst, value=w)

# Neighbor hover data is not added for the full network.

db_net.show('drug_interactions_db.html')

KeyboardInterrupt: 

### 4. Narrow down data and network

In [42]:
# Define list of oral medications for the common diseases: Diabetes, Hypertension and Hyperlipidemia
# (each drug is listed once; 'Dapagliflozin' previously appeared twice in dm_meds)
# NOTE(review): if these lists are matched against the mapped drug names, the
# spellings must match exactly - verify entries such as 'Frusemide' against the mapping.
dm_meds = ['Metformin', 'Linagliptin', 'Sitagliptin', 'Glibenclamide', 'Gliclazide',
          'Glimepiride', 'Glipizide', 'Tolbutamide', 'Canagliflozin', 'Dapagliflozin',
          'Empagliflozin', 'Acarbose', 'Liraglutide']

cardio_meds = ['Captopril', 'Enalapril', 'Lisinopril', 'Perindopril', 'Irbesartan',
               'Losartan', 'Telmisartan', 'Valsartan', 'Candesartan', 'Atenolol', 
               'Bisoprolol', 'Carvedilol', 'Propranolol', 'Amlodipine', 'Diltiazem', 
               'Nifedipine', 'Verapamil', 'Bumetanide', 'Frusemide', 'Isosorbide',
               'Hydrochlorothiazide', 'Spironolactone', 'Isosorbide Dinitrate', 
               'Isosorbide Mononitrate', 'Aspirin', 'Clopidogrel', 'Ticagrelor', 
               'Atorvastatin', 'Rosuvastatin', 'Simvastatin', 'Pravastatin']  

# Small sample of drugs used to narrow down the interaction data
random_meds = ['Metformin', 'Simvastatin', 'Lisinopril']

In [43]:
# Keep only interactions where at least one side is one of the selected meds
is_selected_src = df_db_int['drug_1_name'].isin(random_meds)
is_selected_dst = df_db_int['drug_2_name'].isin(random_meds)

df_db_int_sm = df_db_int[is_selected_src | is_selected_dst].reset_index(drop=True)
df_db_int_sm

Unnamed: 0,drug_1_code,drug_1_name,drug_2_code,drug_2_name,weight
0,DB00641,Simvastatin,DB09036,Siltuximab,0.1
1,DB00331,Metformin,DB00780,Phenelzine,0.1
2,DB00641,Simvastatin,DB06210,Eltrombopag,0.1
3,DB00331,Metformin,DB00794,Primidone,0.1
4,DB00641,Simvastatin,DB01039,Fenofibrate,0.1
...,...,...,...,...,...
308,DB00220,Nelfinavir,DB00331,Metformin,0.1
309,DB00237,Butabarbital,DB00722,Lisinopril,0.1
310,DB00331,Metformin,DB00783,Estradiol,0.1
311,DB00209,Trospium,DB00331,Metformin,0.1


In [49]:
# Build an interactive pyvis network from the narrowed-down interaction table.
db_net_sm = Network(height='700px', width='100%',
                    bgcolor='white', font_color='black',
                    notebook=True)

# set the physics layout of the network
db_net_sm.barnes_hut()

sources = df_db_int_sm['drug_1_name']
targets = df_db_int_sm['drug_2_name']
weights = df_db_int_sm['weight']

# Register each drug exactly once, in order of first appearance, instead of
# calling add_node twice per edge row for drugs that are already present.
node_names = dict.fromkeys(name for pair in zip(sources, targets) for name in pair)
for name in node_names:
    db_net_sm.add_node(name, name, title=name)

# One edge per interaction row; the weight column is passed as the edge value
for src, dst, w in zip(sources, targets, weights):
    db_net_sm.add_edge(src, dst, value=w)

# Add neighbor data to node hover data
neighbor_map = db_net_sm.get_adj_list()

for node in db_net_sm.nodes:
    # Sort the neighbors so the hover list is alphabetical and identical on
    # every run (the adjacency collections may be unordered).
    neighbors = sorted(neighbor_map[node['id']])
    node['title'] += ' <b>Neighbors:</b><br>' + '<br>'.join(neighbors)
    # Node value is set to the number of neighbors
    node['value'] = len(neighbors)

# NOTE(review): this is the same output file name the full-network cell uses,
# so whichever cell runs last overwrites the other - confirm this is intended.
db_net_sm.show('drug_interactions_db.html')