In [1]:
import pandas as pd
import numpy as np
import networkx as nx

## Load relation matrices

1. Drug-contraindication (sparsity - 0.9938473520249221)
2. Drug-drug-interaction (sparsity - 0.99106529209622)
3. Drug-indication (sparsity - 0.9944690891930156)
4. Drug-MOA (sparsity - 0.9943156923786464)
5. Drug-pathway (sparsity - 0.9900473582951014)
6. Drug-target (sparsity - 0.9939774202952323)

In [2]:
drug_contraindication_df = pd.read_csv('/Users/chaarvibansal/Desktop/drug-repurposing/data/relationship/drug-contraindication.csv')
drug_drug_interaction_df = pd.read_csv('/Users/chaarvibansal/Desktop/drug-repurposing/data/relationship/drug-drug-interaction.csv')
drug_indication_df = pd.read_csv('/Users/chaarvibansal/Desktop/drug-repurposing/data/relationship/drug-indication.csv')
drug_MOA_df = pd.read_csv('/Users/chaarvibansal/Desktop/drug-repurposing/data/relationship/drug-MOA.csv')
drug_pathway_df = pd.read_csv('/Users/chaarvibansal/Desktop/drug-repurposing/data/relationship/drug-pathway.csv')
drug_target_df = pd.read_csv('/Users/chaarvibansal/Desktop/drug-repurposing/data/relationship/drug-target.csv')

In [3]:
relation_matrix_list = [drug_contraindication_df,drug_drug_interaction_df,drug_indication_df,drug_MOA_df,drug_pathway_df,drug_target_df]

## Normalise all columns by removing whitespaces, lowercasing and create edge list

In [4]:
def change_relation_to_edge(df):
    cols = df.columns
    df[cols[0]] = df[cols[0]].apply(lambda x:x.lower())
    df[cols[1]] = df[cols[1]].apply(lambda x:x.lower())
    df['edge'] = list(zip(df[cols[0]], df[cols[1]]))    

In [5]:
edge_list = []
vertex_list = []
for df in relation_matrix_list:
    change_relation_to_edge(df)
    df_edge_list = df['edge'].tolist()
    edge_list = edge_list + df_edge_list
    cols = df.columns
    list_1 = list(set(df[cols[0]].tolist()))
    list_2 = list(set(df[cols[1]].tolist()))
    vertex_list = vertex_list + list_1 + list_2
vertex_list = list(set(vertex_list))

## Convert to undirected graph 

In [6]:
graph = {'nodes':vertex_list,'edges':edge_list}
n = len(vertex_list)
print(n)
print(len(edge_list))
adj_matr = pd.DataFrame(0, columns=graph['nodes'], index=graph['nodes'])
for i in graph['edges']:
    adj_matr.at[i[0], i[1]] = 1
    adj_matr.at[i[1], i[0]] = 1

1082
2185


In [59]:
sparr = adj_matr.apply(pd.arrays.SparseArray)
sparsity = 1 - sparr.sparse.density
print(sparsity)

0.9962826421940612


In [7]:
adj_matr

Unnamed: 0,anterior eye inflammation,necroptosis,when co-administered with apomorphine,lyn,av block,ip1 prostacyclin receptor agonist,nsaids,metaxalone,mineral absorption,chrm3,...,csnk1e,scn7a,trpv5,goserelin,tetrahydrozoline hydrochloride,gi resection or anastomosis,lovastatin,hemoglobin modulator,milnacipran,ethambutol hydrochloride
anterior eye inflammation,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
necroptosis,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
when co-administered with apomorphine,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
lyn,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
av block,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
gi resection or anastomosis,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
lovastatin,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
hemoglobin modulator,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
milnacipran,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [60]:
adj_matr.to_csv('/Users/chaarvibansal/Desktop/drug-repurposing/data/master-graph-adj-undirected.csv')

## Create Feature Matrix

Drug chemical and assay properties have been used as node features for the drug. For other node types like Pathways, MOA, Target etc, a 'no_feature' feature has been introduced. The value of this feature is 1 for node-types without a feature and 0 for node-types with a feature.

In [81]:
drug_feature_df = pd.read_csv('/Users/chaarvibansal/Desktop/drug-repurposing/data/relationship/feature-vector.csv')

In [82]:
drug_feature_df = drug_feature_df.fillna(0)

In [83]:
drug_feature_df.shape

(1082, 58)

In [84]:
drug_feature_df

Unnamed: 0,Name,EC50_value_original_target,IC50_value_original_target,pIC50_value_original_target,MW,logP,HBA,HBD,RotBonds,PSA,...,1508605_Potency(μM),1508605_Efficacy(%),1508605_Activity,1508605_Tox,1508605_Cytotox_CC50(uM),1508605_perc_Cytotox,t_half,Bioavailability,protein-binding,no_features
0,tolterodine (tartrate),0.0,0.0,8.2006,475.582,3.21882,6.0,5.0,10.0,138.53,...,0.0,0.0,0.0,0.0,0.0,0.0,2.8,0.0,0.96,0
1,palonosetron (hydrochloride),0.0,0.0,0.0000,332.875,3.07830,2.0,0.0,1.0,23.55,...,0.0,0.0,0.0,0.0,0.0,0.0,40.0,0.0,0.62,0
2,dapoxetine (hydrochloride),0.0,0.0,0.0000,341.882,5.33340,2.0,0.0,6.0,12.47,...,0.0,0.0,0.0,0.0,0.0,0.0,1.5,0.0,0.00,0
3,solifenacin (succinate),0.0,0.0,7.1970,480.561,3.80070,5.0,2.0,5.0,107.38,...,0.0,0.0,0.0,0.0,0.0,0.0,59.0,0.0,0.94,0
4,indinavir (sulfate),0.0,150.0,6.8240,711.882,2.21410,9.0,6.0,11.0,192.63,...,0.0,0.0,0.0,0.0,0.0,0.0,1.8,0.0,0.60,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,adrenal gland failure,0.0,0.0,0.0000,0.000,0.00000,0.0,0.0,0.0,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,1
1078,renal failure,0.0,0.0,0.0000,0.000,0.00000,0.0,0.0,0.0,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,1
1079,severe anemia,0.0,0.0,0.0000,0.000,0.00000,0.0,0.0,0.0,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,1
1080,shock,0.0,0.0,0.0000,0.000,0.00000,0.0,0.0,0.0,0.00,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,1


In [85]:
drug_feature_df.to_csv('/Users/chaarvibansal/Desktop/drug-repurposing/data/feature-vector-final.csv')