In [1]:
import pandas as pd
import scipy.io as sio
import numpy as np
import math

In [2]:
vi_dr_indi=pd.read_excel('data_raw/database.xlsx', sheet_name='Indications(vi-dr)')

In [3]:
vi_dr_indi=vi_dr_indi[['VIRUS', 'DRUG']]

In [4]:
no_v = len(vi_dr_indi['VIRUS'].unique() )
no_d= len(vi_dr_indi['DRUG'].unique() )

In [5]:
no_v
no_d

121

#### creating interaction matrix

In [6]:
vi_dr_matrix=vi_dr_indi.groupby(['VIRUS', 'DRUG']).size().unstack().fillna(0) #https://www.drawingfromdata.com/making-a-pairwise-distance-matrix-with-pandas

In [7]:
vi_dr_matrix=vi_dr_matrix.sort_index(axis=1)
vi_dr_matrix.columns

Index(['Abacavir', 'Acyclovir', 'Adefovir', 'Adefovir dipivoxil', 'Amantadine',
       'Amprenavir', 'Asunaprevir', 'Atazanavir', 'Baloxavir marboxil',
       'Beclabuvir',
       ...
       'Valaciclovir', 'Valganciclovir', 'Valomaciclovir', 'Vaniprevir',
       'Velpatasvir', 'Vidarabine', 'Voxilaprevir', 'Zalcitabine', 'Zanamivir',
       'Zidovudine'],
      dtype='object', name='DRUG', length=121)

In [8]:
vi_dr_matrix.to_csv('data_processed/virus_drug_association.csv', header=False, index=False)

sio.savemat('data_processed/virus_drug_association.mat',{'mat':vi_dr_matrix.values , 'vi':vi_dr_matrix.index.values, 'dr':vi_dr_matrix.columns.values})

In [9]:
vi_dr_matrix.index.values.T

array(['Chikungunya virus', 'Coxsackievirus B5', 'Ebola virus',
       'Enterovirus D', 'Enterovirus J', 'HBV', 'HCV', 'HHV-1', 'HHV-2',
       'HHV-3', 'HHV-4', 'HHV-5', 'HHV-6', 'HHV-8', 'HIV-1', 'HIV-2',
       'HPIV-2', 'HPIV-3', 'HPV', 'HRV-A', 'HRV-B', 'HTLV-1',
       'Hantaan orthohantavirus', 'Hendra virus', 'Human adenovirus',
       'Human polyomavirus', 'Influenza A virus', 'Influenza B virus',
       'Influenza C virus', 'Lassa mammarenavirus', 'MCV', 'MERS-CoV',
       'Nipah virus', 'RSV', 'SARS-CoV', 'SARS-CoV-2', 'VV',
       'Variola virus', 'Zika virus'], dtype=object)

In [10]:
vi_dr_matrix.columns

Index(['Abacavir', 'Acyclovir', 'Adefovir', 'Adefovir dipivoxil', 'Amantadine',
       'Amprenavir', 'Asunaprevir', 'Atazanavir', 'Baloxavir marboxil',
       'Beclabuvir',
       ...
       'Valaciclovir', 'Valganciclovir', 'Valomaciclovir', 'Vaniprevir',
       'Velpatasvir', 'Vidarabine', 'Voxilaprevir', 'Zalcitabine', 'Zanamivir',
       'Zidovudine'],
      dtype='object', name='DRUG', length=121)

### Creating drug similarity matrix
* 1. get KEGG IDs for the drugs
* 2. add KEGG IDs manually for the drugs which dont have KEGG IDs in mapping file (drug links.csv) obtained from DrugBank
* 3. use rest API to get SIMCOMP score: https://www.genome.jp/tools/gn_tools_api.html   to get SIMCOMP similarities using KEGG IDs
* 4. convert similarity score fetched from API to matrix

##### 1. get KEGG IDs for the drugs

In [11]:
#FETCH KEGG IDS AND WRITE TO NEW UPDATES DRUGS FILE with_KEGG_IDs

#drugs=pd.read_excel('data_raw/database.xlsx', sheet_name='Drug_info')
#mapping_file=pd.read_csv('data_raw/drug links.csv') #obtained from https://www.drugbank.ca/releases/latest#external-links  : has wrong drug linking for oseltamivir..corrected

#drugs_with_KEGG_IDs = pd.merge(mapping_file, drugs, left_on='DrugBank ID',right_on='DRUGBANK ID', how='right')
#drugs_with_KEGG_IDs=drugs_with_KEGG_IDs[['DRUGBANK ID','DrugBank ID','Name','KEGG Compound ID', 'KEGG Drug ID']].sort_values('DRUGBANK ID')
#drugs_with_KEGG_IDs.to_csv('data_processed/drugs_withKEGGIDs.csv', index=False)

##### 2. Add KEGG IDs of drusg with missing IDs manually

##### 3. use rest API to get SIMCOMP score:

In [12]:
#read
drugs_KEGGSupdated=pd.read_excel('data_raw/database.xlsx', sheet_name='Drug_info')#118 drugs (KEGG IDs of 3 drugs missing)
drugs_with_KEGG_IDs=drugs_KEGGSupdated[['DRUGBANK ID','DrugBank ID','Name','KEGG Compound ID', 'KEGG Drug ID']].sort_values(by=['KEGG Compound ID','KEGG Drug ID'])

In [13]:
#all drugs
dr_list1=drugs_with_KEGG_IDs['Name'].values.tolist() #117 drugs

#drugs having KEGG ID
'''drugs_with_KEGG_IDs['KEGG Combined ID']=drugs_with_KEGG_IDs['KEGG Compound ID'].fillna('') + drugs_with_KEGG_IDs['KEGG Drug ID'].fillna('')
drugs_with_KEGG_IDs['KEGG Combined ID'].replace('', np.nan, inplace=True)
dr_list2a=drugs_with_KEGG_IDs.dropna(subset = ['KEGG Combined ID']) #114 drugs'''

"drugs_with_KEGG_IDs['KEGG Combined ID']=drugs_with_KEGG_IDs['KEGG Compound ID'].fillna('') + drugs_with_KEGG_IDs['KEGG Drug ID'].fillna('')\ndrugs_with_KEGG_IDs['KEGG Combined ID'].replace('', np.nan, inplace=True)\ndr_list2a=drugs_with_KEGG_IDs.dropna(subset = ['KEGG Combined ID']) #114 drugs"

In [14]:
cmnd=""
dr_list2=[] # 101 drugs (13 drugs had too low SIMCOMP score with all other 114 drugs)
for i in range(len(drugs_with_KEGG_IDs.index)):
    nonans=drugs_with_KEGG_IDs.iloc[i].isnull().sum()  
    
    if nonans==1:
               
        if pd.isnull(drugs_with_KEGG_IDs.iloc[i, 3]):
            #print(drugs_with_KEGG_IDs.iloc[i, 4])
            cmnd=cmnd+ drugs_with_KEGG_IDs.iloc[i, 4]
            
        else:
            #print(drugs_with_KEGG_IDs.iloc[i, 3])
            cmnd=cmnd+ drugs_with_KEGG_IDs.iloc[i, 3]
        dr_list2.append(drugs_with_KEGG_IDs.iloc[i, 2])
        cmnd=cmnd+ '+'
    
    elif nonans==0:
        
        #print(drugs_with_KEGG_IDs.iloc[i, 3])
        cmnd=cmnd+ drugs_with_KEGG_IDs.iloc[i, 3]
        dr_list2.append(drugs_with_KEGG_IDs.iloc[i, 2])
        cmnd=cmnd+ '+'  
    
print(cmnd) #lists all drug KEGG IDs (114) except those which have both NANs in KEGG drug and compund ID
#len(dr_list2)#same as drug_list2a (drugs having KEGG ID)

C01588+C05682+C06456+C06810+C06818+C06909+C06941+C06953+C07043+C07051+C07065+C07184+C07195+C07207+C07210+C07236+C07240+C07257+C07263+C07312+C07417+C07624+C07625+C08086+C08088+C08092+C08095+C09147+C10874+C11222+C11277+C12599+C12871+C13480+C15643+C17407+C21598+D00317+D00333+D00342+D00391+D00423+D00429+D00745+D01784+D01982+D01995+D02495+D02496+D02497+D02499+D02500+D02736+D02737+D02744+D02747+D02748+D02768+D03305+D03537+D03656+D03829+D03884+D03956+D03981+D04008+D04112+D04552+D04554+D04859+D05528+D06275+D06651+D06670+D06675+D06676+D06677+D07199+D07249+D07250+D07471+D08605+D08876+D08997+D09012+D09390+D09410+D09537+D09575+D09720+D09881+D09971+D09987+D10066+D10081+D10093+D10105+D10366+D10428+D10442+D10462+D10558+D10580+D10582+D10610+D10624+D10625+D10639+D10745+D10801+D10806+D10814+D10816+D10899+D10909+D11021+D11399+D11472+


In [15]:
set(dr_list1)-set(dr_list2)

{'Peginterferon alfacon-1', 'Triazavirin', 'UC-781'}

Copy paste the following command

In [16]:
#http://rest.genome.jp/simcomp2/C01588+C05682+C06456+C06810+C06818+C06909+C06941+C06953+C07043+C07051+C07065+C07184+C07195+C07207+C07210+C07236+C07240+C07257+C07263+C07312+C07417+C07624+C07625+C08086+C08088+C08092+C08095+C09147+C10874+C11222+C11277+C12599+C12871+C13480+C15643+C17407+C21598+D00317+D00333+D00342+D00391+D00423+D00429+D00745+D01784+D01982+D01995+D02495+D02496+D02497+D02499+D02500+D02736+D02737+D02744+D02747+D02748+D02768+D03305+D03537+D03656+D03829+D03884+D03956+D03981+D04008+D04112+D04552+D04554+D04859+D05528+D06275+D06651+D06670+D06675+D06676+D06677+D07199+D07249+D07250+D07471+D08605+D08876+D08997+D09012+D09390+D09410+D09537+D09575+D09720+D09881+D09971+D09987+D10066+D10081+D10093+D10105+D10366+D10428+D10442+D10462+D10558+D10580+D10582+D10610+D10624+D10625+D10639+D10745+D10801+D10806+D10814+D10816+D10899+D10909+D11021+D11399+D11472/C01588+C05682+C06456+C06810+C06818+C06909+C06941+C06953+C07043+C07051+C07065+C07184+C07195+C07207+C07210+C07236+C07240+C07257+C07263+C07312+C07417+C07624+C07625+C08086+C08088+C08092+C08095+C09147+C10874+C11222+C11277+C12599+C12871+C13480+C15643+C17407+C21598+D00317+D00333+D00342+D00391+D00423+D00429+D00745+D01784+D01982+D01995+D02495+D02496+D02497+D02499+D02500+D02736+D02737+D02744+D02747+D02748+D02768+D03305+D03537+D03656+D03829+D03884+D03956+D03981+D04008+D04112+D04552+D04554+D04859+D05528+D06275+D06651+D06670+D06675+D06676+D06677+D07199+D07249+D07250+D07471+D08605+D08876+D08997+D09012+D09390+D09410+D09537+D09575+D09720+D09881+D09971+D09987+D10066+D10081+D10093+D10105+D10366+D10428+D10442+D10462+D10558+D10580+D10582+D10610+D10624+D10625+D10639+D10745+D10801+D10806+D10814+D10816+D10899+D10909+D11021+D11399+D11472/cutoff=0.01

#### 4. convert similarity score fetched from API to matrix (after mapping KEGG drug/compund IDs to drug names and adding missed drugs)

In [17]:
drug_sim_raw=pd.read_excel('data_raw/database.xlsx', sheet_name='drug_sim_raw')[['DRUG1','DRUG2','SIMCOMP']]

In [18]:
len(drug_sim_raw['DRUG1'].unique()) #121 total drugs, no KEGG IDS hence no score calculation for 3 drugs, too low SIMCOMP score for 13 drugs..left with 103 drugs

105

In [19]:
#mapping KEGG IDs to drugbank names(some drugbank name missing )
mapping=drugs_with_KEGG_IDs[['KEGG Compound ID', 'Name']].rename(columns={'KEGG Compound ID':'KEGG Drug ID'}).append(drugs_with_KEGG_IDs[['KEGG Drug ID', 'Name']])
dic_dr=dict(mapping.values.tolist())
drug_sim_raw=drug_sim_raw.replace(dic_dr) 

In [20]:
#add 13 drugs having SIMCOMP<cutoff
dr_list3=drug_sim_raw['DRUG1'].unique().tolist() #103 drugs
dr_list_missing = (list(set(dr_list1)-set(dr_list3))) #13 missed drugs+3 drugs with no KEGG ID

In [21]:
for missed_drug in dr_list_missing:
    drug_sim_raw=drug_sim_raw.append({'DRUG1':missed_drug,'DRUG2':missed_drug,'SIMCOMP':1},ignore_index=True)

In [22]:
#convert to matrix
drug_sim_matrix =drug_sim_raw.pivot_table(columns='DRUG1', index='DRUG2', values='SIMCOMP')#.reset_index()
drug_sim_matrix=drug_sim_matrix.fillna(0) #replace NANs (SIMCOP SCORE less than cuoff; eg: http://rest.genome.jp/simcomp2/C06818+D08876+D09390/C06818+D08876+D09390/cutoff=0.01) with 0

In [23]:
drug_sim_matrix

DRUG1,Abacavir,Acyclovir,Adefovir,Adefovir dipivoxil,Amantadine,Amprenavir,Asunaprevir,Atazanavir,Baloxavir marboxil,Beclabuvir,...,Valaciclovir,Valganciclovir,Valomaciclovir,Vaniprevir,Velpatasvir,Vidarabine,Voxilaprevir,Zalcitabine,Zanamivir,Zidovudine
DRUG2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abacavir,1.000000,0.501832,0.414474,0.414474,0.156250,0.051724,0.151603,0.053352,0.095238,0.120690,...,0.325967,0.412429,0.287313,0.142061,0.181486,0.525424,0.129235,0.324503,0.195929,0.246459
Acyclovir,0.443662,1.000000,0.551724,0.551724,0.000000,0.018868,0.043478,0.059002,0.127273,0.069277,...,0.694215,0.530249,0.260163,0.027397,0.109741,0.617021,0.062740,0.416309,0.075269,0.302817
Adefovir,0.414474,0.551724,1.000000,1.000000,0.000000,0.052632,0.057143,0.057269,0.103448,0.079882,...,0.423841,0.315789,0.223709,0.026667,0.116505,0.520913,0.059850,0.236749,0.000000,0.171171
Adefovir dipivoxil,0.414474,0.551724,1.000000,1.000000,0.000000,0.052632,0.057143,0.057269,0.103448,0.079882,...,0.423841,0.315789,0.223709,0.026667,0.116505,0.520913,0.059850,0.236749,0.000000,0.171171
Amantadine,0.156250,0.000000,0.000000,0.000000,1.000000,0.061571,0.127695,0.000000,0.017544,0.116667,...,0.000000,0.000000,0.000000,0.092308,0.095541,0.000000,0.091160,0.102662,0.000000,0.122449
Amprenavir,0.051724,0.018868,0.052632,0.052632,0.061571,1.000000,0.262003,0.339286,0.257669,0.287129,...,0.205534,0.181989,0.131034,0.271754,0.251422,0.050725,0.185270,0.142241,0.151631,0.122047
Asunaprevir,0.133429,0.043478,0.057143,0.057143,0.102107,0.248304,1.000000,0.334981,0.234568,0.276347,...,0.094183,0.091644,0.072961,0.529093,0.318229,0.055556,0.504975,0.092308,0.103253,0.121076
Atazanavir,0.053352,0.059002,0.057269,0.057269,0.000000,0.339286,0.365360,0.927273,0.250000,0.264775,...,0.127379,0.109551,0.139535,0.264237,0.407821,0.055635,0.307692,0.059908,0.098266,0.067251
Baloxavir marboxil,0.095238,0.127273,0.103448,0.103448,0.017544,0.253823,0.234568,0.235813,1.000000,0.253165,...,0.131148,0.126984,0.097561,0.194896,0.305310,0.064516,0.211790,0.144465,0.000000,0.132404
Beclabuvir,0.120690,0.069277,0.079882,0.079882,0.116667,0.287129,0.267442,0.206313,0.267606,1.000000,...,0.062670,0.061008,0.086718,0.306884,0.295918,0.124438,0.337793,0.127214,0.037736,0.126332


In [24]:
drug_sim_matrix.to_csv('data_processed/similarity_drugs.csv', index=True)

sio.savemat('data_processed/drug_sim_matrix.mat',{'Sd':drug_sim_matrix.values , 'dr_names':drug_sim_matrix.index.values})

### Creating virus similarity matrix
* 1. fetch complete genome for viruses (preferably refseq) from NCBI as fasta files (data_raw/virus_completeGenome/)
* 2. find ONF (k-mer) based dissimilarities measures between them (data_processed/ONF_distance_shalini_system/ using VirHostMatcher) #has an empty last colmn
* 3. subtract from 1 to get the similarity
* 4. map viral genomic sequnce id to virus name

##### 1. fetch complete genome for viruses: done manually

##### 2. find ONF based distance measure: ran VirhostMatcher on Linux machine (Shalini's system)

##### 3. subtract from 1 to get the similarity

In [26]:
vir_sim_matrix=1-pd.read_csv('data_processed/ONF_distance/d2star_k6.csv', index_col=0, header=0)

vir_sim_matrix= vir_sim_matrix.iloc[:, :-1] #Remove last empty col
vir_sim_matrix.columns=vir_sim_matrix.columns.str.rstrip('.fasta') #strip .fasta suffexi from col headers
vir_sim_matrix.index=vir_sim_matrix.index.str.rstrip('.fasta') #strip .fasta suffexi from row indices
vir_sim_matrix

Unnamed: 0_level_0,NC_019843.3,NC_004297.0,NC_001802.1,NC_001722.1,NC_002023.0,NC_004718.3,NC_006998.1,AC_000017.1,NC_003977.2,NC_012532.1,...,NC_001806.2,NC_001796.2,NC_001611.1,NC_003443.1,NC_001664.4,NC_002549.1,NC_004102.1,NC_002204.0,NC_007605.1,NC_004162.2
d2star,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
NC_019843.3,1.0,0.550448,0.527246,0.51611,0.514545,0.643252,0.586605,0.568163,0.518922,0.556616,...,0.571731,0.539027,0.600338,0.539901,0.524421,0.531314,0.510463,0.545907,0.559538,0.544573
NC_004297.0,0.550448,1.0,0.519091,0.510399,0.520113,0.527542,0.549818,0.518694,0.505781,0.520639,...,0.50744,0.542692,0.555074,0.533355,0.512057,0.53578,0.50331,0.519315,0.533343,0.512534
NC_001802.1,0.527246,0.519091,1.0,0.590772,0.497058,0.564588,0.563069,0.537429,0.50209,0.537206,...,0.547129,0.534535,0.57686,0.515678,0.549949,0.535352,0.521066,0.548284,0.557793,0.527765
NC_001722.1,0.51611,0.510399,0.590772,1.0,0.510926,0.54525,0.55864,0.559144,0.526467,0.519396,...,0.549193,0.536877,0.568388,0.546993,0.542321,0.52672,0.518988,0.551428,0.565254,0.55027
NC_002023.0,0.514545,0.520113,0.497058,0.510926,1.0,0.52409,0.510801,0.486268,0.505161,0.522643,...,0.503788,0.508429,0.532387,0.533012,0.491691,0.502796,0.492594,0.534272,0.467274,0.505682
NC_004718.3,0.643252,0.527542,0.564588,0.54525,0.52409,1.0,0.600216,0.569111,0.52271,0.558093,...,0.590993,0.541224,0.626422,0.542142,0.544347,0.557383,0.533091,0.579757,0.581425,0.555768
NC_006998.1,0.586605,0.549818,0.563069,0.55864,0.510801,0.600216,1.0,0.606433,0.53467,0.558802,...,0.620861,0.529867,0.921251,0.547799,0.58775,0.542935,0.555293,0.571046,0.608974,0.582249
AC_000017.1,0.568163,0.518694,0.537429,0.559144,0.486268,0.569111,0.606433,1.0,0.526842,0.539465,...,0.722914,0.535468,0.634814,0.535021,0.619469,0.5468,0.606443,0.524529,0.716869,0.569519
NC_003977.2,0.518922,0.505781,0.50209,0.526467,0.505161,0.52271,0.53467,0.526842,1.0,0.526243,...,0.538787,0.497884,0.537287,0.506574,0.524471,0.509264,0.509182,0.509814,0.518547,0.521714
NC_012532.1,0.556616,0.520639,0.537206,0.519396,0.522643,0.558093,0.558802,0.539465,0.526243,1.0,...,0.54651,0.526766,0.560636,0.527757,0.551489,0.520352,0.537866,0.526752,0.535054,0.541225


##### Map viral genomic sequnce id to virus name

In [27]:
mapping=pd.read_excel('data_raw/database.xlsx', sheet_name='Virus_info')[['NCBI RefSeq Accession ID','ACRONYM' ]]
dic_vi=dict(mapping.values.tolist())
vir_sim_matrix=vir_sim_matrix.rename(columns=dic_vi, index=dic_vi) 


In [28]:
vir_sim_matrix=vir_sim_matrix.sort_index(axis=1)
vir_sim_matrix=vir_sim_matrix.sort_index(axis=0)
vir_sim_matrix

Unnamed: 0_level_0,Chikungunya virus,Coxsackievirus B5,Ebola virus,Enterovirus D,Enterovirus J,HBV,HCV,HHV-1,HHV-2,HHV-3,...,Lassa mammarenavirus,MCV,MERS-CoV,Nipah virus,RSV,SARS-CoV,SARS-CoV-2,VV,Variola virus,Zika virus
d2star,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Chikungunya virus,1.0,0.516603,0.532395,0.519898,0.508609,0.521714,0.539485,0.57995,0.575516,0.54885,...,0.512534,0.555728,0.544573,0.530703,0.523285,0.555768,0.570482,0.582249,0.59474,0.541225
Coxsackievirus B5,0.516603,1.0,0.513568,0.570859,0.55793,0.516581,0.488933,0.542364,0.527871,0.50037,...,0.517264,0.519451,0.529717,0.541211,0.523202,0.540989,0.553558,0.5104,0.525899,0.501666
Ebola virus,0.532395,0.513568,1.0,0.503441,0.498397,0.509264,0.535986,0.536519,0.551883,0.529696,...,0.53578,0.55371,0.531314,0.539395,0.520057,0.557383,0.559995,0.542935,0.548989,0.520352
Enterovirus D,0.519898,0.570859,0.503441,1.0,0.559862,0.522836,0.474324,0.530538,0.520856,0.524424,...,0.512673,0.493761,0.520923,0.513799,0.503035,0.539165,0.538074,0.538473,0.539484,0.488926
Enterovirus J,0.508609,0.55793,0.498397,0.559862,1.0,0.506899,0.484325,0.502548,0.504797,0.499448,...,0.513018,0.53401,0.542819,0.522267,0.527416,0.548926,0.533067,0.505114,0.510359,0.502136
HBV,0.521714,0.516581,0.509264,0.522836,0.506899,1.0,0.509182,0.538787,0.544751,0.520635,...,0.505781,0.5404,0.518922,0.527798,0.513848,0.52271,0.525323,0.53467,0.537287,0.526243
HCV,0.539485,0.488933,0.535986,0.474324,0.484325,0.509182,1.0,0.601197,0.61038,0.565284,...,0.50331,0.626379,0.510463,0.529315,0.525334,0.533091,0.533642,0.555293,0.552597,0.537866
HHV-1,0.57995,0.542364,0.536519,0.530538,0.502548,0.538787,0.601197,1.0,0.923709,0.664674,...,0.50744,0.714524,0.571731,0.56114,0.533397,0.590993,0.583255,0.620861,0.630542,0.54651
HHV-2,0.575516,0.527871,0.551883,0.520856,0.504797,0.544751,0.61038,0.923709,1.0,0.654162,...,0.521164,0.759096,0.569016,0.562328,0.533808,0.588871,0.585227,0.621845,0.632663,0.555958
HHV-3,0.54885,0.50037,0.529696,0.524424,0.499448,0.520635,0.565284,0.664674,0.654162,1.0,...,0.504545,0.552874,0.540457,0.514074,0.555458,0.519492,0.560829,0.605856,0.625449,0.538801


In [29]:
vir_sim_matrix.to_csv('data_processed/similarity_virus.csv')

sio.savemat('data_processed/vir_sim_matrix.mat',{'Sv':vir_sim_matrix.values , 'vi_names':vir_sim_matrix.index.values})

In [30]:
vi_dr_matrix

DRUG,Abacavir,Acyclovir,Adefovir,Adefovir dipivoxil,Amantadine,Amprenavir,Asunaprevir,Atazanavir,Baloxavir marboxil,Beclabuvir,...,Valaciclovir,Valganciclovir,Valomaciclovir,Vaniprevir,Velpatasvir,Vidarabine,Voxilaprevir,Zalcitabine,Zanamivir,Zidovudine
VIRUS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Chikungunya virus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Coxsackievirus B5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ebola virus,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Enterovirus D,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Enterovirus J,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HBV,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HCV,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
HHV-1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
HHV-2,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
HHV-3,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [31]:
sio.savemat('data_processed/virus_drug_association_withcv.mat',{'mat':vi_dr_matrix.values , 'vi':vi_dr_matrix.index.values, 'dr':vi_dr_matrix.columns.values})
sio.savemat('data_processed/vir_sim_matrix_withcv.mat',{'Sv':vir_sim_matrix.values , 'vi_names':vir_sim_matrix.index.values})
sio.savemat('data_processed/drug_sim_matrix_withcv.mat',{'Sd':drug_sim_matrix.values , 'dr_names':drug_sim_matrix.index.values})

#REMOVE SARS-CoV-2 
vi_dr_matrix=vi_dr_matrix.drop(['SARS-CoV-2'])
vi_dr_matrix=vi_dr_matrix.drop(columns=['Hydroxychloroquine', 'Carmofur'])

vir_sim_matrix=vir_sim_matrix.drop(['SARS-CoV-2'])
vir_sim_matrix=vir_sim_matrix.drop(columns=['SARS-CoV-2'])

drug_sim_matrix=drug_sim_matrix.drop(columns=['Hydroxychloroquine', 'Carmofur'])
drug_sim_matrix=drug_sim_matrix.drop(['Hydroxychloroquine', 'Carmofur'])

sio.savemat('data_processed/virus_drug_association.mat',{'mat':vi_dr_matrix.values , 'vi':vi_dr_matrix.index.values, 'dr':vi_dr_matrix.columns.values})
sio.savemat('data_processed/vir_sim_matrix.mat',{'Sv':vir_sim_matrix.values , 'vi_names':vir_sim_matrix.index.values})
sio.savemat('data_processed/drug_sim_matrix.mat',{'Sd':drug_sim_matrix.values , 'dr_names':drug_sim_matrix.index.values})
