In [1]:
import numpy as np
from ebc import EBC 
from matrix import SparseMatrix

# Parse the dense EBC matrix file into [field 0, field 2, float(field 4)] triples.
with open("resources/matrix-ebc-paper-dense.tsv", "r") as tsv_file:
    data = []
    for record in tsv_file:
        fields = record.split("\t")
        # Only complete records are kept; a line with fewer than five
        # tab-separated fields is treated as a header and ignored.
        if len(fields) >= 5:
            data.append([fields[0], fields[2], float(fields[4])])


# Matrix dimensions are hard-coded for this dataset:
#   n -> unique drug-gene pairs, m -> unique dependency paths
n, m = 3514, 1232
shape = [n, m]

# Build the matrix, fill it from the parsed triples, then normalize it.
matrix = SparseMatrix(shape)
matrix.read_data(data)
matrix.normalize()


# Consensus (co-occurrence) matrix: Con_mat[i, j] counts the EBC runs in which
# drug-gene pairs i and j received the same cluster label. The matrix is
# symmetric and each diagonal entry ends up equal to the number of runs.
N_RUNS = 1000   # number of EBC runs accumulated into the consensus matrix

Con_mat = np.zeros((n, n), dtype=int)
for run in range(N_RUNS):
    ebc = EBC(matrix, [30, 125], 10, 1e-10, 0.01)
    # The third return value is named n_iter (not `iter`) so the builtin
    # iter() is not shadowed for the rest of the kernel session.
    cXY, objective, n_iter = ebc.run()
    # Only the drug-gene pair cluster assignments (first entry) are used.
    # Assumes cXY[0] is a flat sequence with one label per pair -- TODO confirm.
    clusters = np.asarray(cXY[0])[:n]
    # Broadcasting the label vector against itself yields an n x n boolean
    # "same cluster" mask; adding it increments every matching (i, j) and
    # (j, i) cell once and each diagonal cell once, exactly like an explicit
    # double loop over the upper triangle, but without ~n^2/2 Python-level
    # iterations per run.
    Con_mat += clusters[:, None] == clusters[None, :]


np.savetxt("Co-occurrency_Mat.csv", Con_mat, delimiter=',')


ModuleNotFoundError: No module named 'ebc'

In [1]:
import pandas as pd
import numpy as np
# The TSV has no header line, so it is read with header=None: every line,
# including the first, is parsed as data with proper numeric dtypes. Letting
# pandas consume the first record as a header and re-inserting it afterwards
# would turn that record's numbers into strings and expose it to pandas'
# renaming of duplicate column names.
df = pd.read_csv("resources/matrix-ebc-paper-dense.tsv", sep='\t', header=None)

# rename column names (raises if the file does not have exactly five columns)
df.columns = ['drug-gene', 'row indicies', 'dependency path','column indicies','certainty']

df.head()


In [2]:
# Keep one row per drug-gene pair: the first occurrence of each pair is
# retained and every later repeat is dropped.
df = df.drop_duplicates(subset="drug-gene", keep="first")

df.head()
                  

Unnamed: 0,drug-gene,row indicies,dependency path,column indicies,certainty
0,"(flavopiridol,nf-kappab)",0,"[prep_by, activation, amod]",781,1.0
1,"(tnf-r2,tnf-r1)",1,"[appos, receptor, appos]",413,1.0
4,"(il-2,il-5)",2,"[appos, csf, appos]",24,1.0
22,"(il-11,il-10)",3,"[appos, pdgfa, appos]",335,1.0
25,"(fgf-7,fgf-2)",4,"[appos, vegf, appos]",127,1.0


In [3]:
# Extract the drug-gene pair labels as a NumPy array; they are used later as
# both the row and column labels of the co-occurrence matrix.
arr = df.loc[:, "drug-gene"].to_numpy()

In [4]:
# The co-occurrence CSV is written by np.savetxt without a header row, so it
# is read with header=None. Treating the first matrix row as column names and
# re-inserting it is unsafe: that row contains repeated counts, and pandas
# renames duplicate column names (e.g. "100" -> "100.1"), which corrupts the
# values or makes the numeric conversion fail.
df1 = pd.read_csv("Co-occurrency_Mat.csv", header=None)

# converting the dataframe into numpy array
num = df1.to_numpy()

# Label both axes with the drug-gene pairs. The counts are whole numbers, so
# they are stored as int; the astype result is assigned (astype returns a new
# frame) so that df2 itself holds integers for any later use.
df2 = pd.DataFrame(data=num, index=arr, columns=arr).astype(int)

df2


Unnamed: 0,"(flavopiridol,nf-kappab)","(tnf-r2,tnf-r1)","(il-2,il-5)","(il-11,il-10)","(fgf-7,fgf-2)","(clopidogrel,p-selectin)","(fgf-7,fgf-1)","(il-11,il-13)","(propranolol,beta2)","(isoflurane,caspase-3)",...,"(erythromycin,p-gp)","(methoxsalen,cyp2a6)","(atra,nf-kappab)","(delavirdine,cyp3a)","(pge2,ifn-gamma)","(dexamethasone,nfkappab)","(propafenone,cyp1a2)","(cisplatin,brca1)","(ndga,nrf2)","(zolmitriptan,5-ht1b)"
"(flavopiridol,nf-kappab)",1000,153,134,100,90,154,105,110,112,350,...,100,28,146,32,103,33,32,118,322,356
"(tnf-r2,tnf-r1)",153,1000,617,522,499,117,425,390,172,126,...,90,23,91,22,412,18,22,75,191,143
"(il-2,il-5)",134,617,1000,628,572,120,477,446,149,134,...,101,22,110,24,463,19,24,83,243,110
"(il-11,il-10)",100,522,628,1000,659,119,574,505,113,118,...,96,14,135,18,503,12,18,99,204,105
"(fgf-7,fgf-2)",90,499,572,659,1000,114,657,532,99,114,...,88,7,158,18,647,8,18,110,151,125
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"(dexamethasone,nfkappab)",33,18,19,12,8,54,10,10,171,47,...,87,717,20,660,12,1000,660,181,18,9
"(propafenone,cyp1a2)",32,22,24,18,18,57,17,17,146,52,...,141,620,32,999,19,660,1000,185,31,30
"(cisplatin,brca1)",118,75,83,99,110,174,183,164,85,218,...,300,167,397,184,116,181,185,1000,108,282
"(ndga,nrf2)",322,191,243,204,151,200,159,161,74,235,...,106,25,161,31,175,18,31,108,1000,175


In [None]:
# Persist the labelled co-occurrence matrix (pair labels on both axes) as CSV.
df2.to_csv(path_or_buf="Co-occurrency_Matrix.csv")