In [1]:
import numpy as np
from ebc import EBC 
from matrix import SparseMatrix

# Parse the sparse EBC input file into a list of [column 0, column 2, value] records,
# where the value is column 4 converted to float. Lines with fewer than five
# tab-separated fields (the header lines) are skipped.
with open("resources/matrix-ebc-paper-sparse.tsv", "r") as f:
    data = [
        [fields[0], fields[2], float(fields[4])]
        for fields in (row.split("\t") for row in f)
        if len(fields) >= 5
    ]


# Matrix dimensions: n = unique drug-gene pairs (rows), p = unique dependency paths (columns).
n, p = 14052, 7272

# Build the n x p sparse matrix, fill it from the parsed records, then normalize it
# before it is handed to the clustering step.
matrix = SparseMatrix([n, p])
matrix.read_data(data)
matrix.normalize()


# Consensus (co-clustering) matrix: after the loop, Con_mat[i, j] is the number of
# EBC runs in which rows i and j received the same row-cluster label. The matrix is
# symmetric and its diagonal equals N_RUNS.
N_RUNS = 100  # number of independent EBC runs accumulated into Con_mat

Con_mat = np.zeros((n, n), dtype=int)
for k in range(N_RUNS):
    # [7, 25] is the number of clusters requested on each axis.
    # NOTE(review): the remaining positional arguments (10, 1e-10, 0.01) are passed
    # through unchanged; presumably max iterations, jitter and objective tolerance --
    # confirm against the EBC constructor.
    # NOTE(review): each run starts from a random initialization and no seed is set
    # here, so the accumulated counts will differ between executions.
    ebc = EBC(matrix, [7, 25], 10, 1e-10, 0.01)

    # The third return value is bound to `n_iter` rather than `iter` so the builtin
    # `iter` is not shadowed for the rest of the notebook session.
    cXY, objective, n_iter = ebc.run()

    # Row-axis cluster assignments; only the first n entries are used.
    clusters = cXY[0]
    labels = np.asarray(clusters)[:n]
    if labels.shape != (n,):
        raise ValueError(
            "expected %d row-cluster labels, got an array of shape %s" % (n, labels.shape)
        )

    # Broadcasting the label vector against itself yields an (n, n) boolean matrix
    # that is True wherever two rows share a cluster. Adding it increments every
    # co-clustered pair (both triangles and the diagonal) by one in a single
    # vectorized step, instead of a Python loop over all ~n^2/2 pairs.
    Con_mat += labels[:, None] == labels[None, :]

Running EBC on a 2-d sparse matrix with size [14052, 7272] ...
Randomly initializing clusters, with cluster number on each axis: [7, 25] ...
--> Running iteration 1 .. objective value = 6.436251
--> Running iteration 2 .. objective value = 6.411118
--> Running iteration 3 .. objective value = 6.406774
EBC finished in 3 iterations, with final objective value 6.4068
Running EBC on a 2-d sparse matrix with size [14052, 7272] ...
Randomly initializing clusters, with cluster number on each axis: [7, 25] ...
--> Running iteration 1 .. objective value = 6.300809
--> Running iteration 2 .. objective value = 6.189853
--> Running iteration 3 .. objective value = 6.184770
EBC finished in 3 iterations, with final objective value 6.1848
Running EBC on a 2-d sparse matrix with size [14052, 7272] ...
Randomly initializing clusters, with cluster number on each axis: [7, 25] ...
--> Running iteration 1 .. objective value = 6.337652
--> Running iteration 2 .. objective value = 6.308053
--> Running itera

In [2]:
import pandas as pd
import numpy as np
# Load the pair/path table that carries the drug-gene pair labels.
df = pd.read_csv("Pair_Path_Mapping/Pair_Path_Sparse.csv")

# Pull the "drug-gene" column out as a NumPy array; it is used to label the
# consensus matrix.
pair_column = df["drug-gene"]
arr = pair_column.to_numpy()

In [3]:
# Wrap the consensus counts in a DataFrame whose rows and columns are both labelled
# with the same drug-gene pairs.
# NOTE(review): assumes the order of `arr` matches the row order of Con_mat -- confirm.
df1 = pd.DataFrame(Con_mat, index=arr, columns=arr)

# Preview the first rows only; the full frame is too large to display.
df1.head()

Unnamed: 0,(rc-160/igf-i),(tnf-r2/tnf-r1),(tiludronate/vegf),(il-11/il-10),(il-2/il-5),(actd/actc),(il-11/il-16),(fgf-7/fgf-2),(urokinase/fmlp),(clopidogrel/p-selectin),...,(clodronate/vegf),(cisplatin/mkp1),(paclitaxel/aromatase),(dasatinib/parp),(methoxsalen/cyp2a6),(canertinib/erbb),(saha/ctcl),(tamoxifen/kallikrein),(delavirdine/cyp3a),(propafenone/cyp1a2)
(rc-160/igf-i),100,9,39,12,8,47,39,19,35,37,...,50,53,46,52,7,9,37,49,21,19
(tnf-r2/tnf-r1),9,100,8,44,57,8,10,32,25,16,...,4,6,11,5,19,12,5,6,18,15
(tiludronate/vegf),39,8,100,6,7,63,59,23,38,42,...,62,57,59,67,3,7,47,56,21,18
(il-11/il-10),12,44,6,100,62,6,9,50,13,18,...,6,7,9,8,16,6,7,9,10,8
(il-2/il-5),8,57,7,62,100,7,8,41,15,18,...,5,5,7,5,20,10,5,6,13,10


In [4]:
# Persist the labelled co-occurrence matrix (index labels included) as CSV.
output_path = "Co-occurrence_Sparse.csv"
df1.to_csv(output_path)