In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
drug_interaction=pd.read_csv("drug_interaction_drugcomb.csv",low_memory=False)
drug_feature=pd.read_csv("drug_feature.csv",index_col=0)
cell_feature=pd.read_csv("final_cell_line_feature.csv",index_col=0)

In [3]:
drug_interaction.shape

(1048575, 18)

In [4]:
drug_interaction.head()

Unnamed: 0,block_id,drug_row,drug_col,cell_line_name,conc_r_unit,conc_c_unit,css,synergy_zip,synergy_bliss,synergy_loewe,synergy_hsa,ic50_row,ic50_col,ri_row,ri_col,css_row,css_col,S
0,1,5-FU,ABT-888,A2058,uM,uM,30.869,3.865915,6.256584,-2.951386,5.536903,5.126836,3.267734,11.471,-0.441,22.545,39.193,19.839
1,2,5-FU,ABT-888,A2058,uM,uM,27.46,8.247403,12.333896,3.125927,11.614215,5.126836,3.267734,11.471,-0.441,24.135,30.785,16.43
2,3,5-FU,ABT-888,A2058,uM,uM,29.901,6.06344,11.660209,2.452239,10.940528,5.126836,3.267734,11.471,-0.441,25.561,34.241,18.871
3,4,5-FU,ABT-888,A2058,uM,uM,24.016,-4.280231,5.145209,-4.062761,4.425528,5.126836,3.267734,11.471,-0.441,16.661,31.371,12.986
4,5,5-FU,AZD1775,A2058,uM,uM,66.847,12.284698,15.765467,10.409407,18.65634,5.126836,0.266027,11.471,25.164,76.501,57.193,30.212


In [5]:
drug_interaction.dropna(inplace=True)

In [6]:
drug_interaction.shape

(466033, 18)

In [7]:
drug_interaction['synergistic_status'] = drug_interaction['synergy_zip'].apply(lambda x: 1 if x > 0 else 0)

In [8]:
drug_interaction=drug_interaction[['drug_row','drug_col','cell_line_name','synergistic_status']]

In [9]:
drug_interaction.head()

Unnamed: 0,drug_row,drug_col,cell_line_name,synergistic_status
0,5-FU,ABT-888,A2058,1
1,5-FU,ABT-888,A2058,1
2,5-FU,ABT-888,A2058,1
3,5-FU,ABT-888,A2058,0
4,5-FU,AZD1775,A2058,1


In [10]:
drug_interaction.synergistic_status.value_counts()

synergistic_status
0    258510
1    207523
Name: count, dtype: int64

In [11]:
interaction_drug= set(drug_interaction['drug_row']).union(set(drug_interaction['drug_col']))

In [12]:
len(interaction_drug)

4150

In [13]:
drug_feature.head()

Unnamed: 0,drug,0,1,2,3,4,5,6,7,8,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,5-FU,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,ABT-888,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,AZD1775,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,BEZ-235,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,BORTEZOMIB,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
all_drug_with_feature=set(drug_feature['drug'].unique())

In [15]:
len(all_drug_with_feature)

4051

##### if all interaction drug has feature or not

In [16]:
if interaction_drug.issubset(all_drug_with_feature):
    print("All interaction drugs are present in the feature set.")
else:
    print(" Some interaction drugs are missing in the feature set.")
    missing = interaction_drug - all_drug_with_feature
    print("Missing drugs:", missing)


 Some interaction drugs are missing in the feature set.
Missing drugs: {'AC1L34YQ', 'AC1OFCEV', 'ZINC17545571', 'Vincristine Sulfate, Apocynaceae sp.', "Phloretin 2'-galactoside", 'AC1O7GND', 'ERK 11e', "3',4'-Diacetylafzelin", 'ZINC84688828', 'ZINC97975747', 'AC1NUNML', 'Curcumin, Curcuma longa L.', 'AC1O7H3F', 'AC1O53DL', 'PIFITHRIN-A (PFTA)', 'GYNOSTEMMA EXTRACT', 'CTK8E8456', 'Provitamin A1', 'AC1Q4OOX', 'QCR-20', 'ZINC208012538', 'ZINC101426918', 'ZINC103939005', 'AK174336', 'ACMC-20mj2l', 'CTK8F0346', 'AC1LAYFC', 'MOLPORT-042-665-727', 'TIC10 isomer', 'ZINC43163828', 'KS-00000Y5E', 'ZINC28538988', 'Pd-1/pd-l1 inhibitor 1', 'AGN-PC-0MU5N5', 'ZINC38139483', 'DSSTox_CID_28582', 'AK198640', 'AK-77283', 'AK160201', 'AC1OFCD4', 'ZINC34894448', 'Bacitracin (Zinc)', 'ZINC208951860', 'AC1MWLHC', 'AC1Q29DP', 'AC1L2QK3', 'ZINC11616261', 'ZINC117147304', 'KS-00001D5I', 'ZINC242723022', 'AC1L9B2S', 'AC1O41R4', 'AC1OFCEP', 'ZINC208949883', 'AC1OFCJY', 'AC1LIMNP', 'ZINC3881972', '1beta-Methylim

In [17]:
drug_interaction = drug_interaction[
    ~drug_interaction['drug_row'].isin(missing) &
    ~drug_interaction['drug_col'].isin(missing)
].reset_index(drop=True)

In [18]:
drug_interaction.shape

(464744, 4)

In [19]:
all_drugs_before = set(drug_interaction['drug_row']) | set(drug_interaction['drug_col'])
all_drugs_after = set(drug_interaction['drug_row']) | set(drug_interaction['drug_col'])

removed_drugs = all_drugs_before - all_drugs_after
print(f"Number of unique drugs before: {len(all_drugs_before)}")
print(f"Number of unique drugs after: {len(all_drugs_after)}")
print(f"Drugs removed: {len(removed_drugs)}")
print(f"Drugs removed list: {removed_drugs}")

Number of unique drugs before: 4051
Number of unique drugs after: 4051
Drugs removed: 0
Drugs removed list: set()


In [20]:
interaction_drug= set(drug_interaction['drug_row']).union(set(drug_interaction['drug_col']))

In [21]:
interaction_drug_new= set(drug_interaction['drug_row']).union(set(drug_interaction['drug_col']))

In [22]:
len(interaction_drug),len(interaction_drug_new)

(4051, 4051)

In [23]:
all_drug_with_feature==interaction_drug_new

True

In [24]:
drug_feature['drug_id'] = range(len(drug_feature))

In [25]:
drug_to_id = dict(zip(drug_feature['drug'], drug_feature['drug_id']))

In [26]:
drug_feature.drop(columns=['drug_id','drug'],inplace=True)

In [27]:
drug_feature.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [28]:
drug_feature.shape

(4051, 2048)

In [29]:
drug_interaction['drug_row_id'] = drug_interaction['drug_row'].map(drug_to_id)
drug_interaction['drug_col_id'] = drug_interaction['drug_col'].map(drug_to_id)

In [30]:
drug_interaction = drug_interaction.drop(columns=['drug_row', 'drug_col'])

In [31]:
cols = ['drug_row_id', 'drug_col_id'] + [col for col in drug_interaction.columns if col not in ['drug_row_id', 'drug_col_id']]
drug_interaction = drug_interaction[cols]

In [32]:
drug_interaction.head()

Unnamed: 0,drug_row_id,drug_col_id,cell_line_name,synergistic_status
0,0,1,A2058,1
1,0,1,A2058,1
2,0,1,A2058,1
3,0,1,A2058,0
4,0,2,A2058,1


In [33]:
drug_feature.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2038,2039,2040,2041,2042,2043,2044,2045,2046,2047
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [34]:
cos_sim_matrix = cosine_similarity(drug_feature)

In [35]:
cos_sim_matrix

array([[1.        , 0.17699808, 0.11211037, ..., 0.11646819, 0.09727208,
        0.14808722],
       [0.17699808, 1.        , 0.2073627 , ..., 0.28200837, 0.22898571,
        0.21912525],
       [0.11211037, 0.2073627 , 1.        , ..., 0.17366199, 0.20719896,
        0.34698416],
       ...,
       [0.11646819, 0.28200837, 0.17366199, ..., 1.        , 0.21525296,
        0.27526932],
       [0.09727208, 0.22898571, 0.20719896, ..., 0.21525296, 1.        ,
        0.26000576],
       [0.14808722, 0.21912525, 0.34698416, ..., 0.27526932, 0.26000576,
        1.        ]], shape=(4051, 4051))

In [36]:
cos_sim_matrix.shape

(4051, 4051)

In [37]:
interaction_drug= set(drug_interaction['drug_row_id']).union(set(drug_interaction['drug_col_id']))

In [38]:
len(interaction_drug)

4051

In [39]:
drug_feature.shape

(4051, 2048)

In [42]:
df_synergy = drug_interaction[drug_interaction['synergistic_status'] == 1]


In [43]:
df_synergy

Unnamed: 0,drug_row_id,drug_col_id,cell_line_name,synergistic_status
0,0,1,A2058,1
1,0,1,A2058,1
2,0,1,A2058,1
4,0,2,A2058,1
5,0,2,A2058,1
...,...,...,...,...
464739,4046,1823,KB-ChR-8-5-11,1
464740,4047,1823,KB-ChR-8-5-11,1
464741,4048,1823,KB-ChR-8-5-11,1
464742,4049,1823,KB-ChR-8-5-11,1


In [45]:
num_drugs = max(drug_interaction['drug_row_id'].max(), drug_interaction['drug_col_id'].max()) + 1


In [46]:
num_drugs

np.int64(4051)

In [47]:
adj_matrix = np.zeros((num_drugs, num_drugs))


In [48]:
adj_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], shape=(4051, 4051))

In [49]:
for _, row in df_synergy.iterrows():
    i = int(row['drug_row_id'])
    j = int(row['drug_col_id'])
    adj_matrix[i, j] = 1
    adj_matrix[j, i] = 1  

In [50]:
np.fill_diagonal(adj_matrix, 0)

In [53]:
adj_matrix.shape

(4051, 4051)

In [54]:
adj_matrix

array([[0., 1., 1., ..., 0., 0., 0.],
       [1., 0., 1., ..., 0., 0., 0.],
       [1., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], shape=(4051, 4051))

In [None]:
drug