In [1]:
from pathlib import Path

import networkx as nx
import numpy as np
import pandas as pd
from deepwalk import DeepWalk

In [2]:
# Load the preprocessed interaction matrix from a tab-separated file.
# The first column of the file supplies the row labels (index_col=0).
file_path = '../data/dgidb/preprocessed_34_10.tsv'
interaction_matrix = pd.read_table(file_path, index_col=0)

In [3]:
# Node labels come straight from the matrix axes:
# column labels are used as drugs, row labels as genes.
drugs = interaction_matrix.columns.tolist()
genes = interaction_matrix.index.tolist()

In [4]:
# Build the bipartite drug-gene graph: drugs (matrix columns) are tagged
# bipartite=0 and genes (matrix rows) are tagged bipartite=1.
G = nx.Graph()
G.add_nodes_from(drugs, bipartite=0)
G.add_nodes_from(genes, bipartite=1)

# An edge stands for an interaction, i.e. a non-zero matrix entry. Testing
# for `== 0` instead would connect exactly the pairs that do NOT interact.
# NOTE(review): assumes 0 means "no interaction" in the preprocessed matrix
# -- confirm against the preprocessing step.
# Missing cells (NaN) are treated as "no interaction".
# The matrix is transposed so that argwhere yields (drug_idx, gene_idx) pairs
# in drug-major order, matching a drug-outer / gene-inner nested loop.
has_interaction = interaction_matrix.fillna(0).to_numpy().T != 0
G.add_edges_from(
    (drugs[drug_idx], genes[gene_idx])
    for drug_idx, gene_idx in np.argwhere(has_interaction)
)

In [5]:
# Set up DeepWalk on the bipartite graph.
# NOTE(review): presumably walk_length is the number of steps per walk,
# num_walks the number of walks started from each node, and workers the
# degree of parallelism -- confirm against the deepwalk module.
model = DeepWalk(
    G,
    walk_length=10,
    num_walks=80,
    workers=5,
)

In [6]:
# Train the model, then fetch the learned embeddings. Later cells look
# entries up in `embeddings` by node name.
# NOTE(review): `iter` is presumably the number of training epochs -- confirm
# against the deepwalk module.
train_options = {'window_size': 5, 'iter': 100}
model.train(**train_options)
embeddings = model.get_embeddings()

Learning embedding vectors...
Learning embedding vectors done!


In [7]:
# Extract embeddings for drugs and genes, keyed by node name
drug_embeddings = {drug: embeddings[drug] for drug in drugs}
gene_embeddings = {gene: embeddings[gene] for gene in genes}

# Convert to pandas DataFrames (orient='index': dict keys become row labels)
drug_embeddings_df = pd.DataFrame.from_dict(drug_embeddings, orient='index')
gene_embeddings_df = pd.DataFrame.from_dict(gene_embeddings, orient='index')

# Export to CSV files
save_path = '../data/dgidb/embeddings'

# to_csv does not create missing directories, so make sure the target exists
# before writing; otherwise a fresh checkout fails on the first export.
save_dir = Path(save_path)
save_dir.mkdir(parents=True, exist_ok=True)

# header=False is the documented way to omit the column-header row.
drug_embeddings_df.to_csv(save_dir / 'deepwalk_drug_embeddings.csv', header=False)
gene_embeddings_df.to_csv(save_dir / 'deepwalk_gene_embeddings.csv', header=False)