In [1]:
import numpy as np
import networkx as nx
import pandas as pd
from struc2vec import Struc2Vec

In [2]:
# Load the preprocessed interaction matrix from a tab-separated file.
# The first column of the file supplies the row labels of the DataFrame.
file_path = '../data/dgidb/preprocessed_34_10.tsv'
interaction_matrix = pd.read_csv(
    file_path,
    sep='\t',
    index_col=0,
)

In [3]:
# Node labels for the graph: column labels are the drugs and row labels
# are the genes of the interaction matrix.
drugs = interaction_matrix.columns.tolist()
genes = interaction_matrix.index.tolist()

In [4]:
# Build the bipartite drug-gene graph. Drugs form partition 0 and genes
# form partition 1 (stored in the `bipartite` node attribute).
G = nx.Graph()
G.add_nodes_from(drugs, bipartite=0)
G.add_nodes_from(genes, bipartite=1)

# Connect a drug and a gene only when their matrix entry is nonzero.
# Testing for `== 0` here would link every NON-interacting pair instead.
# NOTE(review): assumes a nonzero entry marks an interaction -- confirm
# against the preprocessing step that produced the matrix.
# Missing values are treated as "no interaction".
has_interaction = interaction_matrix.fillna(0).to_numpy() != 0

# The matrix is genes (rows) x drugs (columns); transposing the mask makes
# argwhere yield (drug, gene) index pairs in drug-major order, the same
# order in which a nested drug/gene loop would visit them.
G.add_edges_from(
    (drugs[drug_idx], genes[gene_idx])
    for drug_idx, gene_idx in np.argwhere(has_interaction.T)
)

In [5]:
# Fit struc2vec on the bipartite graph and collect the per-node embeddings.
model = Struc2Vec(
    G,
    10,  # presumably the walk length -- confirm against struc2vec.Struc2Vec
    80,  # presumably the number of walks per node -- confirm
    workers=5,
    verbose=40,
)
model.train()

# `embeddings` is indexed by node label in the export step that follows.
embeddings = model.get_embeddings()

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:   34.4s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:   34.4s remaining:   34.4s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:   36.3s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:    1.6s
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    2.0s remaining:    2.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    2.9s finished


Learning representation...
Learning representation done!


In [6]:
# Split the learned embeddings into one mapping per node type,
# keyed by node label.
drug_embeddings = {name: embeddings[name] for name in drugs}
gene_embeddings = {name: embeddings[name] for name in genes}

# One row per node: the node label becomes the index and the embedding
# components become the columns.
drug_embeddings_df = pd.DataFrame.from_dict(
    drug_embeddings,
    orient='index',
)
gene_embeddings_df = pd.DataFrame.from_dict(
    gene_embeddings,
    orient='index',
)

# Write both tables as CSV without a header row; the node label is written
# as the first column of each line.
save_path = '../data/dgidb/embeddings'

drug_embeddings_df.to_csv(
    save_path+'/struc2vec_drug_embeddings.csv',
    header=False,
)
gene_embeddings_df.to_csv(
    save_path+'/struc2vec_gene_embeddings.csv',
    header=False,
)