In [201]:
from pathlib import Path

import networkx as nx
import numpy as np
import pandas as pd
from gensim.models import Word2Vec

In [202]:
# Location of the tab-separated interaction table.
file_path = '../data/dgidb/preprocessed_34_10.tsv'

# The first column of the file becomes the row index of the matrix.
interaction_matrix = pd.read_csv(
    file_path,
    index_col=0,
    sep='\t',
)

In [203]:
# Node labels: drugs come from the column labels, genes from the row index.
drugs = [*interaction_matrix.columns]
genes = [*interaction_matrix.index]

# Locate every non-zero cell of the matrix. np.nonzero walks the mask in
# row-major order (gene by gene, then drug by drug within a gene), so the
# resulting edge list has the same ordering as a nested row/column loop.
has_interaction = interaction_matrix.to_numpy() != 0
gene_positions, drug_positions = np.nonzero(has_interaction)

# Each edge is stored as a (drug, gene) pair.
edges = [
    (drugs[col], genes[row])
    for row, col in zip(gene_positions, drug_positions)
]

In [204]:
# Display how many drug labels were collected in `drugs`.
len(drugs)

1236

In [205]:
# Train Word2Vec on the edge list: every (drug, gene) tuple in `edges` is
# passed to gensim as one two-token "sentence".
#
# Reproducibility: gensim's documentation states that a fully deterministic
# run needs a fixed seed AND a single worker thread, because multi-threaded
# training introduces ordering jitter from OS thread scheduling. The seed is
# written out explicitly (1 is gensim's default) and workers is set to 1 so
# that re-running this cell on the same edge list can reproduce the vectors.
# NOTE(review): per the gensim docs, reproducibility across interpreter
# launches additionally requires a fixed PYTHONHASHSEED environment variable.
model = Word2Vec(
    sentences=edges,
    vector_size=128,  # dimensionality of each embedding vector
    window=5,
    min_count=1,      # keep tokens that appear only once
    workers=1,        # single thread for deterministic training
    seed=1,           # explicit RNG seed (same value as gensim's default)
)

In [206]:
def _vectors_for(labels, keyed_vectors):
    """Map each label present in `keyed_vectors` to its embedding vector.

    Labels that are not in `keyed_vectors` are skipped, so the result may
    contain fewer entries than `labels`.
    """
    found = {}
    for label in labels:
        if label in keyed_vectors:
            found[label] = keyed_vectors[label]
    return found


# Look up the trained vector of every drug and gene known to the model.
drug_embeddings = _vectors_for(drugs, model.wv)
gene_embeddings = _vectors_for(genes, model.wv)

# One row per node, one column per embedding dimension.
df_drugs = pd.DataFrame(drug_embeddings).transpose()
df_genes = pd.DataFrame(gene_embeddings).transpose()


In [207]:
save_path = '../data/dgidb/embeddings'

# Create the output directory (and any missing parents) before writing;
# to_csv does not create directories and would fail if it is missing.
output_dir = Path(save_path)
output_dir.mkdir(parents=True, exist_ok=True)

# Save to CSV files: the row labels (drug / gene names) are written as the
# first column, and no header row is emitted. header=False is the documented
# way to suppress the header line.
df_drugs.to_csv(output_dir / 'word2vec_drug_embeddings.csv', header=False)
df_genes.to_csv(output_dir / 'word2vec_gene_embeddings.csv', header=False)