In [1]:
import sys
import warnings
from os import listdir
from os.path import exists

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
from joblib import Parallel, delayed

warnings.filterwarnings("ignore")

from tqdm import tqdm

In [2]:
def get_weight_parallel(gene, G, corr, root="GO:0008150"):
    """Average the rows of ``corr`` for every node lying between ``root`` and ``gene``.

    All simple paths from ``root`` to ``gene`` in ``G`` are enumerated, the
    nodes on those paths are pooled (``gene`` itself excluded), and the rows
    of ``corr`` labelled by those nodes are averaged column-wise.

    Parameters
    ----------
    gene : hashable
        Target node of the paths.
    G : networkx.DiGraph
        Graph handed to ``nx.all_simple_paths``.
    corr : pandas.DataFrame
        Frame indexed by node label; it must hold a row for every node found
        on the paths.
    root : hashable, optional
        Source node of the paths. Defaults to ``"GO:0008150"``.

    Returns
    -------
    list
        ``[gene, v_1, ..., v_k]`` with one averaged value per column of
        ``corr``.

    Raises
    ------
    KeyError
        If no path leads from ``root`` to ``gene``, or if a node on a path
        has no row in ``corr``.
    """
    paths = list(nx.all_simple_paths(G, source=root, target=gene))
    if not paths:
        # Previously this case surfaced as a bare KeyError from
        # ``set().remove(gene)``; keep the exception type, explain the cause.
        raise KeyError(
            "no path from {!r} to {!r}; cannot compute its weight".format(root, gene)
        )

    # dict.fromkeys de-duplicates while keeping first-seen order, so the rows
    # handed to np.average come in a reproducible order (a plain set of
    # strings iterates in a hash-dependent order that can vary per process).
    on_path = dict.fromkeys(node for path in paths for node in path)
    on_path.pop(gene, None)

    return [gene] + list(np.average(corr.loc[list(on_path)], axis=0))

In [3]:
def get_weight(corr_score, go, out_path="../DrugCell/data_rcellminer/weight_corr.csv"):
    """Compute a weight row for every gene and write it out with the term rows.

    The correlation table is read from ``corr_score``, a directed graph is
    built from the two columns of ``go``, and ``get_weight_parallel`` is run
    for each gene in parallel. The resulting gene rows are appended below the
    original table and the combined frame is written to ``out_path``.

    Parameters
    ----------
    corr_score : str or path-like
        CSV file whose first column is used as the index.
    go : pandas.DataFrame
        Edge list with columns ``0`` and ``1``; each row becomes one edge
        ``(go[0], go[1])`` of the graph.
    out_path : str or path-like, optional
        Destination CSV. Defaults to the path the notebook has always used.

    Returns
    -------
    None
        The result is written to ``out_path``.
    """
    corr_score = pd.read_csv(corr_score, index_col=0)

    nodes = list(set(go[0]) | set(go[1]))
    # Term identifiers start with "GO:" (cf. the root "GO:0008150"). A plain
    # substring test for "GO" would also discard gene symbols that merely
    # contain those letters (GOT1, GOLGA2, ...), so test the prefix instead.
    # Sorting makes the order of the output rows reproducible across runs.
    genes = sorted({child for child in go[1] if not child.startswith("GO:")})

    G = nx.DiGraph()
    G.add_nodes_from(nodes)
    G.add_edges_from(list(go.itertuples(index=False, name=None)))

    print("Start to calculate the weight of each gene.")
    rows = Parallel(n_jobs=-1)(
        delayed(get_weight_parallel)(gene, G, corr_score) for gene in tqdm(genes)
    )

    # Each row is [gene, v_1, ..., v_k]. Building the frame with explicit
    # index/columns also works when there are no genes at all.
    gene_weights = pd.DataFrame(
        [row[1:] for row in rows],
        index=[row[0] for row in rows],
        columns=corr_score.columns,
    )

    weight = pd.concat([corr_score, gene_weights])
    weight.to_csv(out_path)

In [4]:
def get_graph_info(corr_score, onto_file):
    """Dump the ontology edge list to CSV, then compute the gene weights.

    ``onto_file`` is parsed with ``pd.read_table`` without a header row and
    only its first two columns (labels ``0`` and ``1``) are kept. That edge
    list is saved to ``../data/graph.csv`` and then passed, together with
    ``corr_score``, to ``get_weight``.
    """
    ontology = pd.read_table(onto_file, header=None)
    edges = ontology[[0, 1]]
    edges.to_csv("../data/graph.csv")

    get_weight(corr_score, edges)

In [5]:
# Run the pipeline on the rcellminer data: correlation table + ontology file.
get_graph_info(
    corr_score='../DrugCell/data_rcellminer/corr_score.csv',
    onto_file='../DrugCell/data_rcellminer/go.txt',
)

Start to calculate the weight of each gene.


100%|██████████| 2727/2727 [00:46<00:00, 58.95it/s]


In [6]:
# Build a SMILES -> CID lookup from the mapping table.
pubchem_id = pd.read_csv('../data/nsc_cid_smiles.csv')
pubchem_id = dict(
    zip(pubchem_id['SMILES'].to_numpy(), pubchem_id['CID'].to_numpy())
)

# Relabel the weight table's columns (SMILES strings) with their CIDs and
# save the result next to the original file. A column whose SMILES is not in
# the lookup raises KeyError.
t = pd.read_csv("../DrugCell/data_rcellminer/weight_corr.csv", index_col=0)
t.columns = [pubchem_id[smiles] for smiles in t.columns]
t.to_csv("../DrugCell/data_rcellminer/weight_corr_cid.csv")