In [None]:
import os
import numpy as np
import scipy
import pandas as pd
import networkx as nx

In [None]:
# Show the current working directory; the Snakefile path defined in the next
# cell is built relative to it.
os.getcwd()

In [None]:
# Location of the Snakefile: one directory above the current working directory.
snakefile_path = f"{os.getcwd()}/../Snakefile"

In [None]:
# If no `snakemake` object is defined yet (presumably because the notebook is
# being run interactively rather than by the workflow -- TODO confirm), build
# one from the arguments of the `ppi_random_walk` rule in the Snakefile.
if "snakemake" not in globals():
    from snakemk_util import load_rule_args

    snakemake = load_rule_args(snakefile=snakefile_path, rule_name='ppi_random_walk', root="../")

In [None]:
# Look up the named inputs/outputs on the `snakemake` object: two inputs
# (`lit_rev_genes`, read below as CSV; `string_db`, read below as a
# tab-separated table) and one output (`gene_seed_prob`, written at the end).
input_lit_genes = snakemake.input['lit_rev_genes']
input_string_db = snakemake.input['string_db']
output_gene_seed_prob = snakemake.output['gene_seed_prob']

In [None]:
# Load the table of known genes and preview its first rows.
known_genes = pd.read_csv(input_lit_genes)
known_genes.head()

In [None]:
# Deduplicate the Ensembl gene IDs of the known genes; these are the seeds.
unique_known_genes = np.unique(known_genes["Ensembl_gene_ID"])

In [None]:
# Number of distinct known gene IDs.
len(unique_known_genes)

In [None]:
# Read the tab-separated edge table (`read_table` uses a tab separator by default).
edges = pd.read_table(input_string_db)

In [None]:
# Preview the first rows of the edge table.
edges.head()

In [None]:
# Display the edge table itself.
edges

In [None]:
# Keep only the rows that have no missing value in any column (rebinds `edges`).
edges = edges.dropna()

In [None]:
%%time
# Build the graph from the edge table: `gene1`/`gene2` are the endpoints and
# `combined_score` is stored as an edge attribute of the same name.
string_graph = nx.from_pandas_edgelist(
    edges,
    source='gene1',
    target='gene2',
    edge_attr='combined_score',
)

In [None]:
# One row per graph node, in the graph's node iteration order. The row position
# is used further down as the index into the probability vectors.
genes = pd.DataFrame({'gene_id': string_graph.nodes()})

In [None]:
# Display the node table.
genes

In [None]:
# Row labels of the graph nodes that are also known (seed) genes.
idx_of_seeds = genes.index[np.isin(genes["gene_id"], unique_known_genes)]

In [None]:
# Weight of the restart term in the random-walk update further down.
p_restart = 0.3

# Start vector with one entry per graph node: 1 at every seed gene, 0 elsewhere.
p_init = np.zeros(string_graph.number_of_nodes())
p_init[idx_of_seeds] = 1

In [None]:
# Turn the seed indicator vector into a probability distribution.
# Fail early when no seed gene is a node of the graph: the sum would be 0 and
# the division would silently fill the vector (and every downstream score)
# with NaN.
seed_mass = p_init.sum()
if seed_mass == 0:
    raise ValueError(
        "None of the known genes is present in the graph; "
        "cannot build the initial probability vector."
    )
p_init = p_init / seed_mass

In [None]:

# Build the weighted adjacency matrix and row-normalise it into a transition matrix.
# The graph stores the edge weight under the attribute 'combined_score'. Asking
# for an attribute that no edge has (previously 'score') makes networkx use
# weight 1 for every edge, so the walk would silently ignore the scores.
# `to_scipy_sparse_matrix` was removed in networkx 3.0; use its replacement
# when it is available and fall back to the old function otherwise.
if hasattr(nx, 'to_scipy_sparse_array'):
    adj = nx.to_scipy_sparse_array(string_graph, weight='combined_score')
else:
    adj = nx.to_scipy_sparse_matrix(string_graph, weight='combined_score')

# np.asarray(...).ravel() handles both the np.matrix returned by sparse
# matrices and the ndarray returned by sparse arrays.
row_sums = np.asarray(adj.sum(axis=1), dtype=float).ravel()
# Rows that sum to 0 keep an all-zero transition row instead of producing inf/NaN.
inv_row_sums = np.divide(
    1.0, row_sums, out=np.zeros_like(row_sums), where=row_sums != 0
)
norm_c = scipy.sparse.diags(inv_row_sums)
adj = (norm_c @ adj)

# Random walk with restart, run for a fixed number of 20 iterations:
# p <- (1 - p_restart) * A^T p + p_restart * p_init
p = p_init
for i in range(20):
    p = (1 - p_restart) * (adj.T @ p) + p_restart * p_init

In [None]:
# Attach the final random-walk vector as a new 'score' column
# (the column is added to the existing `genes` frame).
genes['score'] = p

In [None]:
# Display the node table, now including the 'score' column.
genes

## Save random walk probabilities

In [None]:
# Write the scored node table as a tab-separated file to the workflow output path.
# NOTE(review): the index is not disabled, so the row index is presumably written
# as an unnamed first column -- confirm that downstream readers expect it.
genes.to_csv(output_gene_seed_prob, sep = '\t')

## Plot

In [None]:
import plotnine as p9

In [None]:
# Histogram of the random-walk scores on a log10 x-axis, with a red vertical
# reference line at 1e-4.
# Every component, including the theme, is joined with `+`. A comma before the
# theme would turn the expression into a (plot, theme) tuple and the theme
# would never be applied to the plot.
(
    p9.ggplot(genes, p9.aes('score')) +
    p9.geom_histogram() +
    p9.scale_x_log10() +
    p9.geom_vline(xintercept = 1e-4, color = 'red') +
    p9.theme_bw()
)