# Meta

In [3]:
# Autoreload modules without having to restart the notebook kernel.
% load_ext autoreload
% autoreload 2


# Plotting code from Georg's notebook.
import matplotlib.pyplot as plt

% matplotlib inline
font = {'family': 'DejaVu Sans',
        'weight': 'bold',
        'size': 32}
plt.rc('font', **font)

import networkx as nx
import pandas as pd

# Personal libraries
import lib.graph
import lib.files
import lib.cluster
import lib.constants

UsageError: Line magic function `%` not found.


# Constants

In [5]:
# Edge threshold handed to lib.graph.remove_edges_below_threshold in the cells below.
THRESHOLD = 900

# Node identifier used to select ICP55's connected component
# (presumably its STRING protein ID -- confirm against the network file).
ICP55 = '4932.YER078C'

### ICP55 cc 900 inv
ICP55's largest connected component with edge threshold 900 and inviable proteins included.

In [4]:
# Preprocess the STRING network in one chained expression (innermost call runs
# first): read it, apply the edge THRESHOLD, apply the degree <= 0 node filter,
# then keep the largest connected component containing ICP55.
n0 = lib.graph.get_largest_cc_with_node(
    lib.graph.remove_nodes_with_degree_lte(
        lib.graph.remove_edges_below_threshold(lib.graph.read_STRING(), THRESHOLD),
        0,
    ),
    ICP55,
)

# Write the resulting component to the networks directory as a weighted edgelist.
icp55_inv_path = lib.files.make_filepath_to_networks('icp55-cc-900-inv.txt')
lib.graph.write_weighted_edgelist(n0, icp55_inv_path)

### ICP55 cc 900 noInv
ICP55's largest connected component with edge threshold 900 and inviable proteins excluded.

In [7]:
# Inviable proteins, loaded as a plain list (as_graph=False) so they can be
# handed to remove_nodes_from_list below.
inviable_proteins = lib.graph.read_inviable_proteins(as_graph=False)

# Preprocess the network: read STRING, apply the edge THRESHOLD, then drop the
# inviable proteins...
n1 = lib.graph.remove_nodes_from_list(
    lib.graph.remove_edges_below_threshold(lib.graph.read_STRING(), THRESHOLD),
    inviable_proteins,
)
# ...then apply the degree <= 0 node filter and keep the largest connected
# component containing ICP55.
n1 = lib.graph.get_largest_cc_with_node(
    lib.graph.remove_nodes_with_degree_lte(n1, 0),
    ICP55,
)

# Write the component to the networks directory as a weighted edgelist.
icp55_noinv_path = lib.files.make_filepath_to_networks('icp55-cc-900-noInv.txt')
lib.graph.write_weighted_edgelist(n1, icp55_noinv_path)

### ICP55 cc 900 inv mcl cluster-shorpl<=3
Connected component of ICP55 including inviable proteins.

Cutoff = 900

Markov clustering

Restricted to nodes with shortest path length <= 3 to ICP55 or PIM1, plus nodes belonging to clusters of
size > 5 whose shortest path length to ICP55 or PIM1 is <= 3. Only the largest connected component of the
resulting subgraph is kept.

In [None]:
# --- Parameters -------------------------------------------------------------
# Maximum shortest-path length (to ICP55 or PIM1) for a node or cluster to be kept.
SHORPL_CUTOFF = 3
# A cluster is kept only if its 'size' is strictly greater than this value.
MIN_CLUSTER_SIZE = 5
# Set to True to (over)write the output edgelist. Off by default so that
# re-running the notebook cannot clobber an existing file.
WRITE_OUTPUT = False

# --- Inputs -----------------------------------------------------------------
network_name = 'icp55-cc-900-inv'
network_filepath = lib.files.make_filepath_to_networks(f'{network_name}.txt')
cluster_filepath = lib.files.make_filepath_to_clusters(f'mcl.{network_name}.nodes.csv')
cluster_df_filepath = lib.files.make_filepath_to_clusters(f'mcl.{network_name}.dataframe.csv')

network = lib.graph.read_weighted_edgelist(network_filepath)
clusters = lib.cluster.read_csv(cluster_filepath)
# First row is the header, first column is the index.
cluster_df = pd.read_csv(cluster_df_filepath, header=0, index_col=0)

# --- Select relevant clusters -----------------------------------------------
# Keep clusters that are large enough AND within the cutoff of ICP55 or PIM1.
condition_1 = cluster_df['size'] > MIN_CLUSTER_SIZE
condition_2 = (cluster_df['icp55_shorpl'] <= SHORPL_CUTOFF) | (cluster_df['pim1_shorpl'] <= SHORPL_CUTOFF)
relevant_cluster_df = cluster_df[condition_1 & condition_2]
# The 'cluster' column is used to index into `clusters`.
relevant_clusters = [clusters[idx] for idx in relevant_cluster_df['cluster']]

# --- Collect relevant proteins ----------------------------------------------
# Union of the proteins in the selected clusters and the neighbourhoods of
# ICP55 and PIM1 obtained with SHORPL_CUTOFF.
relevant_proteins = set()
relevant_proteins.update(lib.cluster.proteins(relevant_clusters))
relevant_proteins.update(lib.graph.get_neighbourhood(network, lib.constants.ICP55, SHORPL_CUTOFF))
relevant_proteins.update(lib.graph.get_neighbourhood(network, lib.constants.PIM1, SHORPL_CUTOFF))
relevant_cluster_network = lib.graph.get_largest_connected_component(network.subgraph(relevant_proteins))

# --- Output -----------------------------------------------------------------
# The filename is derived from the parameters above so it cannot drift out of
# sync with them; with the current values it resolves to
# 'mcl_shorpl_lte_3.icp55-cc-900-inv.csv'.
filepath = lib.files.make_filepath_to_networks(f'mcl_shorpl_lte_{SHORPL_CUTOFF}.{network_name}.csv')
# SETTING WRITE_OUTPUT = True WILL OVERWRITE FILE
if WRITE_OUTPUT:
    lib.graph.write_weighted_edgelist(relevant_cluster_network, filepath)