# Meta

In [2]:
# Disabled path tweak, kept for reference: it would insert the parent directory
# into sys.path.
# NOTE(review): presumably once needed so that the local `lib` package could be
# imported — confirm before deleting.
#import sys
#sys.path.insert(1, "..\\")

# Autoreload modules without having to restart the notebook kernel.
# Mode 2 reloads all imported modules before each cell is executed, so edits to
# the project's own modules take effect on the next run.
%load_ext autoreload
%autoreload 2


# Plotting code from Georg's notebook.
import matplotlib.pyplot as plt

# Draw figures inline, below the cell that produces them.
%matplotlib inline
# Default font applied through matplotlib's rc settings; it affects every
# figure created after this cell has run.
font = {'family': 'DejaVu Sans',
        'weight': 'bold',
        'size': 32}
plt.rc('font', **font)

import networkx as nx
import pandas as pd

# Personal libraries
import lib.graph
import lib.files
import lib.cluster
import lib.constants

# Constants

In [3]:
# Cutoff handed to lib.graph.remove_edges_below_threshold when the networks
# below are built.
THRESHOLD = 900

# Node identifier of ICP55; the networks below are reduced to the connected
# component containing this node.
# NOTE(review): looks like a STRING-style "<taxon>.<ORF>" id — confirm.
ICP55 = '4932.YER078C'

### ICP55 cc 900 inv
ICP55's largest connected component with edge threshold 900 and inviable proteins included.

In [4]:
# Build the "inv" network (inviable proteins are NOT removed here):
#   1. read the STRING network and filter its edges with THRESHOLD,
#   2. filter nodes with remove_nodes_with_degree_lte(..., 0)
#      (presumably this drops nodes left without any edge — confirm in lib.graph),
#   3. keep the connected component that contains ICP55.
n0 = lib.graph.remove_edges_below_threshold(lib.graph.read_STRING(), THRESHOLD)
n0 = lib.graph.get_largest_cc_with_node(
    lib.graph.remove_nodes_with_degree_lte(n0, 0),
    ICP55,
)

# Save the result as a weighted edge list in the networks directory.
# Note that running this cell rewrites the file.
lib.graph.write_weighted_edgelist(
    n0,
    lib.files.make_filepath_to_networks('icp55-cc-900-inv.txt'),
)

### ICP55 cc 900 noInv
ICP55's largest connected component with edge threshold 900 and inviable proteins excluded.

In [5]:
# Inviable proteins, requested in list form (as_graph=False) so they can be
# passed straight to remove_nodes_from_list below.
inviable_proteins = lib.graph.read_inviable_proteins(as_graph=False)

# Build the "noInv" network:
#   1. read the STRING network and filter its edges with THRESHOLD,
#   2. remove the inviable proteins,
#   3. filter nodes with remove_nodes_with_degree_lte(..., 0)
#      (presumably this drops nodes left without any edge — confirm in lib.graph),
#   4. keep the connected component that contains ICP55.
n1 = lib.graph.remove_edges_below_threshold(lib.graph.read_STRING(), THRESHOLD)
n1 = lib.graph.remove_nodes_with_degree_lte(
    lib.graph.remove_nodes_from_list(n1, inviable_proteins),
    0,
)
n1 = lib.graph.get_largest_cc_with_node(n1, ICP55)

# Save the result as a weighted edge list in the networks directory.
# Note that running this cell rewrites the file.
lib.graph.write_weighted_edgelist(
    n1,
    lib.files.make_filepath_to_networks('icp55-cc-900-noInv.txt'),
)

### ICP55 cc 900 inv mcl cluster-shorpl<=3
Connected component of ICP55 including inviable proteins.

Cutoff = 900

Markov clustering

Restricted to nodes whose shortest path length to ICP55 or PIM1 is <= 3, plus nodes belonging to
clusters (of size > 5) whose shortest path length to ICP55 or PIM1 is <= 3.
Only the largest connected component of the resulting subgraph is kept.

In [6]:
# Largest shortest-path length (to ICP55 or PIM1) that still counts as relevant.
SHORPL_CUTOFF = 3

# Input files: the network itself, the cluster node lists and the per-cluster
# summary table, all derived from the same network name.
network_name = 'icp55-cc-900-inv'
network_filepath = lib.files.make_filepath_to_networks(f'{network_name}.txt')
cluster_filepath = lib.files.make_filepath_to_clusters(f'mcl.{network_name}.nodes.csv')
cluster_df_filepath = lib.files.make_filepath_to_clusters(f'mcl.{network_name}.dataframe.csv')

network = lib.graph.read_weighted_edgelist(network_filepath)
clusters = lib.cluster.read_csv(cluster_filepath)
cluster_df = pd.read_csv(cluster_df_filepath, header=0, index_col=0)

# Keep a cluster when its 'size' is above 5 AND its shortest-path length to
# either ICP55 or PIM1 is within the cutoff.
condition_1 = cluster_df['size'].gt(5)
condition_2 = (
    cluster_df['icp55_shorpl'].le(SHORPL_CUTOFF)
    | cluster_df['pim1_shorpl'].le(SHORPL_CUTOFF)
)
relevant_cluster_df = cluster_df.loc[condition_1 & condition_2]
# Each value in the 'cluster' column is used as the key into `clusters`.
relevant_clusters = [
    clusters[cluster_key] for cluster_key in relevant_cluster_df['cluster']
]

# Union of: members of the selected clusters, and the neighbourhoods of ICP55
# and PIM1 obtained with the same cutoff.
relevant_proteins = set().union(
    lib.cluster.proteins(relevant_clusters),
    lib.graph.get_neighbourhood(network, lib.constants.ICP55, SHORPL_CUTOFF),
    lib.graph.get_neighbourhood(network, lib.constants.PIM1, SHORPL_CUTOFF),
)
# Restrict the network to those proteins, then keep only its largest
# connected component.
relevant_cluster_network = lib.graph.get_largest_connected_component(
    network.subgraph(relevant_proteins)
)

filepath = lib.files.make_filepath_to_networks('mcl_shorpl_lte_3.icp55-cc-900-inv.csv')
# UNCOMMENTING BELOW WILL OVERWRITE FILE
# lib.graph.write_weighted_edgelist(relevant_cluster_network, filepath)

In [7]:
# Number of proteins selected in total. The set is named `relevant_proteins`
# (plural); the singular spelling is undefined and raised a NameError.
print(len(relevant_proteins))

# Jupyter renders only the last expression of a cell, so the cluster list is
# placed last to be shown as the cell's output.
relevant_clusters

[['4932.YPL135W',
  '4932.YCL017C',
  '4932.YDL120W',
  '4932.YER048W-A',
  '4932.YGL018C',
  '4932.YJR122W',
  '4932.YKL040C',
  '4932.YLL027W',
  '4932.YLR369W',
  '4932.YMR301C',
  '4932.YOR196C',
  '4932.YPL059W',
  '4932.YPL252C',
  '4932.YPR067W',
  '4932.YAL044W-A',
  '4932.YAL046C',
  '4932.YOR226C'],
 ['4932.YOR232W',
  '4932.YDR376W',
  '4932.YJR045C',
  '4932.YNL328C',
  '4932.YEL030W',
  '4932.YFL016C'],
 ['4932.YGL043W',
  '4932.YBR154C',
  '4932.YBR279W',
  '4932.YDL108W',
  '4932.YDL140C',
  '4932.YDR079C-A',
  '4932.YDR311W',
  '4932.YDR404C',
  '4932.YDR443C',
  '4932.YER171W',
  '4932.YGL070C',
  '4932.YOR210W',
  '4932.YPR133C',
  '4932.YPL122C',
  '4932.YGR116W',
  '4932.YOR123C',
  '4932.YPL042C',
  '4932.YOR224C',
  '4932.YHR143W-A',
  '4932.YLR418C',
  '4932.YGL127C',
  '4932.YGL244W',
  '4932.YPR086W',
  '4932.YML010W',
  '4932.YPR187W',
  '4932.YPR161C',
  '4932.YKL028W',
  '4932.YIL021W',
  '4932.YGR063C',
  '4932.YMR277W',
  '4932.YJL006C',
  '4932.YKR062W',
