
# d3_ic10_medium
- this file is for dream3 data, with medium clusters generated using the ic_10 parameter (spectral clustering)
- we consider two different cutoffs for adding proteins back:
1. Square-root method: a protein must connect at least sqrt(num components) components to qualify for addition to a cluster
2. Linear method: a protein must connect at least 0.5 * num components components to qualify for addition to a cluster


In [1]:
# imports:

# standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# library used for functional enrichment analysis
from func_e.FUNC_E import FUNC_E  # a method that can do funtional enrichment analysis!
import func_e.vocabs.all as vocabs
# following code is necessary as some functions used in the funcE class produce future warnings but i can't fix it as it is not my code
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# importing the custom classes i built
from matrix_class import ProteinMatrix # ppi matrix 
from cluster_class import AllClusters # dictionary to hold all clusters (in form number of cluster : list of proteins in that cluster)
from degreelist_class import DegreeList # creates a list of all proteins in order of their degree

# helper functions for setting up program
from recipe_utils import initialize_matrix_clusters_degreelist
from recipe_utils import find_clusters_and_proteins_together

# helper functions for functional enrichment
from recipe_utils import print_querylist_of_clusters_to_file
from recipe_utils import print_protein_background_to_file
from recipe_utils import create_term_mapping_list
from recipe_utils import get_initialized_fe

 **must change filenames:**

In [2]:
# Input locations for this run; both paths must be updated together when
# pointing the notebook at a different dataset.
clusters_file = "data/d3_ic10_medium/dream3_medium-cluster.json"
interactions_file = "data/interactions/3_signal_omnipath_directed_v3.txt"

In [3]:
# Build the matrix, clusters and degree list that every later cell works on,
# reading them from the two input files configured above.
(matrix, clusters, degreelist) = initialize_matrix_clusters_degreelist(
    interactions_filepath=interactions_file,
    clusters_filepath=clusters_file,
)

*may want to change parameters below:*

In [4]:
# Linear cutoff run (see the notebook header): use_sqrt=False, protein_ratio=0.5.
# NOTE(review): the precise meaning of the cluster_* and protein_* parameters
# lives in recipe_utils -- confirm there before tuning these values.
(linear_qualifying_clusters, linear_qualifying_proteins) = find_clusters_and_proteins_together(
    matrix,
    clusters,
    degreelist,
    cluster_ratio=0,
    cluster_constant=2,
    use_sqrt=False,
    protein_ratio=0.5,
    protein_constant=0,
)

In [5]:
# Square-root cutoff run (see the notebook header): use_sqrt=True, protein_ratio=1.
# NOTE(review): the precise meaning of the cluster_* and protein_* parameters
# lives in recipe_utils -- confirm there before tuning these values.
(sqrt_qualifying_clusters, sqrt_qualifying_proteins) = find_clusters_and_proteins_together(
    matrix,
    clusters,
    degreelist,
    cluster_ratio=0,
    cluster_constant=2,
    use_sqrt=True,
    protein_ratio=1,
    protein_constant=0,
)

In [6]:
# Echo the square-root-cutoff result: a dict mapping an integer key
# (presumably the cluster number -- confirm in recipe_utils) to the list of
# proteins that qualified for that cluster. Kept as a bare expression so the
# notebook displays it.
sqrt_qualifying_proteins

{0: ['PRKACA'],
 1: ['SRC'],
 2: ['PPP3CC',
  'PPP3CB',
  'PPP3CA',
  'CALM2',
  'PRKAA1',
  'CDK5',
  'PPP2CB',
  'PPP2CA',
  'GSK3B',
  'CDK1',
  'PRKACA'],
 3: ['MAP3K14',
  'RELA',
  'STAT3',
  'EP300',
  'MAPK8',
  'AR',
  'MAPK14',
  'GSK3B',
  'TP53',
  'MAPK3',
  'AKT1',
  'CSNK2A1',
  'MAPK1'],
 4: ['PPP3CA', 'PPP2CB', 'PPP2CA', 'MAPK14', 'MAPK3', 'MAPK1'],
 5: ['MDM2',
  'EP300',
  'ABL1',
  'ATM',
  'MAPK14',
  'TP53',
  'MAPK3',
  'AKT1',
  'CSNK2A1',
  'CDK1'],
 6: ['CDK5', 'PAK1', 'MAPK3', 'MAPK1', 'PRKACA', 'PRKCA', 'SRC'],
 7: ['NFKB1',
  'SMAD4',
  'PPP2CB',
  'SMAD3',
  'PPP2CA',
  'MAPK8',
  'MAPK14',
  'MAPK3',
  'MAPK1'],
 8: ['SMAD4', 'SMAD2', 'SMAD3'],
 9: ['EGFR', 'CDK1'],
 10: ['AKT1'],
 11: ['TP53'],
 12: ['MAPT', 'JUN', 'MAPK14'],
 13: ['MYC', 'TP53', 'MAPK3', 'MAPK1', 'CDK1'],
 14: ['MAPK14'],
 15: ['MTOR', 'GSK3B', 'AKT1'],
 16: ['MAPK14', 'MAPK3', 'MAPK1'],
 17: ['IKBKG', 'TRAF2', 'MAP3K7', 'TRAF6'],
 18: ['CREBBP',
  'SMAD4',
  'SMAD2',
  'RELA',
  'SMAD3

In [7]:
# Echo the linear-cutoff result: a dict mapping an integer key (presumably the
# cluster number -- confirm in recipe_utils) to the list of proteins that
# qualified for that cluster. Kept as a bare expression so the notebook
# displays it.
linear_qualifying_proteins

{0: ['PRKACA'],
 1: ['SRC'],
 2: ['PRKACA'],
 3: ['AR', 'MAPK14', 'GSK3B', 'MAPK3', 'AKT1', 'CSNK2A1', 'MAPK1'],
 4: ['MAPK3', 'MAPK1'],
 6: ['SRC'],
 7: ['MAPK1'],
 10: ['AKT1'],
 13: ['TP53'],
 16: ['MAPK3', 'MAPK1'],
 18: ['EP300'],
 19: ['PRKCA'],
 20: ['PRKCA'],
 21: ['LYN', 'GRB2', 'FYN', 'EGFR', 'SRC'],
 22: ['CSNK2A1'],
 23: ['AR'],
 24: ['CDK1'],
 25: ['TP53'],
 26: ['CDK1'],
 27: ['SRC'],
 29: ['ATR'],
 30: ['PRKACA'],
 39: ['PRKCA'],
 41: ['PRKACA', 'PRKCA'],
 44: ['CSNK2A1'],
 45: ['PRKACA'],
 46: ['CDK1'],
 47: ['TP53', 'CDK1'],
 50: ['RCAN1',
  'TRIP6',
  'GNRHR',
  'PIAS1',
  'CDK9',
  'CDK7',
  'TLR2',
  'TLR4',
  'CDKN1B',
  'IRAK1',
  'CAMK2A',
  'TRAF6',
  'AR',
  'MAPK14',
  'TP53',
  'MAPK1',
  'SRC'],
 53: ['SRC']}