
# d2_ic50_small
- This file is for dream2 data, with small clusters generated using the ic_50 parameter (spectral clustering).
- We consider two different cutoffs for adding proteins back:
1. Square-root method: a protein must connect at least sqrt(num components) components to qualify for addition to a cluster.
2. Linear method: a protein must connect at least 0.5 * (num components) components to qualify for addition to a cluster.


In [1]:
# imports:

# standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# library used for functional enrichment analysis
from func_e.FUNC_E import FUNC_E  # a method that can do funtional enrichment analysis!
import func_e.vocabs.all as vocabs
# The following filter is needed because some functions used by the FUNC_E class emit FutureWarnings; that code is third-party, so the warnings are silenced here rather than fixed.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# importing the custom classes i built
from matrix_class import ProteinMatrix # ppi matrix 
from cluster_class import AllClusters # dictionary to hold all clusters (in form number of cluster : list of proteins in that cluster)
from degreelist_class import DegreeList # creates a list of all proteins in order of their degree

# helper functions for setting up program
from recipe_utils import initialize_matrix_clusters_degreelist
from recipe_utils import find_clusters_and_proteins_together

# helper functions for functional enrichment
from recipe_utils import print_querylist_of_clusters_to_file
from recipe_utils import print_protein_background_to_file
from recipe_utils import create_term_mapping_list
from recipe_utils import get_initialized_fe

 **must change filenames:**

In [2]:
# Input paths for this run -- edit both when pointing the notebook at a different dataset.
# Cluster definitions (JSON); handed to the setup helper as `clusters_filepath`.
clusters_file = "data/d2_ic50_small/dream2_small-cluster.json"
# Interaction list; handed to the setup helper as `interactions_filepath`.
interactions_file = "data/interactions/2_ppi_inweb_v2.txt"

In [3]:
# Build the three shared objects used by every later cell from the two input
# files: the PPI matrix, the cluster collection, and the degree-ordered
# protein list.
matrix, clusters, degreelist = initialize_matrix_clusters_degreelist(
    interactions_filepath=interactions_file,
    clusters_filepath=clusters_file,
)

*may want to change parameters below:*

In [4]:
# Linear cutoff run (use_sqrt=False, protein_ratio=0.5). This appears to be the
# "linear method" from the notebook header: a protein must connect at least
# 0.5 * (num components) components to be added back to a cluster.
# NOTE(review): cluster_ratio / cluster_constant presumably set which clusters
# qualify -- confirm against recipe_utils before changing them.
linear_qualifying_clusters, linear_qualifying_proteins = find_clusters_and_proteins_together(
    matrix,
    clusters,
    degreelist,
    cluster_ratio=0,
    cluster_constant=2,
    use_sqrt=False,
    protein_ratio=0.5,
    protein_constant=0,
)

In [5]:
# Square-root cutoff run (use_sqrt=True, protein_ratio=1). This appears to be
# the "square-root method" from the notebook header: a protein must connect at
# least sqrt(num components) components to be added back to a cluster.
# NOTE(review): cluster_ratio / cluster_constant presumably set which clusters
# qualify -- confirm against recipe_utils before changing them.
sqrt_qualifying_clusters, sqrt_qualifying_proteins = find_clusters_and_proteins_together(
    matrix,
    clusters,
    degreelist,
    cluster_ratio=0,
    cluster_constant=2,
    use_sqrt=True,
    protein_ratio=1,
    protein_constant=0,
)

In [6]:
# Display the square-root-cutoff result: a dict mapping cluster number to the
# list of proteins that qualified for addition to that cluster.
sqrt_qualifying_proteins

{9: ['MCM6'],
 26: ['ACTN2'],
 28: ['TP53'],
 44: ['CREBBP'],
 58: ['WDR74',
  'RRP1B',
  'PWP1',
  'MDH2',
  'NOC2L',
  'GTPBP4',
  'PES1',
  'PSMC5',
  'HIST1H2BD',
  'H2BFS',
  'HIST2H2BA',
  'HIST1H2BJ',
  'HIST3H2BB',
  'HIST1H2BB',
  'SMARCA2',
  'HIST1H2BM',
  'NOL6',
  'HIST2H2BF',
  'PSMC6',
  'DDX18',
  'EIF4G3',
  'HIST1H2AH',
  'HIST1H2AC',
  'SMARCA4'],
 59: ['OTC',
  'PUS7',
  'POLE2',
  'RIOK2',
  'MRPL27',
  'C20orf4',
  'DDX10',
  'CAP2',
  'CAP1',
  'LARP7',
  'SNRP70',
  'EIF5B',
  'PRPF8',
  'HDLBP',
  'SMARCA2',
  'LARP2',
  'NOL6',
  'IMPDH1',
  'RPS10',
  'POLE',
  'P4HB',
  'EIF4G3',
  'KIAA0664',
  'HIST1H2AH',
  'HIST1H2AC',
  'SMARCA4',
  'EIF4E'],
 100: ['ACSL4', 'EPRS', 'KIAA0664'],
 133: ['FEN1', 'RAD52'],
 140: ['AL356585.3',
  'ATP13A1',
  'TAF13',
  'TAF1',
  'SUPT3H',
  'NSF',
  'TAF12',
  'TAF6',
  'PCNA'],
 142: ['RPL37A', 'RPL28', 'RPS26'],
 146: ['MAPK13', 'MAPK12', 'MAPK11', 'MAPK8', 'MAPK9', 'MAPK14', 'MAPK3'],
 150: ['MAT1A', 'EPRS'],
 161: ['UB

In [7]:
# Display the linear-cutoff result: a dict mapping cluster number to the list
# of proteins that qualified for addition to that cluster.
linear_qualifying_proteins

{58: ['HIST1H2BD',
  'H2BFS',
  'HIST2H2BA',
  'HIST1H2BJ',
  'HIST3H2BB',
  'HIST1H2BB',
  'SMARCA2',
  'HIST1H2BM',
  'HIST2H2BF',
  'DDX18',
  'EIF4G3',
  'HIST1H2AH',
  'HIST1H2AC',
  'SMARCA4'],
 59: ['OTC',
  'PUS7',
  'POLE2',
  'RIOK2',
  'MRPL27',
  'C20orf4',
  'DDX10',
  'CAP2',
  'CAP1',
  'LARP7',
  'SNRP70',
  'EIF5B',
  'PRPF8',
  'HDLBP',
  'SMARCA2',
  'LARP2',
  'NOL6',
  'IMPDH1',
  'RPS10',
  'POLE',
  'P4HB',
  'EIF4G3',
  'KIAA0664',
  'HIST1H2AH',
  'HIST1H2AC',
  'SMARCA4',
  'EIF4E'],
 142: ['RPL37A', 'RPL28', 'RPS26'],
 146: ['MAPK11', 'MAPK14'],
 150: ['EPRS'],
 161: ['UBQLN4'],
 163: ['ATXN1'],
 164: ['RBL2'],
 165: ['IGHV7-81',
  'IGHV1-69',
  'C1S',
  'C1QB',
  'FCGR3A',
  'FCGR2C',
  'C1QC',
  'IGHV',
  'C1QA',
  'IGHV4-31'],
 167: ['SMAD9'],
 169: ['MDFI']}