
# d2_ic50_medium
- this file is for dream2 data, with medium clusters generated using the ic_10 parameter (spectral clustering)
- we consider two different cutoffs for adding proteins back:
1. Square-root method: a protein must connect at least sqrt(num components) components to qualify for addition to a cluster
2. Linear method: a protein must connect at least 0.5 * num components components to qualify for addition to a cluster


In [1]:
# imports:

# third-party scientific libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# library used for functional enrichment analysis
from func_e.FUNC_E import FUNC_E  # a class that can do functional enrichment analysis
import func_e.vocabs.all as vocabs
# the following filter is necessary because some functions used in the FUNC_E class produce FutureWarnings, which I can't fix since it is not my code
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# importing the custom classes I built
from matrix_class import ProteinMatrix # ppi matrix
from cluster_class import AllClusters # dictionary to hold all clusters (in form number of cluster : list of proteins in that cluster)
from degreelist_class import DegreeList # creates a list of all proteins in order of their degree

# helper functions for setting up the program
from recipe_utils import initialize_matrix_clusters_degreelist
from recipe_utils import find_clusters_and_proteins_together

# helper functions for functional enrichment
from recipe_utils import print_querylist_of_clusters_to_file
from recipe_utils import print_protein_background_to_file
from recipe_utils import create_term_mapping_list
from recipe_utils import get_initialized_fe

 **must change filenames:**

In [2]:
# Input files for this run; both paths must be updated when switching datasets.
# Cluster assignments (JSON) for the dream2 medium clusters.
clusters_file = "data/d2_ic50_medium/dream2_medium-cluster.json"
# Protein-protein interaction list the clusters were derived from.
interactions_file = "data/interactions/2_ppi_inweb_v2.txt"

In [3]:
# Load the three working objects (PPI matrix, clusters, degree list) from the
# interaction and cluster files configured in the previous cell.
matrix, clusters, degreelist = initialize_matrix_clusters_degreelist(
    interactions_filepath=interactions_file,
    clusters_filepath=clusters_file,
)

*may want to change parameters below:*

In [4]:
# Linear cutoff run: use_sqrt is disabled and protein_ratio is 0.5, matching the
# "0.5 * num components" rule described in the notebook header.
# NOTE(review): the exact meaning of cluster_ratio / cluster_constant is defined
# in recipe_utils and is not visible here.
linear_qualifying_clusters, linear_qualifying_proteins = find_clusters_and_proteins_together(
    matrix,
    clusters,
    degreelist,
    cluster_ratio=0,
    cluster_constant=2,
    use_sqrt=False,
    protein_ratio=0.5,
    protein_constant=0,
)

In [5]:
# Square-root cutoff run: same inputs and cluster parameters as the linear run,
# but with use_sqrt enabled and protein_ratio set to 1.
# NOTE(review): how use_sqrt combines with protein_ratio is defined in
# recipe_utils and is not visible here.
sqrt_qualifying_clusters, sqrt_qualifying_proteins = find_clusters_and_proteins_together(
    matrix,
    clusters,
    degreelist,
    cluster_ratio=0,
    cluster_constant=2,
    use_sqrt=True,
    protein_ratio=1,
    protein_constant=0,
)

In [6]:
# Bare expression at the end of the cell so the notebook displays the mapping
# returned by the square-root cutoff run (cluster number -> list of protein names).
sqrt_qualifying_proteins

{1: ['MAPK8', 'PLCG1', 'NCK1', 'STAT3', 'MAPK9', 'PIK3R1', 'PRKCB1', 'ABL1'],
 3: ['PLCG1', 'CBL', 'LCK', 'PIK3R2', 'LYN', 'PIK3R1', 'ABL1'],
 6: ['ING4',
  'CFDP1',
  'THOC2',
  'SYNCRIP',
  'VPS72',
  'YEATS4',
  'LDHA',
  'SRCAP',
  'MORF4L1',
  'PPIF',
  'TPR',
  'TARS',
  'SSB',
  'RAB6C',
  'SLC25A6',
  'RAB6B',
  'PHB2',
  'KPNA5',
  'HIST1H2BD',
  'H2BFS',
  'HIST2H2BA',
  'HIST1H2BJ',
  'HIST3H2BB',
  'HIST1H2BB',
  'LOC440926',
  'H3F3B',
  'ATP1A1',
  'KPNA6',
  'HIST1H2BM',
  'EIF6',
  'RSL1D1',
  'HIST2H2BF',
  'SLC25A5',
  'RPS26P8',
  'RPS26P10',
  'CAPZB',
  'RPS26',
  'TOP1'],
 7: ['F3',
  'F10',
  'THBS1',
  'C3',
  'PRSS3',
  'AHSG',
  'OR8D2',
  'APOA4',
  'KRT16',
  'GFAP',
  'APOC3',
  'QTRTD1',
  'FGA',
  'IGHV4-31',
  'TTR',
  'KRT5',
  'KRT9',
  'KRT6A',
  'APOC1',
  'APOA1',
  'ALB',
  'KRT14',
  'FN1',
  'TIAM1',
  'KRT1',
  'KRT10'],
 9: ['PCNA', 'SLC25A5'],
 56: ['ATXN1'],
 58: ['IGHV7-81',
  'IGHV1-69',
  'C1S',
  'C1QB',
  'FCGR3A',
  'FCGR2C',
  'C1QC',


In [7]:
# Bare expression at the end of the cell so the notebook displays the mapping
# returned by the linear cutoff run (cluster number -> list of protein names).
linear_qualifying_proteins

{6: ['LOC440926', 'H3F3B', 'KPNA6', 'HIST1H2BM'],
 7: ['F3',
  'F10',
  'THBS1',
  'C3',
  'PRSS3',
  'AHSG',
  'OR8D2',
  'APOA4',
  'KRT16',
  'GFAP',
  'APOC3',
  'QTRTD1',
  'FGA',
  'IGHV4-31',
  'TTR',
  'KRT5',
  'KRT9',
  'KRT6A',
  'APOC1',
  'APOA1',
  'ALB',
  'KRT14',
  'FN1',
  'TIAM1',
  'KRT1',
  'KRT10'],
 56: ['ATXN1'],
 58: ['IGHV7-81',
  'IGHV1-69',
  'C1S',
  'C1QB',
  'FCGR3A',
  'FCGR2C',
  'C1QC',
  'IGHV',
  'C1QA',
  'IGHV4-31'],
 70: ['C6orf125',
  'PITPNB',
  'VMA21',
  'C14orf156',
  'LYPLA1',
  'C19orf10',
  'MRPL14',
  'DDT',
  'CCDC44',
  'RAB14',
  'SAR1A',
  'PLP2',
  'OXSR1',
  'OS9',
  'HIF1AN',
  'NUDT5',
  'FKBP8',
  'BTF3',
  'MTPN',
  'COTL1',
  'UBE2V1',
  'TACC2',
  'RAB5C',
  'PPP2R2B',
  'HRSP12',
  'CCDC59',
  'STMN1',
  'MALT1',
  'PDCD5',
  'ACAT2',
  'TGFB1I1',
  'WDR6',
  'EPAS1',
  'RHOC',
  'UBE2L3',
  'SNX3',
  'HPRT1',
  'CACYBP',
  'ACP1',
  'YARS',
  'VAMP3',
  'TAGLN2',
  'FZD2',
  'BRWD2',
  'PTGES3',
  'NASP',
  'BRWD1',
  'APRT'