
# d1_ic50_medium
- This file is for the dream1 data, with medium clusters generated using the ic_50 parameter (spectral clustering).
- We consider two different cutoffs for adding proteins back:
1. Square-root method: a protein must connect at least sqrt(num components) components to qualify for addition to a cluster.
2. Linear method: a protein must connect at least 0.5 * num components components to qualify for addition to a cluster.


In [4]:
# imports:

# standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# library used for functional enrichment analysis
from func_e.FUNC_E import FUNC_E  # a method that can do functional enrichment analysis!
import func_e.vocabs.all as vocabs
# Suppress FutureWarnings: some functions used inside the FUNC_E class emit them, and they cannot be fixed here because that code is third-party.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# importing the custom classes i built
from matrix_class import ProteinMatrix # ppi matrix 
from cluster_class import AllClusters # dictionary to hold all clusters (in form number of cluster : list of proteins in that cluster)
from degreelist_class import DegreeList # creates a list of all proteins in order of their degree

# helper functions for setting up program
from recipe_utils import initialize_matrix_clusters_degreelist
from recipe_utils import find_clusters_and_proteins_together

# helper functions for functional enrichment
from recipe_utils import print_querylist_of_clusters_to_file
from recipe_utils import print_protein_background_to_file
from recipe_utils import create_term_mapping_list
from recipe_utils import get_initialized_fe

 **must change filenames:**

In [5]:
# Input locations for this run; change both paths when switching datasets.
clusters_file = "data/d1_ic50_medium/dream1_medium-cluster.json"  # cluster file (JSON)
interactions_file = "data/interactions/1_ppi_string_v2.txt"  # interactions file (text)

In [6]:
# Build the three working objects (matrix, clusters, degree list) from the
# interactions file and the clusters file configured earlier in the notebook.
matrix, clusters, degreelist = initialize_matrix_clusters_degreelist(
    interactions_filepath=interactions_file,
    clusters_filepath=clusters_file,
)

*may want to change parameters below:*

In [7]:
# Linear cutoff run (use_sqrt=False, protein_ratio=0.5).
# NOTE(review): cluster_ratio / cluster_constant presumably control which
# clusters qualify, and protein_ratio / protein_constant which proteins are
# added back -- confirm against recipe_utils.find_clusters_and_proteins_together.
linear_qualifying_clusters, linear_qualifying_proteins = find_clusters_and_proteins_together(
    matrix,
    clusters,
    degreelist,
    cluster_ratio=0,
    cluster_constant=2,
    use_sqrt=False,
    protein_ratio=0.5,
    protein_constant=0,
)

In [8]:
# Square-root cutoff run (use_sqrt=True, protein_ratio=1).
# NOTE(review): cluster_ratio / cluster_constant presumably control which
# clusters qualify, and protein_ratio / protein_constant which proteins are
# added back -- confirm against recipe_utils.find_clusters_and_proteins_together.
sqrt_qualifying_clusters, sqrt_qualifying_proteins = find_clusters_and_proteins_together(
    matrix,
    clusters,
    degreelist,
    cluster_ratio=0,
    cluster_constant=2,
    use_sqrt=True,
    protein_ratio=1,
    protein_constant=0,
)

In [9]:
# Display the result of the square-root run: a dict keyed by cluster number
# whose values are lists of protein names (see the cell output).
sqrt_qualifying_proteins

{0: ['PDE4DIP',
  'WDR1',
  'AP2A2',
  'CCNL1',
  'USP34',
  'AP2A1',
  'MAGI1',
  'HECTD3',
  'KIAA0317',
  'PFDN4',
  'ARRB1',
  'SEC13',
  'BCCIP',
  'PRPF4',
  'RNF40'],
 1: ['AKAP9', 'CALR', 'CANX'],
 3: ['PYGO1',
  'PDE6H',
  'PDE6G',
  'GNB5',
  'APC2',
  'WNT10A',
  'WNT6',
  'WNT8A',
  'WNT8B',
  'TCF7L1',
  'WNT9B',
  'WNT9A',
  'WNT16',
  'WNT5B',
  'WNT11',
  'TCF7',
  'WNT7A',
  'WNT10B',
  'APC',
  'WNT2B',
  'SEC22A',
  'WNT3',
  'WNT7B',
  'WNT2',
  'TCF7L2',
  'WNT4',
  'WNT5A',
  'LEF1'],
 4: ['DBN1',
  'GNG10',
  'GNG4',
  'GNG8',
  'GNG5',
  'GNG7',
  'GNG13',
  'GNGT2',
  'GNG11',
  'GNG3',
  'GNG12',
  'CACNA1G',
  'GNB5',
  'GRIN2A',
  'CACNA1B',
  'GRIN2B',
  'GRIN1',
  'CACNA1A',
  'CACNA1D',
  'CACNA1C',
  'CACNA1S',
  'CHUK',
  'IKBKB',
  'CCNT1',
  'GNG2'],
 5: ['NLRP4',
  'RIN1',
  'RABGGTA',
  'LRPAP1',
  'RAP1GAP',
  'SAFB2',
  'ZBP1',
  'SCMH1',
  'RCAN2',
  'RCAN3',
  'SCML2',
  'FADD',
  'TGM2',
  'APTX',
  'RCAN1',
  'PHF16',
  'IRF3',
  'PHF15',
  'I

In [10]:
# Display the result of the linear run: a dict keyed by cluster number
# whose values are lists of protein names (see the cell output).
linear_qualifying_proteins

{0: ['PDE4DIP',
  'WDR1',
  'AP2A2',
  'CCNL1',
  'USP34',
  'AP2A1',
  'MAGI1',
  'HECTD3',
  'KIAA0317',
  'PFDN4',
  'ARRB1',
  'SEC13',
  'BCCIP',
  'PRPF4',
  'RNF40'],
 5: ['NLRP4',
  'RIN1',
  'RABGGTA',
  'LRPAP1',
  'RAP1GAP',
  'SAFB2',
  'ZBP1',
  'SCMH1',
  'RCAN2',
  'RCAN3',
  'SCML2',
  'FADD',
  'TGM2',
  'APTX',
  'RCAN1',
  'PHF16',
  'IRF3',
  'PHF15',
  'INHBB',
  'RNMT',
  'PHF17',
  'MLLT4',
  'DCPS',
  'CHURC1-FNTB',
  'ARR3',
  'SMTNL2',
  'CHML',
  'FNTB',
  'CENPF',
  'SPECC1L',
  'SPECC1',
  'SMTNL1',
  'SMTN',
  'MAGIX',
  'CASP8',
  'SNCA',
  'WWC2',
  'CHM',
  'ACVR1',
  'TBK1',
  'HMGCR',
  'VAV2',
  'MED7',
  'GDI1',
  'WWC1',
  'PAPOLB',
  'PAPOLG',
  'UBE3B',
  'UBE3C',
  'STAT2',
  'SPATA13',
  'PDK1',
  'MAGI1',
  'TAF10',
  'CASP3',
  'HECTD3',
  'HECTD1',
  'KIAA0317',
  'PAPOLA',
  'TGFBR1',
  'ARRB1',
  'BTF3L4',
  'PDPK1',
  'TGFB1',
  'CRNKL1',
  'GDI2',
  'MAGI3',
  'DQX1',
  'DHX32',
  'HECTD2',
  'UBR5',
  'EXOSC2',
  'ACAT1',
  'DHX35',
  '