This notebook performs imports and defines variables and functions used elsewhere in the project.

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
import readline
%load_ext rpy2.ipython
from IPython.display import Image
from IPython.core.display import HTML 

**TODO:** Consider decoupling the file paths from the modules, so that the modules are passed data directly and the logic for reading the data lives elsewhere.

Alternatively, the modules could import the paths from a single shared location.

# Imports

In [4]:
import os
import sys
import shutil
import subprocess
import itertools
import re
import random
import math
from functools import reduce
from collections import defaultdict, Counter
import json
import pickle
import warnings
import string

import rpy2

def rmagic_warning(message,
                   category=rpy2.rinterface.RRuntimeWarning,
                   filename='',
                   lineno=-1,
                   file=None,
                   line=None):
    """Print only the warning message, ignoring every other argument.

    The parameter list has the same names and order as
    ``warnings.showwarning``; presumably this function is meant to be
    assigned to ``warnings.showwarning`` while R cells run -- TODO confirm,
    the assignment is not made in this cell.
    """
    print(message)


# Keep a handle on the showwarning hook that was active when this cell ran.
default_showwarning = warnings.showwarning

import matplotlib
import pandas as pd
import numpy as np
import scipy
import networkx as nx
from pandas import ExcelWriter
import openpyxl
from scipy.stats import rankdata, pearsonr, binom, hypergeom
from sklearn.preprocessing import scale
import numpy.ma as ma
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import gridspec
import matplotlib.image as mpl_img
from matplotlib_venn import venn3_unweighted #removed on 2018-11-13
import matplotlib.patches as mpl_patches
from joblib import Parallel, delayed
from statsmodels.nonparametric.kernel_density import KDEMultivariate
import pubchempy as pcp # removed on 2018-11-13 # re added on ˛2018-12-06

# Fixed seed value; presumably passed to random number generators elsewhere
# in the project for reproducibility -- TODO confirm usage.
RANDOM_SEED = 42
# 2**32 - 1, the largest value that fits in an unsigned 32-bit integer;
# presumably the upper bound when drawing derived seeds -- TODO confirm.
MAX_SEED = 2**32 - 1
# Separator between synonyms packed into a single drug-name string
# (create_drug_annot_dicts splits on it).
SYN_DELIM = ' /// '

# Paths

In [7]:
# BASE_DIR is supplied by the local `wd` module; every path in this notebook
# is built from it.  It is also appended to sys.path (presumably so that the
# project's `modules` package can be imported -- TODO confirm).
from wd import BASE_DIR
sys.path.append(BASE_DIR)
DATA_DIR = os.path.join(BASE_DIR, 'data')

# --- Raw inputs under data/raw/exp ---
RAW_DATA_DIR = os.path.join(DATA_DIR, 'raw')
RAW_EXP_DIR = os.path.join(RAW_DATA_DIR, 'exp')
PDX_AFFY_CEL_DIR = os.path.join(RAW_EXP_DIR, 'sebastian_3.28.17_cels/')
PDX_AFFY_PROBE_EXP_FILE = os.path.join(RAW_EXP_DIR, 'pdx_affy_probe_exp.txt')
PDX_AFFY_ANNOT_FILE = os.path.join(RAW_EXP_DIR, '170328_PDXScreen_Anno_v5.xlsx')

PDX_PRIMARY_BEADCHIP_SERIES_MATRIX_FILE = os.path.join(RAW_EXP_DIR, 'GSE28192_series_matrix.txt')
PDX_PRIMARY_BEADCHIP_ILMN_ANNOT_FILE = os.path.join(RAW_EXP_DIR, 'GPL6102-11574.txt') # downloaded from GEO's page for the platform

# --- Preprocessed outputs under data/preprocessed/exp ---
PREPROC_DATA_DIR = os.path.join(DATA_DIR, 'preprocessed')
PREPROC_EXP_DIR = os.path.join(PREPROC_DATA_DIR, 'exp')
PREPROC_PDX_AFFY_EXP_FILE = os.path.join(PREPROC_EXP_DIR, 'pdx_affy_exp.csv')
PREPROC_PRIMARY_AFFY_EXP_FILE = os.path.join(PREPROC_EXP_DIR, 'primary_affy_exp.csv')
PDX_SSGSEA_FILE = os.path.join(PREPROC_EXP_DIR, 'pdx_ssgsea.csv')
PRIMARY_SSGSEA_FILE = os.path.join(PREPROC_EXP_DIR, 'primary_ssgsea.csv')
PREPROC_PDX_BEADCHIP_EXP_FILE = os.path.join(PREPROC_EXP_DIR, 'pdx_beadchip_exp.csv')
PREPROC_PRIMARY_BEADCHIP_EXP_FILE = os.path.join(PREPROC_EXP_DIR, 'primary_beadchip_exp.csv')

# --- Raw inputs under data/raw/meth ---
RAW_METH_DIR = os.path.join(RAW_DATA_DIR, 'meth')
RAW_METH_VALUES_DIR = os.path.join(RAW_METH_DIR, 'Rusert_2018_Meth')
RAW_METH_VALUES_FILE = os.path.join(RAW_METH_VALUES_DIR, 'matrix_averagebeta.txt')
RAW_METH_ANNOT_FILE = os.path.join(RAW_METH_VALUES_DIR, 'Metadata_Rusert_2018.xls')
RAW_METH_IDAT_DIR = os.path.join(RAW_METH_VALUES_DIR, 'raw')
RAW_METH_PROBE_MAPPING_DIR = os.path.join(RAW_METH_DIR, 'sebastian_methyl_12.13.16')

# --- Preprocessed outputs under data/preprocessed/meth ---
PREPROC_METH_DIR = os.path.join(PREPROC_DATA_DIR, 'meth')
PREPROC_PDX_AVGBETA_FILE = os.path.join(PREPROC_METH_DIR, 'pdx_avg_beta.csv')
PREPROC_PRIMARY_AVGBETA_FILE = os.path.join(PREPROC_METH_DIR, 'primary_avg_beta.csv')
PREPROC_PDX_METH_GENE_SSGSEA_FILE = os.path.join(PREPROC_METH_DIR, 'pdx_meth_gene_ssgsea.csv')
PREPROC_PRIMARY_METH_GENE_SSGSEA_FILE = os.path.join(PREPROC_METH_DIR, 'primary_meth_gene_ssgsea.csv')
PREPROC_PDX_METH_GENESET_SSGSEA_FILE = os.path.join(PREPROC_METH_DIR, 'pdx_meth_geneset_ssgsea.csv')
PREPROC_PRIMARY_METH_GENESET_SSGSEA_FILE = os.path.join(PREPROC_METH_DIR, 'primary_meth_geneset_ssgsea.csv')

# --- Raw drug-suggestion inputs under data/raw/drug_suggestion ---
RAW_DRUG_SUGGESTION_DIR = os.path.join(RAW_DATA_DIR, 'drug_suggestion')
RAW_EXP_DRUG_SUGGESTION_DIR = os.path.join(RAW_DRUG_SUGGESTION_DIR, 'exp')
EXP_DRUG_SUGGESTION_CONTROLS_DIR = os.path.join(RAW_EXP_DRUG_SUGGESTION_DIR, 'controls')
CEREBELLAR_STEM_EXP_FILE = os.path.join(EXP_DRUG_SUGGESTION_CONTROLS_DIR, 'cerebellar_stem.csv')

# --- Results tree: results/intermediate and results/final/{main_text,supplementary} ---
RESULTS_DIR = os.path.join(BASE_DIR, 'results')
INTERMEDIATE_RESULTS_DIR = os.path.join(RESULTS_DIR, 'intermediate')
FINAL_RESULTS_DIR = os.path.join(RESULTS_DIR, 'final')
MAIN_TEXT_RESULTS_DIR = os.path.join(FINAL_RESULTS_DIR, 'main_text')
SUPPLEMENTARY_RESULTS_DIR = os.path.join(FINAL_RESULTS_DIR, 'supplementary')

# Intermediate drug-suggestion results; the 'exp' subdirectory holds the
# outputs of the individual methods (discover, cmap, ipa) defined below.
DRUG_SUGGESTION_RESULTS_DIR = os.path.join(INTERMEDIATE_RESULTS_DIR, 'drug_suggestion')
DRUG_SUGGESTION_RESULTS_DIR_EXP = os.path.join(DRUG_SUGGESTION_RESULTS_DIR, 'exp')
DISCOVER_RESULTS_DIR = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'discover')
PDX_DISCOVER_RESULTS_FILE = os.path.join(DISCOVER_RESULTS_DIR, 'pdx_discover.csv')

# Name of the control; matches the stem of CEREBELLAR_STEM_EXP_FILE's filename.
EXPRESSION_CONTROL = 'cerebellar_stem'

SUPPL_PREPROC_EXP_FILE = os.path.join(SUPPLEMENTARY_RESULTS_DIR, 'preprocessed_expression.xlsx')

# --- DiSCoVER / CMap raw reference data ---
DISCOVER_DATA_DIR = os.path.join(RAW_EXP_DRUG_SUGGESTION_DIR, 'discover')
CMAP_DATA_DIR = os.path.join(RAW_EXP_DRUG_SUGGESTION_DIR, 'cmap')
CMAP_VALID_GENES_FILE = os.path.join(CMAP_DATA_DIR, 'valid_genes.txt')
CMAP2CID_FILE = os.path.join(CMAP_DATA_DIR, 'cmap2cid.json')

# CMap inputs live with the preprocessed data, outputs with the intermediate results.
CMAP_INPUT_DIR = os.path.join(PREPROC_DATA_DIR, 'cmap')
CMAP_OUTPUT_DIR = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'cmap')
CMAP_PDX_BY_DRUG_FILE = os.path.join(CMAP_OUTPUT_DIR, 'cmap_pdx_by_drug.csv')

# --- IPA inputs, outputs and raw reference data ---
IPA_INPUT_DIR = os.path.join(PREPROC_DATA_DIR, 'ipa')
# NOTE(review): this filename starts with 'dx_' while most other files use a
# 'pdx_' prefix -- confirm it matches the file on disk and is not a typo.
IPA_INPUT_FILE = os.path.join(IPA_INPUT_DIR, 'dx_vs_cere_stem_logfold.tsv')
IPA_OUTPUT_DIR = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'ipa')
IPA_OUTPUT_CAUSAL_FILE = os.path.join(IPA_OUTPUT_DIR, 'causal.csv')
IPA_OUTPUT_UPSTREAM_FILE = os.path.join(IPA_OUTPUT_DIR, 'upstream.csv')
IPA_DATA_DIR = os.path.join(RAW_EXP_DRUG_SUGGESTION_DIR, 'ipa')
IPA2CID_FILE = os.path.join(IPA_DATA_DIR, 'ipa2cid.json')

# --- Combined results: tables go to the intermediate 'exp' directory,
# figures (.png) to the main-text results directory ---
COMBINED_EXP_LOG_ODDS_FILE = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'combined_log_odds.csv')
COMBINED_EXP_LOG_ODDS_PVALS_FILE = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'combined_log_odds_pvals.csv')
SCREEN_LOG_ODDS_FILE = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'screen_log_odds.csv')
SCREEN_BY_EXP_SYNONYM_FILE = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'screen_by_exp_synonym.csv')
SCREEN_VIAB_BY_EXP_SYNONYM_FILE = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'screen_viab_by_exp_synonym.csv')
SCREEN_HITS_BY_EXP_SYNONYM_FILE = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'screen_hits_by_exp_synonym.csv')
COMBINED_EXP_SUBGROUP_SPECIFIC_DRUGS_HEATMAP_FILE = os.path.join(MAIN_TEXT_RESULTS_DIR, 'subgroup_specific_drugs.expression.heatmap.png')
COMBINED_EXP_N_HITS_PER_PDX_BARGRAPH_FILE = os.path.join(MAIN_TEXT_RESULTS_DIR, 'n_hits_per_pdx.expression.bar_graph.png')
COMBINED_EXP_HITS_PER_SUBGROUP_VENN_FILE = os.path.join(MAIN_TEXT_RESULTS_DIR, 'hits_per_subgroup.expression.venn.png')
COMBINED_EXP_HITS_PER_SUBGROUP_UNION_AUTO_ANNOT_FILE = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'hits_per_subgroup.exp.annot.auto.csv')
COMBINED_EXP_HITS_PER_PDX_UNION_AUTO_ANNOT_FILE = os.path.join(DRUG_SUGGESTION_RESULTS_DIR_EXP, 'hits_per_pdx.exp.annot.auto.csv')

# Unlike the files above, this workbook sits directly in DRUG_SUGGESTION_RESULTS_DIR
# (not its 'exp' subdirectory).
COMBINED_EXP_SCREEN_MUT_PER_PDX_DRUG_RANKING_FILE = os.path.join(DRUG_SUGGESTION_RESULTS_DIR, 'combined_exp_screen_mut_per_pdx_drug_ranking.xlsx')

# --- Drug annotation tables (read by load_clue_drug_annot_dicts) ---
DRUG_SUGGESTION_PREPROC_DATA_DIR = os.path.join(PREPROC_DATA_DIR, 'drug_suggestion')
DRUG_ANNOTATION_DIR = os.path.join(DRUG_SUGGESTION_PREPROC_DATA_DIR, 'drug_annotation')
# downloaded at https://clue.io/data/REP on May 19, 2018
CLUE_DRUGS_ANNOT_FILE = os.path.join(DRUG_ANNOTATION_DIR, 'repurposing_drugs_20170327.txt')
CLUE_DRUG_SAMPLES_ANNOT_FILE = os.path.join(DRUG_ANNOTATION_DIR, 'repurposing_samples_20170327.txt')

GENE_ID_CONVERSION_FILE = os.path.join(RAW_DATA_DIR, 'gene_id_conversion.txt')

GENESETS_DIR = os.path.join(RAW_DATA_DIR, 'gene_sets')

# --- Screen files under data/preprocessed/screen (figure and supplement go to results/final) ---
PREPROC_SCREEN_DIR = os.path.join(PREPROC_DATA_DIR, 'screen')
SCREEN_TRIPLICATE_VIABILITY_FILE = os.path.join(PREPROC_SCREEN_DIR, 'renormalized_triplicates.csv')
SCREEN_MEAN_VIABILITY_FILE = os.path.join(PREPROC_SCREEN_DIR, 'renormalized_avg_viability.csv')
# Ten million; presumably the number of random draws used to build the RTM
# null distribution -- TODO confirm against the code that consumes it.
SCREEN_RTM_NULL_N_DRAWS = 10000000
SCREEN_PER_PDX_VIABILITY_ZSCORES_FILE = os.path.join(PREPROC_SCREEN_DIR, 'rtm_std.csv')
SCREEN_PER_PDX_VIABILITY_CDFS_FILE = os.path.join(PREPROC_SCREEN_DIR, 'rtm_pvals.csv')
SCREEN_GE1_HITS_ANNOT_FILE = os.path.join(PREPROC_SCREEN_DIR, 'James Combined Drug Annotation with SBI names 8.18.17.xlsx')
SCREEN_PDX_HITS_FILE = os.path.join(PREPROC_SCREEN_DIR, 'per_pdx_hits.csv')
SCREEN_SUBGROUP_HITS_FILE = os.path.join(PREPROC_SCREEN_DIR, 'per_subgroup_hits.json')
SCREEN_HITS_FIGURE_FILE = os.path.join(MAIN_TEXT_RESULTS_DIR, 'screen_hits.png')
SCREEN_HITS_SUPPLEMENT_FILE = os.path.join(SUPPLEMENTARY_RESULTS_DIR, 'screen_hits.xlsx')
# Hard-coded ordering ['G3', 'SHH', 'G4']; it is not derived from the
# subgroup assignments, so it must be kept in sync by hand.
SUBGROUPS_BY_SIZE_DESCENDING = 'G3 SHH G4'.split()

# --- NMF results under results/intermediate/nmf ---
NMF_RESULTS_DIR = os.path.join(INTERMEDIATE_RESULTS_DIR, 'nmf')
NMF_MODELS_DIR = os.path.join(NMF_RESULTS_DIR, 'models')
NMF_ORDER_SELECTION_METRICS_FILE = os.path.join(NMF_RESULTS_DIR, 'order_selection_metrics.csv')
NMF_ORDER_SELECTION_FIGURE_FILE = os.path.join(NMF_RESULTS_DIR, 'order_selection.png')
# NOTE(review): the commented-out values after each iteration count
# (5000000 and 10000) look like full-run settings that were reduced for
# quick iteration -- confirm which values the final results should use.
NMF_FIT_NITER = 1000#5000000
NMF_CHIBS_NITER = 100# 10000
NMF_SUPPLEMENTARY_FILE = os.path.join(SUPPLEMENTARY_RESULTS_DIR, 'nmf.xlsx')
# NMF_K picks which saved model file is selected below; NMF_K_RANGE is 2..10 inclusive.
NMF_K = 3
NMF_K_RANGE = range(2, 11)
SELECTED_NMF_MODEL_FILE = os.path.join(NMF_MODELS_DIR, 'k{}.pkl'.format(NMF_K))
NMF_COMPONENTS_ICS_FILE = os.path.join(NMF_RESULTS_DIR, 'component_ics.csv')
NMF_MATCHING_FEATURES_HEATMAP_FILE = os.path.join(NMF_RESULTS_DIR, 'matching_features_heatmap.png')
NMF_MATCHING_FEATURES_HEATMAP_FILE_C1 = os.path.join(NMF_RESULTS_DIR, 'c1.matching_features_heatmap.png')
NMF_MATCHING_FEATURES_HEATMAP_FILE_C2 = os.path.join(NMF_RESULTS_DIR, 'c2.matching_features_heatmap.png')
NMF_MATCHING_FEATURES_HEATMAP_FILE_C3 = os.path.join(NMF_RESULTS_DIR, 'c3.matching_features_heatmap.png')

# --- Raw inputs under data/raw/mut ---
RAW_MUT_DIR = os.path.join(RAW_DATA_DIR, 'mut')
RAW_MUT_FILE = os.path.join(RAW_MUT_DIR, '171016_RWR_Screen_Lesions.xlsx')
RAW_CNV_FILE = os.path.join(RAW_MUT_DIR, '171016_PDXScreen_Anno_v8.xlsx')

# --- Preprocessed output under data/preprocessed/mut ---
PREPROC_MUT_DIR = os.path.join(PREPROC_DATA_DIR, 'mut')
PREPROC_MUT_CNA_FILE = os.path.join(PREPROC_MUT_DIR, 'mut_cna.mb_pdx.binary.csv')

In [8]:
import modules.utils as utils

In [9]:
# Subgroup assignment of each PDX line (from DKFZ, based on their methylation data).
# Insertion order matters: the per-subgroup sample lists built below follow it.
PDX2SUBGROUP = {
    'RCMB20': 'G3',
    'MB002': 'G3',
    'ICB1299': 'G3',
    'RCMB28': 'G3',
    'MED411': 'G3',
    'ICB1572': 'G3',
    'MED211': 'G3',
    'DMB006': 'G4',
    'MED1712': 'SHH',
    'RCMB40': 'G3',
    'RCMB24': 'SHH',
    'RCMB32': 'SHH',
    'MED1911': 'G3',
    'MB009': 'G3',
    'MED2312': 'G4',
    'RCMB18': 'SHH',
    'BT084': 'SHH',
    'ICB984': 'SHH',
    'ICB1487': 'G4',
    'RCMB38': 'G4',
}

# Alternative mapping in which these three SHH lines carry the label 'p53 SHH'.
SHH_P53_SAMPLES = ['BT084', 'RCMB18', 'ICB984']
PDX2SUBGROUP_W_SHH_P53 = PDX2SUBGROUP.copy()
for s in SHH_P53_SAMPLES:
    PDX2SUBGROUP_W_SHH_P53.update({s: 'p53 SHH'})

# Distinct subgroup labels, and the reverse mapping subgroup -> list of PDX lines.
SUBGROUPS = set(PDX2SUBGROUP.values())
SUBGROUP2PDXS = {
    sg: [pdx for pdx, label in PDX2SUBGROUP.items() if label == sg]
    for sg in SUBGROUPS
}
G3_SAMPLES, G4_SAMPLES, SHH_SAMPLES = (
    SUBGROUP2PDXS[label] for label in ('G3', 'G4', 'SHH')
)

# Alpha level for the screen's RTM analysis, and the matching confidence
# expressed as a percentage: (1 - 0.001) * 100, i.e. approximately 99.9.
SCREEN_RTM_ALPHA = 0.001
SCREEN_RTM_CONFIDENCE = (1 - SCREEN_RTM_ALPHA) * 100
# Figure and cartoon image paths, all stored in the preprocessed screen directory.
SCREEN_RTM_CARTOON_FILE = os.path.join(PREPROC_SCREEN_DIR, 'screen_rtm_cartoon.png')
SCREEN_RTM_FIGURE_FILE = os.path.join(PREPROC_SCREEN_DIR, 'screen_rtm_figure.png')
LOADED_384WELL_PLATE_CARTOON_FILE = os.path.join(PREPROC_SCREEN_DIR, 'loaded_plate_cartoon.png')
CELLTITERGLO_PLATE_CARTOON_FILE = os.path.join(PREPROC_SCREEN_DIR, 'celltiterglo_plate_cartoon.png')

In [10]:
# NOTE: this cell reads a spreadsheet as soon as it runs, so the file must
# exist before any notebook that executes this one can proceed.
SUMEET_PDX_TO_CID_FILE = os.path.join(PREPROC_SCREEN_DIR, 'PubchemCID_7729.xlsx')
# The first column becomes the index (presumably SBI compound IDs, judging by
# the names used below -- TODO confirm against the spreadsheet).
sumeet_df = pd.read_excel(SUMEET_PDX_TO_CID_FILE, index_col=0)
# Rows that have a PubChem CID, with the CID cast to int.
nonan = sumeet_df.PubChem_CID.dropna().astype(int)
# Index entries that have no PubChem CID.
nan_ids = [sbi for sbi in sumeet_df.index if sbi not in nonan.index]
# index -> CID for every row with a CID; a repeated index keeps its last CID.
SBI_TO_CID_BY_STRUCTURE = dict(zip(nonan.index, nonan.values))
# JSON files holding the other SBI -> CID / SBI -> drug-name mappings.
SBI_TO_CID_BY_NAME_FILE = os.path.join(PREPROC_SCREEN_DIR, 'sbi_to_cids_byname.json')
SBI_TO_CID_UNION_FILE = os.path.join(PREPROC_SCREEN_DIR, 'sbi_to_cid_structure_name_union.json')
SBI_TO_DRUGNAME_FILE = os.path.join(PREPROC_SCREEN_DIR, 'sbi2drugname.json')
SBI_TO_DRUGNAME_MERGED_BY_CIDS_FILE = os.path.join(PREPROC_SCREEN_DIR, 'sbi2drugname.merged_by_cids.json')
SBI_TO_DRUGNAME_MERGED_SIMILAR_FILE = os.path.join(PREPROC_SCREEN_DIR, 'sbi2drugname.merged_similar.json')
SBI_TO_CID_MERGED_SIMILAR_FILE = os.path.join(PREPROC_SCREEN_DIR, 'sbi2cid.merged_similar.json')

In [11]:
# Spreadsheet kept in the preprocessed screen directory; presumably the
# source of the SBI -> drug-name mapping -- TODO confirm.
# sent to me by Jessica Rusert on 7.28.16
SBI_DRUGNAME_SOURCE_FILE = os.path.join(PREPROC_SCREEN_DIR, 'Averaged data on 20 PDX lines_7.28.16 with Mol Names.xlsx')

In [12]:
# Colour name for each subgroup label.  Full names are used; a note in the
# legacy init.py says single-letter codes render differently (yellow looks
# like yellow-green).
official_subtype_colors = dict(G3='yellow', SHH='red', G4='green', WNT='blue')

# The twenty PDX sample IDs, listed in alphabetical (string) order.
alpha_twenty_samples = [
    'BT084', 'DMB006', 'ICB1299', 'ICB1487', 'ICB1572',
    'ICB984', 'MB002', 'MB009', 'MED1712', 'MED1911',
    'MED211', 'MED2312', 'MED411', 'RCMB18', 'RCMB20',
    'RCMB24', 'RCMB28', 'RCMB32', 'RCMB38', 'RCMB40',
]

In [14]:
CLUE_DRUGS_ANNOT_FILE

'/Users/edjuaro/GoogleDrive/pdx/mb-pdx-hts/data/preprocessed/drug_suggestion/drug_annotation/repurposing_drugs_20170327.txt'

In [15]:
def load_clue_drug_annot_dicts():
    """Build lookup dicts from drug identifier to mechanism of action and target.

    Reads the two tab-delimited annotation tables at CLUE_DRUGS_ANNOT_FILE and
    CLUE_DRUG_SAMPLES_ANNOT_FILE (lines starting with '!' are skipped as
    comments).  The drugs table supplies the ``moa`` and ``target`` columns,
    keyed by its first column; the samples table supplies ``pubchem_cid``,
    keyed by its second column.  Both index columns are presumably the drug
    name -- TODO confirm against the files.

    Every drug that has a PubChem CID is also registered under that CID
    (as a string of digits), so callers can look up by name or by CID.

    Returns:
        (moa, target): two dicts whose keys are lowercased drug names or CID
        strings.  Drugs with a missing moa/target are absent from the
        respective dict.
    """
    clue_drugs_df = pd.read_table(CLUE_DRUGS_ANNOT_FILE, comment='!', index_col=0, encoding='windows-1252')
    clue_samples_df = pd.read_table(CLUE_DRUG_SAMPLES_ANNOT_FILE, comment='!', index_col=1, encoding='windows-1252')

    moa_series = clue_drugs_df.moa.dropna()
    moa = dict(zip(moa_series.index, moa_series))
    target_series = clue_drugs_df.target.dropna()
    target = dict(zip(target_series.index, target_series))

    # Alias each annotated drug under its PubChem CID.  CIDs are read as
    # floats when the column has gaps, hence the int() round-trip.
    clue_cid = clue_samples_df.pubchem_cid.dropna()
    for drugname, cid in clue_cid.items():
        cid_key = str(int(cid))
        if drugname in moa:
            moa[cid_key] = moa[drugname]
        if drugname in target:
            target[cid_key] = target[drugname]

    # Lowercase the keys so case-insensitive lookups (syn.lower()) succeed.
    # The previous loop `for d in [moa, target]: d = {...}` only rebound the
    # loop variable, so the returned dicts were never actually lowercased.
    moa = {str(key).lower(): val for key, val in moa.items()}
    target = {str(key).lower(): val for key, val in target.items()}
    return moa, target

# Load the annotation lookups once, when this cell runs; create_drug_annot_dicts
# reads these two module-level dicts.
clue_drug2moa, clue_drug2target = load_clue_drug_annot_dicts()
        
def create_drug_annot_dicts(our_druglist):
    """Annotate our drug entries with mechanism of action and target.

    Each entry of ``our_druglist`` is a string of one or more synonyms joined
    by SYN_DELIM.  Every synonym is lowercased and looked up in the
    module-level ``clue_drug2moa`` and ``clue_drug2target`` dicts; a match is
    stored under the full, unsplit entry string.  When several synonyms of
    one entry match, the last matching synonym wins.

    Returns:
        (our_drug_to_moa, our_drug_to_target): dicts keyed by the original
        entry strings; entries with no matching synonym are omitted.
    """
    moa_by_entry = {}
    target_by_entry = {}
    lookups = ((clue_drug2moa, moa_by_entry), (clue_drug2target, target_by_entry))
    for syn_string in our_druglist:
        for synonym in syn_string.split(SYN_DELIM):
            key = synonym.lower()
            for reference, collected in lookups:
                if key in reference:
                    collected[syn_string] = reference[key]
    return moa_by_entry, target_by_entry

In [16]:
"""
sbi_to_cid = load_sbi_to_cid()
cid_to_sbis = defaultdict(list)
for sbi, cid in sbi_to_cid.items():
    cid_to_sbis[cid].append(sbi)
sbi_to_drugname = load_sbi_to_drugname()
sbi_to_drugname['SBI-0637109'] = 'GAG Agonist, Adhesamine'
sbi_to_drugname['SBI-0637109.P001'] = 'GAG Agonist, Adhesamine'
drugname_to_sbi = load_drugname_to_sbi()
"""

"\nsbi_to_cid = load_sbi_to_cid()\ncid_to_sbis = defaultdict(list)\nfor sbi, cid in sbi_to_cid.items():\n    cid_to_sbis[cid].append(sbi)\nsbi_to_drugname = load_sbi_to_drugname()\nsbi_to_drugname['SBI-0637109'] = 'GAG Agonist, Adhesamine'\nsbi_to_drugname['SBI-0637109.P001'] = 'GAG Agonist, Adhesamine'\ndrugname_to_sbi = load_drugname_to_sbi()\n"

# med/pdx/init.py

In [17]:
"""from projects.medulloblastoma import *
from data.cancer.medulloblastoma.pdx import *
from toolbox.misc import reverse_dict
from data.genesets import load_genesets


official_subtype_colors = {'G3': 'yellow', 'SHH': 'red', 'G4': 'green', 'WNT': 'blue'}
# single-letter colors are different; yellow looks like yellow-green

alpha_twenty_samples = ['BT084',
                        'DMB006',
                        'ICB1299',
                        'ICB1487',
                        'ICB1572',
                        'ICB984',
                        'MB002',
                        'MB009',
                        'MED1712',
                        'MED1911',
                        'MED211',
                        'MED2312',
                        'MED411',
                        'RCMB18',
                        'RCMB20',
                        'RCMB24',
                        'RCMB28',
                        'RCMB32',
                        'RCMB38',
                        'RCMB40']

# from DKFZ, based on their methylation data
pdx_subtype_dict = {'RCMB20': 'G3',
                    'MB002': 'G3',
                    'ICB1299': 'G3',
                    'RCMB28': 'G3',
                    'MED411': 'G3',
                    'ICB1572': 'G3',
                    'MED211': 'G3',
                    'DMB006': 'G4',
                    'MED1712': 'SHH',
                    'RCMB40': 'G3',
                    'RCMB24': 'SHH',
                    'RCMB32': 'SHH',
                    'MED1911': 'G3',
                    'MB009': 'G3',
                    'MED2312': 'G4',
                    'RCMB18': 'SHH',
                    'BT084': 'SHH',
                    'ICB984': 'SHH',
                    'ICB1487': 'G4',
                    'RCMB38': 'G4'}

shh_p53_samples = ['BT084', 'RCMB18', 'ICB984']
subtype_dict_w_shh_p53 = dict(pdx_subtype_dict)
for s in shh_p53_samples:
    subtype_dict_w_shh_p53[s] = 'p53 SHH'

g3_samples = [s for s, st in pdx_subtype_dict.items() if st == 'G3']
g4_samples = [s for s, st in pdx_subtype_dict.items() if st == 'G4']
shh_samples = [s for s, st in pdx_subtype_dict.items() if st == 'SHH']

ad_sbi = 'SBI-0052701.P001'

pdx_dr = load_pdx_drug_response().loc[alpha_twenty_samples]

pdx_exp = load_pdx_expression().loc[alpha_twenty_samples]  # to have in same order and eliminate 21st sample

pdx_primary_exp = load_pdx_primary_expression()

# alpha = 0.75 was used
pdx_ssgsea = load_pdx_ssgsea().loc[alpha_twenty_samples]

pdx_mut = load_pdx_mutations().loc[alpha_twenty_samples]

ge1_sbis = load_ge1_sbis()

sbi_to_cid = load_sbi_to_cid()
cid_to_sbis = defaultdict(list)
for sbi, cid in sbi_to_cid.items():
    cid_to_sbis[cid].append(sbi)
sbi_to_drugname = load_sbi_to_drugname()
sbi_to_drugname['SBI-0637109'] = 'GAG Agonist, Adhesamine'
sbi_to_drugname['SBI-0637109.P001'] = 'GAG Agonist, Adhesamine'
drugname_to_sbi = load_drugname_to_sbi()

def load_reasonable_drugs():
    reasonable_drugs_file = '/home/jamesdj/Dropbox/PDX-HTS Paper/per_tumor_drug_suggestions/reasonable_drugs/reasonable_drugs.csv'
    with open(reasonable_drugs_file, 'r') as f:
        rdrugs = [row.strip() for row in f.readlines()]
    return rdrugs

reasonable_drugs = load_reasonable_drugs()


"""

"from projects.medulloblastoma import *\nfrom data.cancer.medulloblastoma.pdx import *\nfrom toolbox.misc import reverse_dict\nfrom data.genesets import load_genesets\n\n\nofficial_subtype_colors = {'G3': 'yellow', 'SHH': 'red', 'G4': 'green', 'WNT': 'blue'}\n# single-letter colors are different; yellow looks like yellow-green\n\nalpha_twenty_samples = ['BT084',\n                        'DMB006',\n                        'ICB1299',\n                        'ICB1487',\n                        'ICB1572',\n                        'ICB984',\n                        'MB002',\n                        'MB009',\n                        'MED1712',\n                        'MED1911',\n                        'MED211',\n                        'MED2312',\n                        'MED411',\n                        'RCMB18',\n                        'RCMB20',\n                        'RCMB24',\n                        'RCMB28',\n                        'RCMB32',\n                        'RCMB38',\n