In [3]:
#[gene for gene, estimates in fluxes_mles['nonsmoking'].items() if len(estimates) == 4]

Note: need to regraph results with the additional genes

for genes whose from 110 gamma is significantly greater than from WT:

    find genes whose from 100 and from 010 gamma are significantly different
        and
    identify which of those is greater
for genes whose from 110 and from WT gammas are not significantly different:

    find genes whose from 100 or from 010 gammas are significantly different from the rest of the gammas

In [4]:
import os
import numpy as np
import pandas as pd

from locations import location_output
from locations import pts_by_mutation_file
from locations import results_keys
from locations import samples_per_combination_files

## * Load mutation rates

def _read_mutation_rates(file_stem):
    """Read `<file_stem>_mutation_rates.txt` from `location_output` and
    return its 'rate' column as a dict keyed by the file's first column."""
    rates_path = os.path.join(location_output,
                              f'{file_stem}_mutation_rates.txt')
    return pd.read_csv(rates_path, index_col=0)['rate'].to_dict()


# Result keys containing 'plus' are skipped here and filled in just below.
mutation_rates = {key: _read_mutation_rates(key)
                  for key in results_keys
                  if 'plus' not in key}

# The '*_plus' entries are read from the file of the corresponding base key.
mutation_rates['smoking_plus'] = _read_mutation_rates('smoking')
mutation_rates['nonsmoking_plus'] = _read_mutation_rates('nonsmoking')



## * Load selection rates, without epistasis
'''
selection_ne_mles = {
    key:pd.read_csv(
        os.path.join(location_output,
                     f'{key}_selections_no_epistasis.txt'),
        index_col=0)['selection_intensity'].to_dict()
    for key in results_keys if 'plus' not in key}

selection_ne_mles['smoking_plus'] = pd.read_csv(
                                    os.path.join(location_output,
                                    'smoking_selections_no_epistasis.txt'),
                                    index_col=0)['selection_intensity'].to_dict()

selection_ne_mles['nonsmoking_plus'] = pd.read_csv(
                                    os.path.join(location_output,
                                    'nonsmoking_selections_no_epistasis.txt'),
                                    index_col=0)['selection_intensity'].to_dict()

selection_ne_cis = {
    key:pd.read_csv(
        os.path.join(location_output,
                     f'{key}_selections_no_epistasis.txt'),
        index_col=0)[['ci_low_95', "ci_high_95"]].apply(
            lambda x: [x[0], x[1]], axis=1).to_dict()
    for key in results_keys if 'plus' not in key}

selection_ne_cis['smoking_plus'] = pd.read_csv(
                                    os.path.join(location_output,
                                    'smoking_selections_no_epistasis.txt'),
                                    index_col=0)[['ci_low_95', "ci_high_95"]].apply(
                                    lambda x: [x[0], x[1]], axis=1).to_dict()

selection_ne_cis['nonsmoking_plus'] = pd.read_csv(
                                    os.path.join(location_output,
                                    'nonsmoking_selections_no_epistasis.txt'),
                                    index_col=0)[['ci_low_95', "ci_high_95"]].apply(
                                    lambda x: [x[0], x[1]], axis=1).to_dict()
'''


## * Compute fluxes, without epistasis

def compute_lambdas(gammas, mu):
    """Convert selection coefficient(s) into flux(es): lambda = gamma * mu.

    Parameters
    ----------
    gammas : float, list or dict
        - float: a single estimate.
        - list: the first two items are each multiplied by `mu`
          (used for [low, high] confidence bounds).
        - dict: every value is converted recursively, keys are kept.
    mu : float
        Mutation rate used as the multiplier.

    Returns
    -------
    The converted value with the same structure as `gammas`, or None
    when `gammas` is none of the supported types.
    """
    if isinstance(gammas, float):
        return gammas*mu
    elif isinstance(gammas, list):
        return [gammas[0]*mu, gammas[1]*mu]
    elif isinstance(gammas, dict):
        # Recurse into compute_lambdas. Calling compute_gammas here would
        # divide the nested values by mu instead of multiplying them.
        return {x_y:compute_lambdas(the_gamma, mu)
                for x_y, the_gamma in gammas.items()}
'''
fluxes_ne_mles = {
    key:{gene:compute_lambdas(selection_ne_mles[key][gene],
                              mutation_rates[key][gene])
         for gene in selection_ne_mles[key].keys()}
    for key in results_keys}


fluxes_ne_cis = {
    key:{gene:compute_lambdas(selection_ne_cis[key][gene],
                              mutation_rates[key][gene])
         for gene in selection_ne_cis[key].keys()}
    for key in results_keys}
'''

## * Load fluxes with epistasis

def _load_flux_estimates(key, estimate_kind):
    """Load the object stored in `<key>_fluxes_<estimate_kind>.npy`
    under `location_output`."""
    npy_path = os.path.join(location_output,
                            f'{key}_fluxes_{estimate_kind}.npy')
    # .item() unwraps the single Python object saved in the array
    # (presumably a dict keyed by gene, given how it is indexed later).
    return np.load(npy_path, allow_pickle=True).item()


fluxes_mles = {key: _load_flux_estimates(key, 'mles')
               for key in results_keys}


fluxes_cis = {key: _load_flux_estimates(key, 'cis')
              for key in results_keys}


## * Compute selection coefficients with epistasis

def compute_gammas(lambdas, mu):
    """Convert flux(es) into selection coefficient(s): gamma = lambda / mu.

    `lambdas` may be a float (single estimate), a list (its first two
    items are divided by `mu`) or a dict (values converted recursively,
    keys kept). Any other type yields None.
    """
    if isinstance(lambdas, dict):
        converted = {}
        for transition, flux in lambdas.items():
            converted[transition] = compute_gammas(flux, mu)
        return converted

    if isinstance(lambdas, list):
        lower, upper = lambdas[0], lambdas[1]
        return [lower/mu, upper/mu]

    if isinstance(lambdas, float):
        return lambdas/mu

    # Unsupported input type: fall through without a value.
    return None


def _selection_from_fluxes(fluxes_by_gene, rates_by_gene):
    """Convert every gene's flux estimates to selection coefficients.

    Gene names on the mutation-rate side are upper-cased before
    matching, so a rate stored under a differently-cased key is still
    found. (Looking the upper-cased name up in the original dict would
    raise KeyError for such genes.) Genes without a mutation rate are
    left out. The result follows the iteration order of
    `fluxes_by_gene`.
    """
    rates_upper = {gene.upper(): rate
                   for gene, rate in rates_by_gene.items()}
    return {gene: compute_gammas(gene_fluxes, rates_upper[gene])
            for gene, gene_fluxes in fluxes_by_gene.items()
            if gene in rates_upper}


selection_mles = {
    key: _selection_from_fluxes(fluxes_mles[key], mutation_rates[key])
    for key in results_keys}


selection_cis = {
    key: _selection_from_fluxes(fluxes_cis[key], mutation_rates[key])
    for key in results_keys}


## * Helper function to filter the results with epistasis

def filter_estimates(all_estimates, from_x_to_y, genes=None):
    """Pick one entry out of every gene's estimates.

    Parameters
    ----------
    all_estimates : dict
        Maps gene -> mapping of estimates; each inner mapping is
        indexed with `from_x_to_y`.
    from_x_to_y : hashable
        Key of the entry to extract from each gene's estimates.
    genes : container, optional
        Only genes contained in it are kept. None keeps every gene.

    Returns
    -------
    dict mapping gene -> estimates[from_x_to_y].
    """
    wanted = list(all_estimates.keys()) if genes is None else genes
    selected = {}
    for gene, estimates in all_estimates.items():
        if gene in wanted:
            selected[gene] = estimates[from_x_to_y]
    return selected


def filter_110_to_111(all_estimates, genes=None):
    """Per gene, return the estimate stored under the
    ((1, 1, 0), (1, 1, 1)) key, optionally restricted to `genes`."""
    transition = ((1, 1, 0), (1, 1, 1))
    return filter_estimates(all_estimates, transition, genes)

def filter_000_to_001(all_estimates, genes=None):
    """Per gene, return the estimate stored under the
    ((0, 0, 0), (0, 0, 1)) key, optionally restricted to `genes`."""
    transition = ((0, 0, 0), (0, 0, 1))
    return filter_estimates(all_estimates, transition, genes)

def filter_100_to_101(all_estimates, genes=None):
    """Per gene, return the estimate stored under the
    ((1, 0, 0), (1, 0, 1)) key, optionally restricted to `genes`."""
    transition = ((1, 0, 0), (1, 0, 1))
    return filter_estimates(all_estimates, transition, genes)

def filter_010_to_011(all_estimates, genes=None):
    """Per gene, return the estimate stored under the
    ((0, 1, 0), (0, 1, 1)) key, optionally restricted to `genes`."""
    transition = ((0, 1, 0), (0, 1, 1))
    return filter_estimates(all_estimates, transition, genes)



def provide_all_relevant_lambdas_and_gammas(results_keys=results_keys):
    """Construct a dictionary with all relevant results for fluxes and
    selection coefficients.

    Returns a dictionary with two entries, 'lambdas' (taken from
    `fluxes_mles` / `fluxes_cis`) and 'gammas' (taken from
    `selection_mles` / `selection_cis`). Each entry is itself a
    dictionary whose keys are tuples of the form:

         (result_key, origin, what)

    result_key is each element of `results_keys`.

    origin names the transition whose estimate is extracted:
        - 'from_110': ((1, 1, 0), (1, 1, 1))
        - 'from_000': ((0, 0, 0), (0, 0, 1))
        - 'from_100': ((1, 0, 0), (1, 0, 1))
        - 'from_010': ((0, 1, 0), (0, 1, 1))
      (the notebook treats the three flags as TP53, KRAS and the
      third gene of the model, in that order)

    what refers to the estimation:
        - 'mles': for the maximum likelihood estimator
        - 'cis': for the 95% confidence interval (given as a two item list)

    Each value is another dictionary with the third gene as key and
    the respective estimate as value.

    """
    # (origin label, extractor) pairs, in the order the keys are inserted.
    origin_filters = (('from_110', filter_110_to_111),
                      ('from_000', filter_000_to_001),
                      ('from_100', filter_100_to_101),
                      ('from_010', filter_010_to_011))

    def collect(mles_by_key, cis_by_key):
        # One entry per (result key, origin, estimate kind) combination.
        sources = (('mles', mles_by_key), ('cis', cis_by_key))
        return {(key, origin, what): origin_filter(estimates[key])
                for origin, origin_filter in origin_filters
                for what, estimates in sources
                for key in results_keys}

    lambdas = collect(fluxes_mles, fluxes_cis)
    gammas = collect(selection_mles, selection_cis)

    return {'lambdas':lambdas, 'gammas':gammas}

def find_significant_differences(ci_list_1, ci_list_2):
    """Find genes whose two confidence intervals do not overlap.

    Both arguments map gene -> [low, high]. Every gene of `ci_list_1`
    must also be a key of `ci_list_2`. A gene is reported under
    'first_greater' when its interval in `ci_list_1` lies strictly
    above the one in `ci_list_2`, and under 'second_greater' in the
    opposite case. Genes are listed in the order of `ci_list_1`.
    """
    first_above = []
    second_above = []
    for gene, cis in ci_list_1.items():
        other_cis = ci_list_2[gene]
        if other_cis[1] < cis[0]:
            first_above.append(gene)
        if cis[1] < other_cis[0]:
            second_above.append(gene)

    return {'first_greater': first_above, 'second_greater': second_above}

## * Number of patients with mutation per gene

# Table read from `pts_by_mutation_file`, indexed by its first column.
pts_per_mutation = pd.read_csv(pts_by_mutation_file, index_col=0)


## * Patients per mutation combination for all TP53, KRAS, and third gene models

# One table per result key, each indexed by its 'third gene' column.
samples_per_combination = dict(
    (key, pd.read_csv(samples_per_combination_files[key],
                      index_col='third gene'))
    for key in results_keys)

In [5]:
# Build the {'lambdas': ..., 'gammas': ...} lookup once, using the default
# `results_keys`; the cells below index into it.
results = provide_all_relevant_lambdas_and_gammas()

In [6]:
# Compare the pan_data gamma CIs from 110 (first) against those from 000
# (second), then relabel the two result lists by which origin is greater.
_from_110_vs_from_000 = find_significant_differences(
    results['gammas']['pan_data','from_110','cis'],
    results['gammas']['pan_data','from_000','cis'])
signif_differences = {'TP53_KRAS': _from_110_vs_from_000['first_greater'],
                      'WT': _from_110_vs_from_000['second_greater']}

In [7]:
# Gene lists by which origin has the significantly greater gamma.
TP53_KRAS_greater, WT_greater = (signif_differences['TP53_KRAS'],
                                 signif_differences['WT'])

In [8]:
# Shorthand for the selection-coefficient lookup, keyed by
# (result_key, origin, what).
gammas = results['gammas']

In [9]:
# Restrict the from-100 (TP53) and from-010 (KRAS) gamma CIs to the genes
# whose gamma from 110 is significantly greater than from 000.
TP53_cis = {gene:cis for gene,cis in gammas['pan_data','from_100','cis'].items() if gene in TP53_KRAS_greater}
KRAS_cis = {gene:cis for gene,cis in gammas['pan_data','from_010','cis'].items() if gene in TP53_KRAS_greater}

# Keep this comparison under its own name and read it without pop(), so
# `signif_differences` still holds the 'TP53_KRAS' / 'WT' lists and the
# cell that reads them can be re-run.
_tp53_vs_kras = find_significant_differences(TP53_cis, KRAS_cis)
TP53_greater = _tp53_vs_kras['first_greater']
KRAS_greater = _tp53_vs_kras['second_greater']
# Genes with no significant TP53-vs-KRAS difference.
similar_gammas = list(set(TP53_KRAS_greater) - set(TP53_greater + KRAS_greater))

In [10]:
# Show the three gene groups: gamma greater from 100 (TP53), greater from
# 010 (KRAS), and no significant difference between the two.
print(f'TP53: \n\t{TP53_greater}\nKRAS: \n\t{KRAS_greater}\nSimilar: \n\t{similar_gammas}')

TP53: 
	['LYST', 'PTEN', 'MET', 'EML4', 'KDR', 'ROS1', 'PRKCG', 'CDC27', 'PAPPA2', 'PDGFRA', 'NF1', 'NOTCH2', 'FAT1', 'FGFR1', 'BRAF', 'RAD17', 'APC', 'EPHA7', 'CCND1', 'RB1', 'ERBB4']
KRAS: 
	['U2AF1', 'STK11', 'ATM', 'RBM10', 'KEAP1', 'NKX2-1']
Similar: 
	['ZMYM2', 'RET', 'TSC2', 'MGA', 'CBL', 'BRCA2', 'MYC', 'EPHA3', 'TERT', 'GRM1', 'POLD1', 'NCOR2', 'FGFR3', 'PRKDC', 'ATF7IP', 'KMT2D', 'ALK', 'NAV3', 'MSH2', 'PTPRD', 'BRCA1', 'PPP2R1A', 'NFE2L2', 'MECOM', 'SMAD4', 'BRD3', 'FGFR2', 'SETBP1', 'RYR1', 'SLIT3', 'ERBB2', 'MAP2K1', 'PBRM1', 'TSC1', 'FLT4', 'PARP4', 'FGFR4', 'SMARCA4', 'ARID1A', 'RHPN2', 'PIK3CA', 'SMG1', 'AXL', 'FBXW7', 'SCN8A', 'AKT1', 'POLE', 'LRP1B', 'CCNE1', 'INHBA', 'SETD2', 'NTRK2', 'POLR2A', 'PXDNL', 'TLR4']


In [11]:
# Same TP53-vs-KRAS comparison, now restricted to the genes whose gamma
# from 000 is significantly greater than from 110.
_from_100_cis = gammas['pan_data','from_100','cis']
_from_010_cis = gammas['pan_data','from_010','cis']
TP53_cis = {gene: _from_100_cis[gene] for gene in _from_100_cis if gene in WT_greater}
KRAS_cis = {gene: _from_010_cis[gene] for gene in _from_010_cis if gene in WT_greater}

find_significant_differences(TP53_cis, KRAS_cis)

{'first_greater': ['KIF5B', 'EGFR'], 'second_greater': []}

In [15]:
def _is_wt_greater(gene_and_cis):
    """True when the gene of a (gene, cis) pair is in `WT_greater`."""
    return gene_and_cis[0] in WT_greater


# CIs from each origin, restricted to the WT-greater genes.
WT_cis = dict(filter(_is_wt_greater, gammas['pan_data','from_000','cis'].items()))
TP53_cis = dict(filter(_is_wt_greater, gammas['pan_data','from_100','cis'].items()))
KRAS_cis = dict(filter(_is_wt_greater, gammas['pan_data','from_010','cis'].items()))
TP53_KRAS_cis = dict(filter(_is_wt_greater, gammas['pan_data','from_110','cis'].items()))

# WT against TP53 first, then WT against KRAS.
for _other_cis in (TP53_cis, KRAS_cis):
    print(find_significant_differences(WT_cis, _other_cis))

{'first_greater': [], 'second_greater': ['EGFR']}
{'first_greater': ['KIF5B', 'EGFR'], 'second_greater': []}


The majority of genes (84/93) have significant difference between gamma from WT and from TP53 & KRAS. 82/84 are greater from TP53 & KRAS, only 2 are greater from WT. The lack of consideration of changing mutation rates might contribute to this gap.

For the genes where the gamma is significantly greater after TP53 & KRAS than from WT, most (55/82) have no significant difference in gamma between TP53 and KRAS

21 genes have a greater gamma after TP53 than after KRAS. 
6 genes have a greater gamma after KRAS than after TP53.

For the 2 genes where the gamma from WT is significantly greater than from TP53 & KRAS, both (EGFR & KIF5B) have a higher gamma after TP53 than after KRAS. The gamma from WT is greater than the gamma from KRAS for both genes but when compared to the gamma from TP53, there is an insignificant difference for KIF5B, and for EGFR, the gamma from TP53 is greater.

In [12]:
_from_110_cis = gammas['pan_data','from_110','cis']
genes = _from_110_cis.keys()

signif_differences = find_significant_differences(_from_110_cis,
                                                  gammas['pan_data','from_000','cis'])

# Genes appearing in neither result list, i.e. with no significant
# difference between gamma from 110 and from 000.
_differing_genes = (set(signif_differences['first_greater'])
                    | set(signif_differences['second_greater']))
similar_gammas = list(set(genes) - _differing_genes)

In [13]:
# TP53-vs-KRAS comparison restricted to the genes in `similar_gammas`.
_from_100_cis = gammas['pan_data','from_100','cis']
_from_010_cis = gammas['pan_data','from_010','cis']
TP53_cis = {gene: _from_100_cis[gene] for gene in _from_100_cis if gene in similar_gammas}
KRAS_cis = {gene: _from_010_cis[gene] for gene in _from_010_cis if gene in similar_gammas}
find_significant_differences(TP53_cis, KRAS_cis)

{'first_greater': ['NRAS', 'RASA1'], 'second_greater': []}

Amongst the few genes (9/93) that show no significant difference in gamma between from WT and from TP53 & KRAS, 2 (NRAS & RASA1) have a greater gamma from TP53 than from KRAS. This may be due to mutation rate bias again, but NRAS is an equivalent to KRAS and would likely be negatively epistatic with it.

In [17]:
def _is_tp53_kras_greater(gene_and_cis):
    """True when the gene of a (gene, cis) pair is in `TP53_KRAS_greater`."""
    return gene_and_cis[0] in TP53_KRAS_greater


# CIs from each origin, restricted to the TP53+KRAS-greater genes.
WT_cis = dict(filter(_is_tp53_kras_greater, gammas['pan_data','from_000','cis'].items()))
TP53_cis = dict(filter(_is_tp53_kras_greater, gammas['pan_data','from_100','cis'].items()))
KRAS_cis = dict(filter(_is_tp53_kras_greater, gammas['pan_data','from_010','cis'].items()))
TP53_KRAS_cis = dict(filter(_is_tp53_kras_greater, gammas['pan_data','from_110','cis'].items()))

# WT against TP53 first, then WT against KRAS.
for _other_cis in (TP53_cis, KRAS_cis):
    print(find_significant_differences(WT_cis, _other_cis))

{'first_greater': [], 'second_greater': ['ZMYM2', 'LYST', 'CBL', 'MYC', 'EPHA3', 'FGFR3', 'KMT2D', 'NAV3', 'MSH2', 'BRCA1', 'PTEN', 'MET', 'SMAD4', 'SLIT3', 'STK11', 'ATM', 'TSC1', 'FLT4', 'PIK3CA', 'EML4', 'KDR', 'SCN8A', 'RET', 'TERT', 'GRM1', 'ROS1', 'NCOR2', 'PRKDC', 'PPP2R1A', 'PRKCG', 'RBM10', 'PAPPA2', 'ERBB2', 'PBRM1', 'AKT1', 'SETD2', 'PDGFRA', 'PXDNL', 'CCNE1', 'TLR4', 'TSC2', 'NF1', 'BRCA2', 'POLD1', 'NOTCH2', 'KEAP1', 'PTPRD', 'FAT1', 'SETBP1', 'FGFR1', 'RYR1', 'MAP2K1', 'PARP4', 'ARID1A', 'BRAF', 'INHBA', 'POLR2A', 'ALK', 'MGA', 'APC', 'ATF7IP', 'NKX2-1', 'NFE2L2', 'MECOM', 'BRD3', 'FGFR2', 'EPHA7', 'FGFR4', 'SMARCA4', 'CCND1', 'SMG1', 'AXL', 'FBXW7', 'RB1', 'POLE', 'LRP1B', 'ERBB4', 'NTRK2']}
{'first_greater': ['EML4', 'CDC27', 'RAD17'], 'second_greater': ['CBL', 'MYC', 'EPHA3', 'KMT2D', 'NAV3', 'BRCA1', 'SMAD4', 'U2AF1', 'SLIT3', 'STK11', 'ATM', 'TSC1', 'FLT4', 'PIK3CA', 'KDR', 'SCN8A', 'RET', 'TERT', 'GRM1', 'PRKDC', 'PPP2R1A', 'RBM10', 'PAPPA2', 'ERBB2', 'PBRM1', 'SETD

In [21]:
# NOTE(review): `WT_cis` is whatever the most recently executed cell left
# behind; the execution counts suggest the TP53+KRAS-greater subset - confirm.
tmp1 = find_significant_differences(WT_cis,
                                    gammas['pan_data','from_100','cis'])
tmp2 = find_significant_differences(WT_cis,
                                    gammas['pan_data','from_010','cis'])

# Genes whose gamma is greater than WT from one origin but not the other.
_greater_from_100 = set(tmp1['second_greater'])
_greater_from_010 = set(tmp2['second_greater'])
print(_greater_from_100 - _greater_from_010)
print(_greater_from_010 - _greater_from_100)

{'ZMYM2', 'LYST', 'ROS1', 'NCOR2', 'FGFR3', 'ATF7IP', 'MSH2', 'PRKCG', 'PTEN', 'MECOM', 'MET', 'BRD3', 'FGFR2', 'MAP2K1', 'PARP4', 'CCND1', 'EML4', 'RB1', 'BRAF', 'AKT1'}
{'U2AF1'}


Of the 82 genes, the majority feature a higher gamma from TP53 or KRAS than from WT. Most of these overlap but the gammas from TP53 feature a few significant genes that are not present in the significant gammas from KRAS.

## Reason for EGFR epistatic difference between smokers and nonsmokers

In [162]:
#EGFR_pathway_genes = ['MUC','CTNNB1','CDH1','STAT','JAK','SRC','VAV', 'RHO']
def _read_pathway_members(tsv_path):
    """Split the comma-separated cell at row 18, column 1 (positional, as
    parsed by pandas) of a tab-separated file into a list.

    NOTE(review): presumably that cell is the gene-member field of the
    pathway export - confirm against the file layout.
    """
    pathway_table = pd.read_csv(tsv_path, sep = '\t')
    return pathway_table.iloc[18,1].split(',')


# Manually chosen names; some may be gene families rather than genes.
_manual_egfr_genes = ['BRAF','MEK','ERK','MAPK','MAP2K1','PIK3CA','PTEN','AKT1','NFKB','RAS','RAF','RAC','TSC1','TSC2','NF1','JAK1','JAK2','SHC','SOS','GRB2','STAT','MYC','FOXO3A','IRS1','IRS2','PDK','AMPK1','STK11']
RAS_pathway_genes = _read_pathway_members('~/Downloads/PID_RAS_PATHWAY.v7.5.1.tsv')
PI3K_AKT_pathway_genes = _read_pathway_members('~/Downloads/PID_PI3KCI_AKT_PATHWAY.v7.5.1.tsv')
MTOR_pathway_genes = _read_pathway_members('~/Downloads/PID_MTOR_4PATHWAY.v7.5.1.tsv')

# De-duplicated union of the manual list and the three pathway lists.
EGFR_pathway_genes = list(set(_manual_egfr_genes + RAS_pathway_genes + PI3K_AKT_pathway_genes + MTOR_pathway_genes))
#set(EGFR_pathway_genes) - set(RAS_pathway_genes + PI3K_AKT_pathway_genes + MTOR_pathway_genes)
# Drop KRAS from the list (the removed name is the cell's output).
EGFR_pathway_genes.pop(EGFR_pathway_genes.index('KRAS'))

'KRAS'

Below is an incomplete way to find the genes in our gene list that are part of the EGFR pathway. My manually defined genes may be gene families whose members may be in our gene list but are not detected as such.

In [163]:
from locations import gene_list_file

# Upper-case every name in the first column of the gene list file, then
# keep the first 103 of them.
gene_list = [gene.upper()
             for gene in pd.read_csv(gene_list_file, header=None)[0]][:103]

# Keep only the pathway genes that are part of our gene list.
_known_genes = set(gene_list)
EGFR_pathway_genes = [gene for gene in EGFR_pathway_genes if gene in _known_genes]


In [164]:
from filter_data import prefiltered_dbs, filter_samples_for_gene
from count_combinations import updated_compute_samples

keys = ['pan_data','smoking_plus','nonsmoking_plus']


def _kras_egfr_gene_counts(key, gene):
    """Counts returned by `updated_compute_samples` for the KRAS, EGFR and
    `gene` mutations, computed on the `key` database filtered for `gene`."""
    gene_db = filter_samples_for_gene(gene, prefiltered_dbs[key])
    return updated_compute_samples(gene_db, mutations = ['KRAS','EGFR',gene])


# key -> gene -> counts, for every EGFR pathway gene.
pts_per_combination = {key: {gene: _kras_egfr_gene_counts(key, gene)
                             for gene in EGFR_pathway_genes}
                       for key in keys}

"\npts_per_combination = {key:{gene:list() for gene in EGFR_pathway_genes} for key in keys}\n\nfor key in keys:\n    for gene in EGFR_pathway_genes:\n        db = filter_samples_for_gene(gene, prefiltered_dbs[key])\n        pts_per_combination[key][gene] =  updated_compute_samples(db, mutations = ['KRAS','EGFR',gene])\n"

For some reason, the groupby.size function used for updated_compute_samples doesn't always work as intended: instead of putting 0s in the array, it just removes elements with value zero from the array (combinations with no samples are dropped rather than counted as 0). I really need to fix that.

In [167]:
key, gene = 'nonsmoking_plus', 'BRAF'

# Print the per-combination table for one case to inspect missing rows.
updated_compute_samples(
    filter_samples_for_gene(gene, prefiltered_dbs[key]),
    mutations = ['KRAS','EGFR',gene],
    print_info = True,
)

   KRAS  EGFR  BRAF  count
0     0     0     0    277
1     0     0     1     17
2     0     1     0    114
3     0     1     1      3
4     1     0     0     36
5     1     0     1      0


array([277,  17, 114,   3,  36,   0])

The code chunk below doesn't return correct results right now because the nonsmoking_plus counts are all wrong due to the missing values issue.

In [172]:
from main import are_all_fluxes_computable

def at_least_000_to_010_and_001_to_011(samples):
    """Return whether the entries at positions 0 and 1 of `samples` are
    both greater than zero.

    `samples` must support list-style indexing (e.g. a numpy array).
    NOTE(review): presumably positions 0 and 1 are the (0, 0, 0) and
    (0, 0, 1) counts - confirm against `updated_compute_samples`.
    """
    first_two_counts = samples[[0, 1]]
    return np.all(first_two_counts > 0)

def _check_every_gene(check):
    """Apply `check` to each gene's counts in `pts_per_combination`,
    keeping the key -> gene nesting."""
    return {key: {gene: check(samples)
                  for gene, samples in patient_counts.items()}
            for key, patient_counts in pts_per_combination.items()}


fluxes_computable = _check_every_gene(are_all_fluxes_computable)

some_fluxes_computable = _check_every_gene(at_least_000_to_010_and_001_to_011)

# Per key: genes failing the full check, then genes failing the partial one.
for key in keys:
    print(key)
    for _checks in (fluxes_computable, some_fluxes_computable):
        # `== False` is kept as-is: the return type of
        # are_all_fluxes_computable is not visible from this notebook.
        print([gene for gene, value in _checks[key].items() if value == False])

pan_data
['MAP2K1']
[]
smoking_plus
['MYC', 'NRAS', 'HRAS', 'AKT1', 'CCNE1', 'MAP2K1']
[]
nonsmoking_plus
['MYC', 'TSC1', 'NRAS', 'HRAS', 'MAP2K1']
[]


In [168]:
#no samples with EGFR + MAP2K1 in pan_data which is somewhat expected
gene = 'BRAF'

# Same table and counts for the smoking and then the nonsmoking database.
for key in ('smoking', 'nonsmoking'):
    print(
        updated_compute_samples(filter_samples_for_gene(gene, prefiltered_dbs[key]),
                                mutations = ['KRAS','EGFR',gene],
                                print_info = True)
    )

   KRAS  EGFR  BRAF  count
0     0     0     0    377
1     0     0     1     35
2     0     1     0     40
3     0     1     1      6
4     1     0     0    220
5     1     0     1      6
6     1     1     0      1
7     1     1     1      1
[377  35  40   6 220   6   1   1]
   KRAS  EGFR  BRAF  count
0     0     0     0    182
1     0     0     1      9
2     0     1     0     78
3     0     1     1      2
4     1     0     0     22
5     1     0     1      0
[182   9  78   2  22   0]


In [173]:
gene = 'BRAF'
key = 'smoking'
mutations = ['KRAS','EGFR',gene]

db = filter_samples_for_gene(gene, prefiltered_dbs[key])

# groupby(...).size() only lists combinations that occur in the data.
# unstack(fill_value=0).stack() fills zeros only within (KRAS, EGFR) pairs
# that are present, so a pair missing altogether still yields a short
# table. Reindexing on the full 0/1 product gives one row per combination.
# Assumes the mutation columns hold integer 0/1 flags, as in the printed
# tables - confirm.
all_combinations = pd.MultiIndex.from_product([[0, 1]] * len(mutations),
                                              names=mutations)

# NOTE(review): this rebinds `pts_per_combination`, which an earlier cell
# defines as a dict of dicts; re-run that cell before cells that expect the dict.
pts_per_combination = (db.groupby(mutations)
                         .size()
                         .reindex(all_combinations, fill_value=0))
pts_per_combination.reset_index().rename(columns={0:'count'})

{'pan_data': {'MYC': array([3597,   27,  937,    4, 2412,   13,   46,    2]),
  'PTEN': array([4122,   86, 1107,   28, 2952,   34,   50,    4]),
  'STK11': array([3786,  389, 1106,   17, 2580,  371,   48,    6]),
  'TSC1': array([3541,   57,  923,    9, 2368,   27,   47,    0]),
  'PIK3CA': array([3998,  258, 1084,   66, 2869,  157,   47,    8]),
  'NRAS': array([4154,   87, 1146,    2, 3003,   15,   52,    3]),
  'PRKDC': array([2100,  160,  470,   27, 1297,  103,   27,    4]),
  'HRAS': array([4240,   16, 1145,    5, 3021,    5,   54,    1]),
  'AKT1': array([4227,   29, 1140,   10, 3014,   12,   52,    3]),
  'CCNE1': array([3540,   24,  920,    4, 2358,   19,   42,    5]),
  'TSC2': array([3503,   95,  912,   20, 2332,   63,   44,    3]),
  'NF1': array([3356,  302,  914,   34, 2306,  137,   43,    5]),
  'MAP2K1': array([3676,   41,  975,    0, 2490,   12,   47,    2]),
  'BRAF': array([3866,  390, 1123,   27, 2955,   71,   50,    5]),
  'RASA1': array([2298,   37,  651,   11, 147