Subgroup effective drugs are defined as:

A criterion was needed for designating a drug as effective or ineffective in each subgroup. Requiring the same number of hits across subgroups would unduly favor larger subgroups, while requiring the same fraction of hits would unduly favor smaller subgroups. Instead, we required that a drug be effective in enough samples to achieve a specified confidence that, in the limit, the frequency of efficacy in similar samples would exceed a given threshold. The required number of hits is $k = F^{-1}(1-\alpha;\, n, p)$, where $F(k; n, p) = \sum_{i=0}^{k} \binom{n}{i} p^{i} (1-p)^{n-i}$ is the cumulative distribution function of the binomial distribution and $n$ is the number of tumors in the subgroup in question. We chose $\alpha = 0.05$, corresponding to 95% confidence, and $p = 0.5$.

**This required efficacy in 8 of 10 G3 tumors, 5 of 6 SHH tumors, and 4 of 4 G4 tumors.**

In [1]:
import pandas as pd
# Minimum number of hits a drug needs within a subgroup to be called
# effective there. The G3, G4 and SHH values are the ones quoted in the text
# above; the SHH+p53 value is not stated there -- TODO confirm its source.
min_hits = pd.Series({'G3': 8, 'G4': 4, 'SHH': 5, 'SHH+p53': 3})
min_hits

G3         8
G4         4
SHH        5
SHH+p53    3
dtype: int64

In [2]:
import ccalnoir as ccal
import cuzcatlan as cusca
import os

In [3]:
def cls_to_boolean(file):
    """Read the label line of a CLS phenotype file as a list of booleans.

    The first two lines of the file are skipped and the third line is parsed
    as whitespace-separated integer labels. Each label is converted with
    ``bool(int(label))``, so ``0`` becomes ``False`` and any other integer
    becomes ``True``.

    Parameters
    ----------
    file : str or path-like
        Path of the CLS file to read.

    Returns
    -------
    list of bool
        One entry per label on the third line, in file order. Empty if the
        third line is missing or blank.

    Raises
    ------
    ValueError
        If a token on the third line is not an integer literal.
    """
    with open(file) as f:
        # Skip the two lines that precede the labels.
        f.readline()
        f.readline()
        label_line = f.readline()
    # split() with no argument tolerates tabs, repeated spaces and trailing
    # whitespace; split(' ') produced empty tokens there and int('') raised.
    return [bool(int(label)) for label in label_line.split()]

## Find effective drugs per PDX

In [4]:
# Load the table of drug scores. header=2 takes row 2 (0-based) of the file
# as the column header, the first column becomes the row index, and the
# 'Description' column is discarded.
all_drug_scores = 'results/pdx_drug_scores.gct'
all_scores = (
    pd.read_csv(all_drug_scores, sep='\t', header=2, index_col=0)
    .drop(columns='Description')
)

# A score counts as a hit when it reaches this percentile of its own column.
# 99.9 was used for the drug screen; 90 was also tried.
which_percentile = 75
which_quantile = which_percentile / 100

# One cut-off value per column of all_scores.
thresholds = all_scores.apply(
    lambda column_scores: column_scores.quantile(which_quantile),
    axis=0,
)
print(f"Computed threshold for the {which_percentile}-th percentile({which_quantile}-th quantile)")

Computed threshold for the 75-th percentile(0.75-th quantile)


In [5]:
# Boolean table with the same shape as all_scores: True where a score reaches
# the threshold computed for its column.
per_pdx_hits = all_scores >= thresholds

# From: https://xlsxwriter.readthedocs.io/example_pandas_conditional.html
# The context manager saves and closes the workbook on exit.
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in pandas 2.0.
with pd.ExcelWriter('results/hits_per_pdx.xlsx', engine='xlsxwriter') as writer:
    # Convert the dataframe to an XlsxWriter Excel object.
    per_pdx_hits.to_excel(writer, sheet_name='Effective Drugs')

    # Get the xlsxwriter workbook and worksheet objects.
    workbook = writer.book
    worksheet = writer.sheets['Effective Drugs']

    # Black fill with dark gray text for hits.
    true_format = workbook.add_format({'bg_color':   '#000000',
                                       'font_color': '#444444'})
    # White fill with light gray text for non-hits.
    false_format = workbook.add_format({'bg_color':   '#FFFFFF',
                                        'font_color': '#AAAAAA'})

    # Format from the header row (row 0) down to the last data row, across
    # every data column (column 0 holds the index). The range is derived from
    # the frame's shape rather than the hard-coded 'B1:U673', so it stays
    # correct if the number of rows or columns changes.
    n_rows, n_cols = per_pdx_hits.shape
    for cell_text, cell_format in (('TRUE', true_format), ('FALSE', false_format)):
        worksheet.conditional_format(0, 1, n_rows, n_cols,
                                     {'type':     'text',
                                      'criteria': 'containing',
                                      'value':    cell_text,
                                      'format':   cell_format})

print("Successfully wrote and formated file: `results/hits_per_pdx.xlsx`")
# per_pdx_hits

Successfully wrote and formated file: `results/hits_per_pdx.xlsx`


In [6]:
# Maybe add sheets for unique hits?

## Find hits per subgroup

In [7]:
# Build results/hits_subgroup_effective.xlsx with, per subgroup, a pass/fail
# sheet and a pass/fail + mean-score sheet, followed by three summary sheets.
# Assumes per_pdx_hits, all_scores, min_hits and cls_to_boolean were defined
# in the cells above.
#
# The context manager saves and closes the workbook on exit.
# ExcelWriter.save() was deprecated in pandas 1.5 and removed in pandas 2.0.
with pd.ExcelWriter('results/hits_subgroup_effective.xlsx', engine='xlsxwriter') as writer:
    first_subgroup = True
    combined_sheet = pd.DataFrame()
    for subgroup in ['G3', 'G4', 'SHH', 'SHH+p53']:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(f'results/{subgroup}', exist_ok=True)

        # Boolean mask over the columns of per_pdx_hits, read from the
        # subgroup's CLS file.
        phenotype_file = f'results/{subgroup}.cls'
        subgroup_bool = cls_to_boolean(phenotype_file)
        subgroup_columns = per_pdx_hits.columns[subgroup_bool]

        # Hit flags and raw scores restricted to this subgroup's columns.
        subgroup_scores = per_pdx_hits[subgroup_columns]
        subgroup_numeric_scores = all_scores[subgroup_columns]

        # A drug passes when its hit count in the subgroup reaches the
        # subgroup-specific minimum.
        drug_passes_test = subgroup_scores.sum(axis=1) >= min_hits[subgroup]
        drug_passes_test.name = subgroup
        if first_subgroup:
            hits_per_subgroup = pd.DataFrame(drug_passes_test)
            first_subgroup = False
        else:
            hits_per_subgroup = hits_per_subgroup.join(drug_passes_test)

        drug_passes_test.to_excel(writer, sheet_name=f'{subgroup}')
        print(f'Sheet named "{subgroup}" added successfully to file named "results/hits_subgroup_effective.xlsx"!')

        # Pass/fail flag next to the subgroup's mean score for each drug.
        to_sheet = pd.concat([drug_passes_test, subgroup_numeric_scores.mean(axis=1)], axis=1)
        to_sheet.columns = [subgroup, 'mean_score(' + subgroup + ')']
        combined_sheet = combined_sheet.join(to_sheet, how='outer')
        to_sheet.to_excel(writer, sheet_name=f'{subgroup}+score')
        print(f'Sheet named "{subgroup}+score" added successfully to file named "results/hits_subgroup_effective.xlsx"!')

    hits_per_subgroup.to_excel(writer, sheet_name=f'all_subgroups')
    print(f'Sheet named "all_subgroups" added successfully to file named "results/hits_subgroup_effective.xlsx"!')
    combined_sheet.to_excel(writer, sheet_name=f'all_subgroups+scores')
    print(f'Sheet named "all_subgroups+scores" added successfully to file named "results/hits_subgroup_effective.xlsx"!')

    # Drop drugs which are not effective in any of G3, G4 or SHH.
    # NOTE(review): SHH+p53 is not part of this filter -- confirm that is intended.
    effective_exp = combined_sheet[combined_sheet.apply(lambda x: any([x['G3'], x['G4'], x['SHH']]), axis=1)]
    # NOTE(review): drop=True discards the row labels, so this sheet is written
    # with a 0..N-1 index instead of the original index -- confirm intended.
    effective_exp = effective_exp.reset_index(drop=True)
    effective_exp.to_excel(writer, sheet_name=f'effective_drugs')
    print(f'Sheet named "effective_drugs" added successfully to file named "results/hits_subgroup_effective.xlsx"!')

    # Get the xlsxwriter workbook object.
    workbook = writer.book

    # Black fill with dark gray text for True cells.
    true_format = workbook.add_format({'bg_color':   '#000000',
                                       'font_color': '#444444'})
    # White fill with light gray text for False cells.
    false_format = workbook.add_format({'bg_color':   '#FFFFFF',
                                        'font_color': '#AAAAAA'})

    # Apply the TRUE/FALSE colouring to each summary sheet. The range starts
    # below the header row and right of the index column, and is sized from
    # the frame written to that sheet rather than the hard-coded 'B2:H673'.
    summary_frames = {'all_subgroups': hits_per_subgroup,
                      'all_subgroups+scores': combined_sheet,
                      'effective_drugs': effective_exp}
    for sheet, frame in summary_frames.items():
        if frame.empty:
            # Nothing to format; also avoids passing an inverted range.
            continue
        worksheet = writer.sheets[sheet]
        n_rows, n_cols = frame.shape
        for cell_text, cell_format in (('TRUE', true_format), ('FALSE', false_format)):
            worksheet.conditional_format(1, 1, n_rows, n_cols,
                                         {'type':     'text',
                                          'criteria': 'containing',
                                          'value':    cell_text,
                                          'format':   cell_format})

Sheet named "G3" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "G3+score" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "G4" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "G4+score" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "SHH" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "SHH+score" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "SHH+p53" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "SHH+p53+score" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "all_subgroups" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "all_subgroups+scores" added successfully to file named "results/hits_subgroup_effective.xlsx"!
Sheet named "effective_drugs" added