# Literature Deconvolution - Pre-Processing

In [None]:
import sys 
%load_ext rpy2.ipython
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))
%matplotlib inline
import copy
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import anndata as ad
import scipy
import seaborn as sns
sc.set_figure_params(scanpy=True, dpi_save=200)
import jenkspy

In [None]:
def load_fractions(dem_path):
    """Load a CIBERSORTx fractions CSV and annotate every sample in ``.obs``.

    The table is read with ``sc.read_csv`` (first column holds the sample
    names) and the CIBERSORTx summary columns are dropped so that only the
    cell-type columns remain as variables. Two groups of columns are then
    merged into ``.obs``, one pair per cell type:

    * ``<celltype>-Absolute`` / ``<celltype>-Abs.Category``: the value as read
      from the file and a "High"/"Low" label relative to the column median.
    * ``<celltype>-Relative`` / ``<celltype>-Rel.Category``: the value divided
      by the sample's row sum and the same median-based label.

    Parameters
    ----------
    dem_path : str
        Path to the CIBERSORTx output CSV.

    Returns
    -------
    AnnData
        Samples x cell types, with ``.X`` left as read from the file.
    """
    summary_columns = ['P-value', 'Correlation', 'RMSE', 'Absolute score (sig.score)']

    def add_median_categories(scores, suffix, category_suffix):
        # Loop over a snapshot of the column names: category columns are
        # appended to the same frame while iterating.
        for column in list(scores.columns):
            values = scores[column].values
            scores[column.replace(suffix, category_suffix)] = np.where(
                values > np.median(values), "High", "Low")
        return scores

    # load CIBERSORT output
    dat = sc.read_csv(dem_path, first_column_names=True)
    # Copy the column subset so that .obs is modified on a real AnnData, not a view.
    dat = dat[:, ~dat.var_names.isin(summary_columns)].copy()
    dat.obs.index.rename('Sample', inplace=True)

    # add the absolute scores
    cell_type_scores = pd.DataFrame(dat.X, index=dat.obs_names, columns=dat.var_names + "-Absolute")
    cell_type_scores = add_median_categories(cell_type_scores, 'Absolute', 'Abs.Category')
    dat.obs = dat.obs.merge(cell_type_scores, how='left', left_index=True, right_index=True)

    # Relative scores: each sample (row) is divided by its own total.
    relative_scores = pd.DataFrame(dat.X, index=dat.obs_names, columns=dat.var_names + "-Relative")
    relative_scores = relative_scores.div(relative_scores.sum(axis=1), axis=0)
    relative_scores = add_median_categories(relative_scores, 'Relative', 'Rel.Category')
    dat.obs = dat.obs.merge(relative_scores, how='left', left_index=True, right_index=True)

    return dat

In [None]:
def score_lsc_signature(adata, signature, name, scale=False):
    """Score every observation with a weighted gene signature.

    The score is the sum over signature genes of ``expression * weight``.
    Results are written to ``adata.obs[name]`` together with a "High"/"Low"
    label in ``adata.obs[name + '_cat']`` (above / not above the median of the
    unscaled score).

    Parameters
    ----------
    adata : AnnData-like
        Object exposing ``.X`` (2-D dense array), ``.var_names`` and ``.obs``.
    signature : pandas.DataFrame
        Must have a ``Gene`` column; the weight is read from the second column.
    name : str
        Name of the ``.obs`` column that receives the score.
    scale : bool, default False
        If true, the stored score is centred on its median and divided by
        ``1.4826 * MAD`` (the scaling the former
        ``scipy.stats.median_absolute_deviation`` applied by default).

    Returns
    -------
    The same ``adata`` object, modified in place.

    Raises
    ------
    ValueError
        If a signature gene does not match exactly one column of ``adata``.
    """
    n_obs = adata.obs.shape[0]
    scores = np.zeros(n_obs)
    for gene, weight in zip(signature.Gene, signature.iloc[:, 1]):
        expression = adata.X[:, adata.var_names == gene].flatten()
        if expression.shape[0] != n_obs:
            # Zero or several matching columns would otherwise fail with an
            # opaque broadcasting error.
            raise ValueError(
                "signature gene %r does not match exactly one variable in adata" % (gene,))
        scores += expression * weight

    adata.obs[name] = scores
    adata.obs[name + '_cat'] = np.where(adata.obs[name] > adata.obs[name].median(), "High", "Low")

    if scale:
        # Computed with numpy only so it does not depend on which MAD
        # function the installed SciPy provides.
        center = np.median(scores)
        mad = 1.4826 * np.median(np.abs(scores - center))
        adata.obs[name] = (scores - center) / mad

    return adata

In [None]:
# Load the reference AnnData object; the bare `hcat` below displays its summary in the notebook.
hcat = sc.read("Data/Hierarchy_Reference_3Cohorts.h5ad")
hcat

In [None]:
# Second column of the stored PC matrix (index 1), one loading per variable.
PC2_loadings = hcat.varm['PCs'][:,1]
# NOTE(review): this hard-coded vector immediately overwrites the value read from `hcat` above;
# presumably it is a snapshot of the same 7 loadings - confirm, since every PC2 score below uses it.
PC2_loadings = np.array([-0.20023263, -0.08772767, -0.27906924, -0.5126374, 0.7101968, 0.04527085, 0.32419923])

## Loading Prediction Output

In [None]:
def plot_celltype_fractions(adata, group, grouplab, order, boxpairlist, score, score_lab, txtformat = 'simple', save = False): 
    """Draw a violin plot of ``score`` per ``group`` category and optionally save it.

    The plotting itself is delegated to ``tk.violin_category``.
    NOTE(review): ``tk`` is not imported in the visible part of this file;
    it must be in scope before this function is called.

    Parameters
    ----------
    adata : object passed straight through to ``tk.violin_category``.
    group, score : str
        Grouping column and score column; also used to build the file name.
    grouplab, score_lab : str
        Axis labels (x and y respectively).
    order, boxpairlist
        Forwarded to ``tk.violin_category``.
    txtformat : str, default 'simple'
        Forwarded as ``textformat``.
    save : bool, default False
        If true, the current figure is written to
        ``figures/<group>_<score>.png`` (the directory is created if needed).
    """
    import os
    sns.set_style("white")

    tk.violin_category(adata, group, score, order=order, boxpairlist=boxpairlist, 
                   textformat=txtformat, xlab=grouplab, ylab=score_lab)

    if save:
        # Grab the figure that was just drawn; previously `fig` was never
        # defined, so save=True raised NameError.
        fig = plt.gcf()
        out_dir = "figures"
        os.makedirs(out_dir, exist_ok=True)
        fig.savefig(os.path.join(out_dir, group + "_" + score + ".png"), dpi = 300, bbox_inches='tight')
    plt.show()

In [None]:
def get_ranklist(dat, classification, cluster_A, cluster_B, test='wilcoxon'):
    """Rank variables for ``cluster_A`` in a two-group comparison.

    Observations whose ``dat.obs[classification]`` is ``cluster_A`` or
    ``cluster_B`` are kept, ``sc.tl.rank_genes_groups`` is run on them, and the
    results reported for ``cluster_A`` are returned.

    Parameters
    ----------
    dat : AnnData
    classification : str
        Name of the ``.obs`` column holding the group labels.
    cluster_A, cluster_B : str
        The two labels to compare; the returned table is the one for ``cluster_A``.
    test : str, default 'wilcoxon'
        Passed as ``method`` to ``sc.tl.rank_genes_groups``.

    Returns
    -------
    pandas.DataFrame
        Columns ``names``, ``scores``, ``logfoldchanges`` and ``pvals``.
    """
    fields = ['names', 'scores', 'logfoldchanges', 'pvals']

    # Work on a copy: rank_genes_groups writes its results into `.uns`, which
    # should not happen on a view of the caller's object.
    adata = dat[dat.obs[classification].isin([cluster_A, cluster_B])].copy()

    sc.tl.rank_genes_groups(adata, classification, method = test, n_genes=20000)
    ranked = adata.uns['rank_genes_groups']
    sam_cat_DE_df = pd.concat(
        [pd.DataFrame(ranked[field]) for field in fields],
        keys = fields, axis=1)

    # Put the group label on the outer column level so one group can be selected.
    sam_cat_DE_df.columns = sam_cat_DE_df.columns.swaplevel(0, 1)
    sam_cat_DE_df.sort_index(axis=1,level=0,inplace=True)
    sam_cat_DE_df = sam_cat_DE_df[cluster_A][fields]

    return(sam_cat_DE_df)

# Run on Cell Lines

In [None]:
def get_scores(df, dat, classification, cluster_A, cluster_B, cohort, drug, target):
    """Append the Wilcoxon ranking of ``cluster_A`` vs ``cluster_B`` to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Accumulated results; it is not modified.
    dat : AnnData
        Data passed to ``get_ranklist``.
    classification, cluster_A, cluster_B : str
        Forwarded to ``get_ranklist``.
    cohort, drug, target : str
        Labels stored in the ``Cohort``, ``Drug`` and ``Target`` columns of
        the new rows.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by the new rows (``Test`` is set to ``'wilcox'``).
    """
    ## Wilcox
    ranked = get_ranklist(dat, classification, cluster_A, cluster_B, test='wilcoxon')
    # assign() returns a new frame, so the labels are not written onto a slice.
    ranked = ranked.assign(Cohort=cohort, Drug=drug, Target=target, Test='wilcox')

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    return pd.concat([df, ranked])

In [None]:
# Accumulator for all comparisons: every get_scores(df, ...) call below returns it with new rows added.
df = pd.DataFrame()

### 5 Inhibitor

In [None]:
# Load the fractions for the AF9 file and rescale every sample (row) to sum to 1.
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_5inhibitor_AF9.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
# '.' has to be replaced literally; regex=False makes this independent of
# the pandas default for `regex`.
dat.obs.index = dat.obs.index.str.replace('.', '_', regex=False)
# Sample names are split on '_': field 0 = Line, field 1 = Treatment, field 2 = Timepoint.
dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO','Control')
dat.obs['Timepoint'] = [x[2] for x in dat.obs.index.str.split('_')]
dat.obs['Line'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Cohort'] = "5_inhibitors"

### 5 Inhibitor nonAF9

In [None]:
# Load the fractions for the non-AF9 file and rescale every sample (row) to sum to 1.
dat2 = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_5inhibitor_nonAF9.csv")
dat2.X = (dat2.X.transpose() / dat2.X.sum(axis=1)).transpose()
# '.' has to be replaced literally; regex=False makes this independent of
# the pandas default for `regex`.
dat2.obs.index = dat2.obs.index.str.replace('.', '_', regex=False)
# Sample names are split on '_': field 0 = Line, field 1 = Treatment, field 2 = Timepoint.
dat2.obs['Treatment'] = [x[1] for x in dat2.obs.index.str.split('_')]
dat2.obs['Treatment'] = dat2.obs['Treatment'].str.replace('DMSO','Control')
dat2.obs['Timepoint'] = [x[2] for x in dat2.obs.index.str.split('_')]
dat2.obs['Line'] = [x[0] for x in dat2.obs.index.str.split('_')]
dat2.obs['Cohort'] = "5_inhibitors"

In [None]:
dat = dat.concatenate(dat2)
del(dat2)

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
# MLL-AF9
for line in ['MOLM13', 'MOLM14', 'OCIAML2', 'NOMO1']:
    df = get_scores(df, dat[(dat.obs.Timepoint == '24h') & (dat.obs.Line == line)], 'Treatment', 'Brequinar', 'Control', line, 'Brequinar', 'DHODH')
    df = get_scores(df, dat[(dat.obs.Timepoint == '24h') & (dat.obs.Line == line)], 'Treatment', 'BAY155', 'Control', line, 'BAY155', 'Menin-MLL')
    df = get_scores(df, dat[(dat.obs.Timepoint == '96h') & (dat.obs.Line == line)], 'Treatment', 'EPZ5676', 'Control', line, 'EPZ5676', 'DOT1L')
    df = get_scores(df, dat[(dat.obs.Timepoint.isin(['8h', '24h'])) & (dat.obs.Line == line)], 'Treatment', 'OTX015', 'Control', line, 'OTX015', 'BET')
    df = get_scores(df, dat[(dat.obs.Timepoint.isin(['8h', '24h'])) & (dat.obs.Line == line)], 'Treatment', 'BAY1251152', 'Control', line, 'BAY1251152', 'CDK9')

In [None]:
# MLL, non-AF9
for line in ['ML2', 'MV411']:
    df = get_scores(df, dat[(dat.obs.Timepoint == '24h') & (dat.obs.Line == line)], 'Treatment', 'Brequinar', 'Control', line, 'Brequinar', 'DHODH')
    df = get_scores(df, dat[(dat.obs.Timepoint == '24h') & (dat.obs.Line == line)], 'Treatment', 'BAY155', 'Control', line, 'BAY155', 'Menin-MLL')
    df = get_scores(df, dat[(dat.obs.Timepoint == '96h') & (dat.obs.Line == line)], 'Treatment', 'EPZ5676', 'Control', line, 'EPZ5676', 'DOT1L')
    df = get_scores(df, dat[(dat.obs.Timepoint.isin(['8h', '24h'])) & (dat.obs.Line == line)], 'Treatment', 'OTX015', 'Control', line, 'OTX015', 'BET')
    df = get_scores(df, dat[(dat.obs.Timepoint.isin(['8h', '24h'])) & (dat.obs.Line == line)], 'Treatment', 'BAY1251152', 'Control', line, 'BAY1251152', 'CDK9')

In [None]:
# Non-MLL
for line in ['OCIAML5', 'HL60', 'OCIAML3']:
    df = get_scores(df, dat[(dat.obs.Timepoint == '24h') & (dat.obs.Line == line)], 'Treatment', 'Brequinar', 'Control', line, 'Brequinar', 'DHODH')
    df = get_scores(df, dat[(dat.obs.Timepoint == '24h') & (dat.obs.Line == line)], 'Treatment', 'BAY155', 'Control', line, 'BAY155', 'Menin-MLL')
    df = get_scores(df, dat[(dat.obs.Timepoint == '96h') & (dat.obs.Line == line)], 'Treatment', 'EPZ5676', 'Control', line, 'EPZ5676', 'DOT1L')
    df = get_scores(df, dat[(dat.obs.Timepoint.isin(['8h', '24h'])) & (dat.obs.Line == line)], 'Treatment', 'OTX015', 'Control', line, 'OTX015', 'BET')
    df = get_scores(df, dat[(dat.obs.Timepoint.isin(['8h', '24h'])) & (dat.obs.Line == line)], 'Treatment', 'BAY1251152', 'Control', line, 'BAY1251152', 'CDK9')

### BET ARV-825 OTX015

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_ARV825OTX015_BET.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = np.where(dat.obs.index.str.contains("ARV"), "ARV-825", 
                               np.where(dat.obs.index.str.contains("OTX015"), "OTX015", "Control"))

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'ARV-825', 'Control', "OCI-AML5", 'ARV-825', 'BET')

In [None]:
df = get_scores(df, dat, 'Treatment', 'OTX015', 'Control', "OCI-AML5", 'OTX015', 'BET')

### YKL Palbociclib BSJ CDK6 Selective

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_YKL_Palbociclib_BSJ_CDK6_Selective.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO','Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'Palbo', 'Control', "MV4-11", 'Palbociclib', 'CDK4/6')

In [None]:
df = get_scores(df, dat, 'Treatment', 'YKL', 'Control', "MV4-11", 'YKL (CDK6/IKZF)', 'CDK6/IKZF')

In [None]:
df = get_scores(df, dat, 'Treatment', 'BSJ', 'Control', "MV4-11", 'BSJ (CDK6 only)', 'CDK6 only')

### ASLAN003 DHODH

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_ASLAN003_DHODH.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs.index = dat.obs.index.str.replace('.', '_')
dat.obs['Line'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO','Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat[dat.obs.Line == 'KG1'], 'Treatment', 'ASLAN003', 'Control', "KG1", 'ASLAN003', 'DHODH')

In [None]:
df = get_scores(df, dat[dat.obs.Line == 'MOLM14'], 'Treatment', 'ASLAN003', 'Control', "MOLM-14", 'ASLAN003', 'DHODH')

### NSC-370284 TET1

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_NSC370284_TET1.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('-')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('Ctrl','Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'NSC', 'Control', "THP-1", 'NSC-370', 'TET1')

### AC-4-130 STAT5

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_AC4130_STAT5.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Line'] = [x[1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = [x[3] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO','Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat[dat.obs.Line=='MV4'], 'Treatment', 'AC', 'Control', "MV4-11", 'AC-4-130', 'STAT5')

In [None]:
df = get_scores(df, dat[dat.obs.Line=='MOLM'], 'Treatment', 'AC', 'Control', "MOLM-13", 'AC-4-130', 'STAT5')

### MitoCopper MitoBlock6

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_MitoCopper_MitoBlock6.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[2] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('ctrl','Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'MB6', 'Control', "OCI-AML2", 'MitoBlock6', 'Mito Copper')

### GSK690 RN-1 KDM1A

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_GSK690RN-1_KDM1A.csv")
anno = pd.read_csv("GSE71740_RNA-seq_sample_annotations.txt", delimiter='\t').set_index('Sample')
dat.obs = dat.obs.join(anno)
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[1] for x in dat.obs.SampleTitle.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO','Control')
del(anno)

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat[(dat.obs.Timepoint==24) & (dat.obs['Cell Line'] == 'SK-NO-1')], 'Treatment', 'GSK690', 'Control', "SKNO-1", 'GSK690 (24hr)', 'KDM1A')

In [None]:
df = get_scores(df, dat[(dat.obs.Timepoint==24) & (dat.obs['Cell Line'] == 'SK-NO-1')], 'Treatment', 'RN-1', 'Control', "SKNO-1", 'RN-1 (24hr)', 'KDM1A')

In [None]:
df = get_scores(df, dat[(dat.obs.Timepoint==24) & (dat.obs['Cell Line'] == 'Kasumi-1')], 'Treatment', 'GSK690', 'Control', "Kasumi-1", 'GSK690 (24hr)', 'KDM1A')

In [None]:
df = get_scores(df, dat[(dat.obs.Timepoint==24) & (dat.obs['Cell Line'] == 'Kasumi-1')], 'Treatment', 'RN-1', 'Control', "Kasumi-1", 'RN-1 (24hr)', 'KDM1A')

In [None]:
df = get_scores(df, dat[(dat.obs.Timepoint==72) & (dat.obs['Cell Line'] == 'SK-NO-1')], 'Treatment', 'GSK690', 'Control', "SKNO-1", 'GSK690 (72hr)', 'KDM1A')

In [None]:
df = get_scores(df, dat[(dat.obs.Timepoint==72) & (dat.obs['Cell Line'] == 'SK-NO-1')], 'Treatment', 'RN-1', 'Control', "SKNO-1", 'RN-1 (72hr)', 'KDM1A')

In [None]:
df = get_scores(df, dat[(dat.obs.Timepoint==72) & (dat.obs['Cell Line'] == 'Kasumi-1')], 'Treatment', 'GSK690', 'Control', "Kasumi-1", 'GSK690 (72hr)', 'KDM1A')

In [None]:
df = get_scores(df, dat[(dat.obs.Timepoint==72) & (dat.obs['Cell Line'] == 'Kasumi-1')], 'Treatment', 'RN-1', 'Control', "Kasumi-1", 'RN-1 (72hr)', 'KDM1A')

### EPZ004777 DOT1L

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_EPZ004777_DOT1L.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat = dat[dat.obs.index!='Sum']
dat.obs['Line'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO','Control').str.replace('DFCI', 'EPZ004777')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat[dat.obs.Line == 'OCI2'], 'Treatment', 'EPZ004777', 'Control', "OCI-AML2", 'EPZ004777', 'DOT1L')

In [None]:
df = get_scores(df, dat[dat.obs.Line == 'OCI3'], 'Treatment', 'EPZ004777', 'Control', "OCI-AML3", 'EPZ004777', 'DOT1L')

### FB23-1 FB23-2 FTO

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_FB23_FTO.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs.index = dat.obs.index.str.replace('.','_')
dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO','Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', '231', 'Control', "NB-4", 'FB23-1', 'FTO')

In [None]:
df = get_scores(df, dat, 'Treatment', '232', 'Control', "NB-4", 'FB23-2', 'FTO')

### CS1 CS2 FTO

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_CS_FTO.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs.index = dat.obs.index.str.replace('.','_')
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'CS1', 'Control', "NOMO-1", 'CS1', 'FTO')

In [None]:
df = get_scores(df, dat, 'Treatment', 'CS2', 'Control', "NOMO-1", 'CS2', 'FTO')

### OTX015 MTHFR BET

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_OTX015_MTHFRBET.csv")
dat_IMSM2 = dat[0:12]
dat_IMSM2.X = (dat_IMSM2.X.transpose() / dat_IMSM2.X.sum(axis=1)).transpose()
dat_IMSM2.obs['Treatment'] = [x[1] for x in dat_IMSM2.obs.index.str.split('_')]
dat_IMSM2.obs['Treatment'] = dat_IMSM2.obs['Treatment'].str.replace('DMSO','Control')
dat_IMSM2.obs['Treatment'] = dat_IMSM2.obs['Treatment'].str.replace('OTX','OTX015')

In [None]:
newdat = dat_IMSM2.to_df()
newdat['PC2'] = (dat_IMSM2.to_df() * PC2_loadings).sum(axis=1)
dat_IMSM2 = ad.AnnData(newdat, dat_IMSM2.obs)
del(newdat)

In [None]:
df = get_scores(df, dat_IMSM2, classification = 'Treatment', cluster_A = 'OTX015', cluster_B = 'Control', cohort = 'IMS-M2', drug = 'OTX015', target = 'BET')

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_OTX015_MTHFRBET.csv")
dat_U937 = dat[12:]
dat_U937.X = (dat_U937.X.transpose() / dat_U937.X.sum(axis=1)).transpose()
dat_U937.obs['Treatment'] = [x[1] for x in dat_U937.obs.index.str.split('_')]
dat_U937.obs['Treatment'] = dat_U937.obs['Treatment'].str.replace('DMSO','Control')
dat_U937.obs['Treatment'] = dat_U937.obs['Treatment'].str.replace('OTX','OTX015')

In [None]:
newdat = dat_U937.to_df()
newdat['PC2'] = (dat_U937.to_df() * PC2_loadings).sum(axis=1)
dat_U937 = ad.AnnData(newdat, dat_U937.obs)
del(newdat)

In [None]:
df = get_scores(df, dat_U937, classification = 'Treatment', cluster_A = 'OTX015', cluster_B = 'Control', cohort = 'U-937', drug = 'OTX015', target = 'BET')

### MRT199665 Phospho MEF2C

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_MRT199665_PhosphoMEF2C.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x for x in dat.obs.index]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('rep','').str.replace('[0-9]', '')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'MRT', cluster_B = 'Control', cohort = 'OCI-AML3', drug = 'MRT199665', target = 'Phospho MEF2C')

### I-BET152 BET

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_IBET152_BET.csv")
dat_K562 = dat[:10]
dat_K562.X = (dat_K562.X.transpose() / dat_K562.X.sum(axis=1)).transpose()
dat_K562.obs['Treatment'] = [x[1] for x in dat_K562.obs.index.str.split('_')]
dat_K562.obs['Treatment'] = dat_K562.obs['Treatment'].str.replace('DMSO', 'Control').str.replace('I5000', 'I-BET152')
dat_K562.obs['Treatment'] = dat_K562.obs['Treatment'].str.replace('I500', 'I-BET152')
dat_K562.obs['Treatment'] = dat_K562.obs['Treatment'].str.replace('I50', 'I-BET152')

In [None]:
newdat = dat_K562.to_df()
newdat['PC2'] = (dat_K562.to_df() * PC2_loadings).sum(axis=1)
dat_K562 = ad.AnnData(newdat, dat_K562.obs)
del(newdat)

In [None]:
df = get_scores(df, dat_K562, classification = 'Treatment', cluster_A = 'I-BET152', cluster_B = 'Control', cohort = 'K-562', drug = 'I-BET152', target = 'BET')

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_IBET152_BET.csv")
dat_MV411 = dat[10:]
dat_MV411.X = (dat_MV411.X.transpose() / dat_MV411.X.sum(axis=1)).transpose()
dat_MV411.obs['Treatment'] = [x[1] for x in dat_MV411.obs.index.str.split('_')]
dat_MV411.obs['Treatment'] = dat_MV411.obs['Treatment'].str.replace('DMSO', 'Control').str.replace('I5000', 'I-BET152')
dat_MV411.obs['Treatment'] = dat_MV411.obs['Treatment'].str.replace('I500', 'I-BET152')
dat_MV411.obs['Treatment'] = dat_MV411.obs['Treatment'].str.replace('I50', 'I-BET152')

In [None]:
newdat = dat_MV411.to_df()
newdat['PC2'] = (dat_MV411.to_df() * PC2_loadings).sum(axis=1)
dat_MV411 = ad.AnnData(newdat, dat_MV411.obs)
del(newdat)

In [None]:
df = get_scores(df, dat_MV411, classification = 'Treatment', cluster_A = 'I-BET152', cluster_B = 'Control', cohort = 'MV4-11', drug = 'I-BET152', target = 'BET')

### ALRN-6924 MDMX/MDM2

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_ALRN6924_MDMXMDM2.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('Vehicle','Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'ALRN-6924', cluster_B = 'Control', cohort = 'MOLM-13', drug = 'Dexamathasone', target = 'MDMX/MDM2')

### Dexamethasone Quizartinib FLT3

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_DexaQuiz_FLT3.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('-')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO','Control').str.replace('Quiz', 'Quizartinib').str.replace('Dex', 'Dexamathasone')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'Dexamathasone', cluster_B = 'Control', cohort = 'MV4-11', drug = 'Dexamathasone', target = 'FLT3')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'Quizartinib', cluster_B = 'Control', cohort = 'MV4-11', drug = 'Quizartinib', target = 'FLT3')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'Quizartinib + Dexamathasone', cluster_B = 'Control', cohort = 'MV4-11', drug = 'Quizartinib + Dexamathasone', target = 'FLT3')

### EPZ5676 GSK 126 DOT1L EZH2 (MV4-11)

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_EPZGSK_DOT1LEZH2_MV411.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('NC', 'Control').str.replace('low', '').str.replace('high', '').str.replace('EZH2', 'GSK126').str.replace('DOT1L', 'EPZ5676').str.replace('Combo', 'GSK126 + EPZ5676')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'GSK126', cluster_B = 'Control', cohort = 'MV4-11', drug = 'GSK126', target = 'EZH2')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'EPZ5676', cluster_B = 'Control', cohort = 'MV4-11', drug = 'EPZ5676', target = 'DOT1L')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'GSK126 + EPZ5676', cluster_B = 'Control', cohort = 'MV4-11', drug = 'EPZ5676 + GSK126', target = 'DOT1L + EZH2')

### EPZ5676 GSK 126 DOT1L EZH2 (MOLM-14)

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_EPZGSK_DOT1LEZH2_MOLM.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x for x in dat.obs.index]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('Molm', '').str.replace('[0-9]', '').str.replace('DMSO', 'Control').str.replace('DotL', 'EPZ5676').str.replace('Ezh', 'GSK126').str.replace('Combo', 'GSK126 + EPZ5676')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'GSK126', cluster_B = 'Control', cohort = 'MOLM-14', drug = 'GSK126', target = 'EZH2')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'EPZ5676', cluster_B = 'Control', cohort = 'MOLM-14', drug = 'EPZ5676', target = 'DOT1L')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'GSK126 + EPZ5676', cluster_B = 'Control', cohort = 'MOLM-14', drug = 'EPZ5676 + GSK126', target = 'DOT1L + EZH2')

### BRD0705 BRD0320 GSK3a/b

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_BRD_GSK3ab.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split('_')]

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'BRD0705', cluster_B = 'Control', cohort = 'U-937', drug = 'BRD0705', target = 'GSK3a')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'BRD0320', cluster_B = 'Control', cohort = 'U-937', drug = 'BRD0320', target = 'GSK3a/b')

### CX-5461 I-BET151 Doxorubicin POL-I BET TOPO-II

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_CX5461IBET151Doxorubicin_POLIBETTOPOII.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x for x in dat.obs.index]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('\.[^.]*$', '').str.replace('[0-9]', '').str.replace('.', '_')
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('v', 'Control').str.replace('ib', 'I-BET').str.replace('dox', 'Doxo').str.replace('cx', 'CX')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'CX', cluster_B = 'Control', cohort = 'MV4-11', drug = 'CX-5461', target = 'POL I')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'Doxo', cluster_B = 'Control', cohort = 'MV4-11', drug = 'Doxorubicin', target = 'BET')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'I-BET', cluster_B = 'Control', cohort = 'MV4-11', drug = 'I-BET151', target = 'BET')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'Doxo_I-BET', cluster_B = 'Control', cohort = 'MV4-11', drug = 'Doxorubicin + I-BET151', target = 'BET')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'CX_I-BET', cluster_B = 'Control', cohort = 'MV4-11', drug = 'CX-5461 + I-BET151', target = 'POL I + BET')

### VTP-50469 Menin-MLL

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_VTP50469_MeninMLL.csv")
dat_IMSM2 = dat[:18]
dat_IMSM2.X = (dat_IMSM2.X.transpose() / dat_IMSM2.X.sum(axis=1)).transpose()
dat_IMSM2.obs['Treatment'] = [x[2] for x in dat_IMSM2.obs.index.str.split('_')]
dat_IMSM2.obs['Treatment'] = dat_IMSM2.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat_IMSM2.to_df()
newdat['PC2'] = (dat_IMSM2.to_df() * PC2_loadings).sum(axis=1)
dat_IMSM2 = ad.AnnData(newdat, dat_IMSM2.obs)
del(newdat)

In [None]:
df = get_scores(df, dat_IMSM2, classification = 'Treatment', cluster_A = 'VTP', cluster_B = 'Control', cohort = 'IMS-M2', drug = 'VTP-50469', target = 'Menin-MLL')

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_VTP50469_MeninMLL.csv")
dat_OCIAML3 = dat[18:]
dat_OCIAML3.X = (dat_OCIAML3.X.transpose() / dat_OCIAML3.X.sum(axis=1)).transpose()
dat_OCIAML3.obs['Treatment'] = [x[2] for x in dat_OCIAML3.obs.index.str.split('_')]
dat_OCIAML3.obs['Treatment'] = dat_OCIAML3.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat_OCIAML3.to_df()
newdat['PC2'] = (dat_OCIAML3.to_df() * PC2_loadings).sum(axis=1)
dat_OCIAML3 = ad.AnnData(newdat, dat_OCIAML3.obs)
del(newdat)

In [None]:
df = get_scores(df, dat_OCIAML3, classification = 'Treatment', cluster_A = 'VTP', cluster_B = 'Control', cohort = 'OCI-AML3', drug = 'VTP-50469', target = 'Menin-MLL')

### LSD1 MTORC1 OG86/RAD001

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_OG86RAD001_LSD1MTORC1.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'OG86', cluster_B = 'Control', cohort = 'THP-1', drug = 'OG86', target = 'LSD1')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'RAD001', cluster_B = 'Control', cohort = 'THP-1', drug = 'RAD001', target = 'MTORC1')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'RAD001-OG86', cluster_B = 'Control', cohort = 'THP-1', drug = 'RAD001 + OG86', target = 'LSD1 + MTORC1')

### MB3 GSK-LSD1 ATRA GCN5 LSD1 RARA

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_MB3GSKLSD1ATRA_GCN5LSD1RARA.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x for x in dat.obs.index]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('untreated', 'Control')
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('^[^_]*_', '').str.replace('_', '-')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'ATRA', cluster_B = 'Control', cohort = 'HL-60', drug = 'ATRA', target = 'RARA')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'MB3', cluster_B = 'Control', cohort = 'HL-60', drug = 'MB3', target = 'GCN5')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'GSK-LSD1', cluster_B = 'Control', cohort = 'HL-60', drug = 'GSK-LSD1', target = 'LSD1')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'ATRA-MB3', cluster_B = 'Control', cohort = 'HL-60', drug = 'ATRA + MB3', target = 'RARA + GCN-5')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'ATRA-GSK-LSD1', cluster_B = 'Control', cohort = 'HL-60', drug = 'ATRA + GSK-LSD1', target = 'RARA + LSD1')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'ATRA-MB3-GSK-LSD1', cluster_B = 'Control', cohort = 'HL-60', drug = 'ATRA + MB3 + GSK-LSD1', target = 'RARA + GCN5 + LSD1')

### 22d HDAC8

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_22d_HDAC8.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x for x in dat.obs.index]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('[1-3]', '')
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('treat', '22d').str.replace('c', 'C')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = '22d', cluster_B = 'Control', cohort = 'MV4-11', drug = '22d', target = 'HDAC8')

### Entospletinib SYK

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_Entosplentinib_SYK.csv")
dat = dat[:12]
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[2] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'Entospletinib', cluster_B = 'Control', cohort = 'MV4-11', drug = 'Entospletinib', target = 'SYK')

### Azacitidine Hypometh

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_Azacytidine_Hypometh.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'Azacytidine', cluster_B = 'Control', cohort = 'OCI-AML3', drug = '5-Aza', target = 'Hypometh')

### AlCAr Pyrimidine Synthesis

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_AlCAr_PyrimidineSynthesis.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split(' ')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('[0-9]','').str.replace('ctrl', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'AICAr', cluster_B = 'Control', cohort = 'U-937', drug = 'AlCAr', target = 'Pyrimidine Synthesis')

### INCB059872 LSD1

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_INCB059872_LSD1.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'INCB059872', cluster_B = 'Control', cohort = 'OCI-AML3', drug = 'INCB059872', target = 'LSD1')

### AT HA MYB

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_ATHA_MYB.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control').str.replace('[0-9]', '')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'AT', cluster_B = 'Control', cohort = 'THP-1', drug = 'AT', target = 'MYB')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'HA', cluster_B = 'Control', cohort = 'THP-1', drug = 'HA', target = 'MYB')

### GSK3186000A GSK343 PRMT5 EZH2

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_GSK3186000AGSK343_PRMT5EZH2.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'GSK3186000A', cluster_B = 'Control', cohort = 'MOLM-13', drug = 'GSK3186000A', target = 'PRMT5')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'GSK343', cluster_B = 'Control', cohort = 'MOLM-13', drug = 'GSK343', target = 'EZH2')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'GSK3186000A+GSK343', cluster_B = 'Control', cohort = 'MOLM-13', drug = 'GSK3186000A+GSK343', target = 'PRMT5 + EZH2')

### ATRA TSA RARA HDAC

In [None]:
# One CIBERSORTx file holds three cell lines in consecutive row ranges
# (rows 0-11, 12-23 and 24 onward). Load it once and run the same
# normalise -> label -> PC2 -> score pipeline on each range.
_atra_tsa = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_ATRATSA_RARAHDAC.csv")

# NOTE(review): 'HL60' is written without a hyphen here, whereas other sections
# of this notebook use 'HL-60'. It is left unchanged because the cohort string
# is embedded in the Treatment label and renaming it would merge these rows
# with the other HL-60 ATRA experiment -- confirm the intended spelling.
for _cohort, _rows in [('AP-1060', slice(None, 12)), ('HL60', slice(12, 24)), ('NB-4', slice(24, None))]:
    dat = _atra_tsa[_rows]
    # Rescale every sample (row) so that its values sum to 1.
    dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
    # Treatment = second '_'-separated token of the sample name; DMSO is the control arm.
    dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split('_')]
    dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control')

    # Add a 'PC2' column: the row-wise sum of the fractions multiplied by
    # PC2_loadings (defined outside this section), then rebuild the AnnData.
    newdat = dat.to_df()
    newdat['PC2'] = (newdat * PC2_loadings).sum(axis=1)
    dat = ad.AnnData(newdat, dat.obs)
    del newdat

    # Same three comparisons, in the same order, for every cell line.
    for _arm, _drug, _target in [
        ('ATRA', 'ATRA', 'RARA'),
        ('TSA', 'TSA', 'HDAC'),
        ('ATRA+TSA', 'ATRA + TSA', 'RARA + HDAC'),
    ]:
        df = get_scores(df, dat, classification = 'Treatment', cluster_A = _arm, cluster_B = 'Control', cohort = _cohort, drug = _drug, target = _target)

### UNC1999 EZH2/EZH1

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_UNC1999_EZH.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'UNC1999', cluster_B = 'Control', cohort = 'OCI-AML3', drug = 'UNC1999', target = 'EZH2/EZH1')

### Etoposide TOPO II

In [None]:
# Etoposide (labelled 'VP16' in the sample names) versus control ('CTR') in
# eleven cell lines. All of them live in one CIBERSORTx file as consecutive
# row ranges, so the file is loaded once and each range is processed by the
# same helper instead of eleven copies of the same cells.
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Cell_Lines/CIBERSORTx_Etoposide_TOPOII.csv")


def _prepare_etoposide_subset(subset):
    """Normalise one cell line's rows, label treatments and append PC2.

    ``subset`` is a row slice of the Etoposide AnnData. Returns a new AnnData
    whose variables are the row-normalised fractions plus a 'PC2' column and
    whose ``obs`` carries a 'Treatment' column.
    """
    # Rescale every sample (row) so that its values sum to 1.
    subset.X = (subset.X.transpose() / subset.X.sum(axis=1)).transpose()
    # Treatment = third '_'-separated token of the sample name.
    subset.obs['Treatment'] = [x[2] for x in subset.obs.index.str.split('_')]
    subset.obs['Treatment'] = subset.obs['Treatment'].str.replace('VP16','Etoposide').str.replace('CTR', 'Control')
    # 'PC2' = row-wise sum of the fractions multiplied by PC2_loadings
    # (defined outside this section).
    newdat = subset.to_df()
    newdat['PC2'] = (newdat * PC2_loadings).sum(axis=1)
    return ad.AnnData(newdat, subset.obs)


# Row ranges are the ones used by the original cells; the per-line variable
# names are kept so any later cell that refers to them still works.
dat_F36P = _prepare_etoposide_subset(dat[:6])
dat_HL60 = _prepare_etoposide_subset(dat[6:12])
dat_Kasumi1 = _prepare_etoposide_subset(dat[12:18])
dat_MOLM13 = _prepare_etoposide_subset(dat[18:24])
dat_MonoMac6 = _prepare_etoposide_subset(dat[24:30])
dat_MV411 = _prepare_etoposide_subset(dat[30:36])
dat_NB4 = _prepare_etoposide_subset(dat[36:42])
dat_NOMO1 = _prepare_etoposide_subset(dat[42:48])
dat_OCIAML2 = _prepare_etoposide_subset(dat[48:54])
dat_OCIAML3 = _prepare_etoposide_subset(dat[54:60])
dat_THP1 = _prepare_etoposide_subset(dat[60:])

# Score Etoposide vs Control per cell line, in the original order.
for _cohort, _subset in [
    ('F-36P', dat_F36P),
    ('HL-60', dat_HL60),
    ('Kasumi-1', dat_Kasumi1),
    ('MOLM-13', dat_MOLM13),
    ('MonoMac-6', dat_MonoMac6),
    ('MV4-11', dat_MV411),
    ('NB-4', dat_NB4),
    ('NOMO-1', dat_NOMO1),
    ('OCI-AML2', dat_OCIAML2),
    ('OCI-AML3', dat_OCIAML3),
    ('THP-1', dat_THP1),
]:
    df = get_scores(df, _subset, classification = 'Treatment', cluster_A = 'Etoposide', cluster_B = 'Control', cohort = _cohort, drug = 'Etoposide', target = 'TOPO II')

***

## Summarize 

In [None]:
df['CellType'] = df['names'].astype('category').cat.set_categories(['LSPC-Quiescent', 'LSPC-Primed', 'LSPC-Cycle', 'GMP-like', 'ProMono-like', 'Mono-like', 'cDC-like', 'PC2'])

In [None]:
df['logPval_dir'] = np.where(df['scores'] < 0, np.log(df['pvals']), -np.log(df['pvals']))
df['Treatment'] = np.where(df['Cohort']=="Primary", df['Drug'], df['Drug'] + " [" + df['Cohort'].str.replace('Primary ', '') + "]")

In [None]:
pd.set_option('display.max_rows', 1500)
df.CellType = df.names

In [None]:
df.to_csv('Data/Fig5_Literature_DrugResponse/Drug_Cell_Line_pvalues.csv')

# Run on Primary Samples

In [None]:
def get_scores(df, dat, classification, cluster_A, cluster_B, cohort, drug, target):
    """Run one Wilcoxon comparison and append its rows to the results table.

    Parameters
    ----------
    df : pandas.DataFrame
        Results accumulated so far (may be empty). It is not modified.
    dat, classification, cluster_A, cluster_B
        Forwarded unchanged to ``get_ranklist``; in this notebook they are an
        AnnData, the obs column holding the group labels, and the two labels
        to compare.
    cohort, drug, target : str
        Metadata written to the 'Cohort', 'Drug' and 'Target' columns of
        every new row.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the rows of this comparison appended at the bottom.
    """
    # Absolute Scores

    ## Wilcox
    temp = get_ranklist(dat, classification, cluster_A, cluster_B, test='wilcoxon')
    temp['Cohort'] = cohort
    temp['Drug'] = drug
    temp['Target'] = target
    temp['Test'] = 'wilcox'
    temp['CellType'] = temp['names']
    # Signed log p-value: log(p) when the score is negative, -log(p) otherwise.
    temp['logPval_dir'] = np.where(temp['scores'] < 0, np.log(temp['pvals']), -np.log(temp['pvals']))
    # Display label: the drug alone for cohort "Primary", otherwise
    # "<drug> [<cohort>]" with the text 'Primary ' removed from the cohort.
    temp['Treatment'] = np.where(temp['Cohort']=="Primary", temp['Drug'], temp['Drug'] + " [" + temp['Cohort'].str.replace('Primary ', '') + "]")

    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat with default arguments produces the same stacked frame.
    return pd.concat([df, temp])

In [None]:
# Start from an empty results table; the get_scores calls below append to it.
df = pd.DataFrame()

### AlCAr Pyrimidine Synthesis

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Primary/CIBERSORTx_AlCAr_PyrimidineSynthesis_Primary.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('[0-9]','').str.replace('ctrl', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'AlCAr', 'Control', 'Primary', 'AlCAr', 'Pyrimidine Synthesis')

### 5-Aza BAY1436032 IDH

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Primary/CIBERSORTx_AzaBay_HypomethIDH1_Primary.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[-2] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = np.where(dat.obs['Treatment'] == "AZA-PRL", "5-Aza + BAY1436032", dat.obs['Treatment'])
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace("Azacitidine", "5-Aza").str.replace("PRL", "5-Aza + BAY1436032")

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'BAY1436032', 'Control', 'Primary IDH1mut', 'BAY1436032', 'IDH')

In [None]:
df = get_scores(df, dat, 'Treatment', '5-Aza', 'Control', 'Primary IDH1mut', '5-Aza', 'HMA')

In [None]:
df = get_scores(df, dat, 'Treatment', '5-Aza + BAY1436032', 'Control', 'Primary IDH1mut', '5-Aza + BAY1436032', 'HMA + IDH')

### BCL6 Fx1

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Primary/CIBERSORTx_Fx1_BCL6_Primary.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[-2] for x in dat.obs.index.str.split('.')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'Fx1', 'Control', 'Primary TP53mut', 'Fx1', 'BCL6')

### BET CDK9 JQ1 CDKI-73

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Primary/CIBERSORTx_JQ1CDKI-73_BETCDK9_Primary.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[-1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('ctl', 'Control').str.replace('CDKi', 'CDKI-73').str.replace('comb', 'JQ1 + CDKI-73')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'JQ1', 'Control', 'Primary MLL-r', 'JQ1', 'BET')

In [None]:
df = get_scores(df, dat, 'Treatment', 'CDKI-73', 'Control', 'Primary MLL-r', 'CDKI-73', 'CDK9')

In [None]:
df = get_scores(df, dat, 'Treatment', 'JQ1 + CDKI-73', 'Control', 'Primary MLL-r', 'JQ1 + CDKI-73', 'BET + CDK9')

### HMA LSD1 5-Aza GSK2879552

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Primary/CIBERSORTx_AzaGSK_HMALSD1_Primary.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[-1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('[0-9]','')
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control').str.replace("Aza", "5-Aza").str.replace("LSDi", "GSK2879552").str.replace("GSK2879552plus5-Aza", "5-Aza + GSK2879552")

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', '5-Aza', 'Control', 'Primary TET2mut', '5-Aza', 'HMA')

In [None]:
df = get_scores(df, dat, 'Treatment', 'GSK2879552', 'Control', 'Primary TET2mut', 'GSK2879552', 'LSD1')

In [None]:
df = get_scores(df, dat, 'Treatment', '5-Aza + GSK2879552', 'Control', 'Primary TET2mut', '5-Aza + GSK2879552', 'HMA + LSD1')

### HMA FLT3 5-Aza Crenolanib

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Primary/CIBERSORTx_CrenolanibAza_FLT3Hypometh_Primary.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[-2] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control').str.replace("AZA", "5-Aza").str.replace("Creno", "Crenolanib").str.replace("+", " + ")

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', '5-Aza', 'Control', 'Primary FLT3-ITDmut', '5-Aza', 'HMA')

In [None]:
df = get_scores(df, dat, 'Treatment', 'Crenolanib', 'Control', 'Primary FLT3-ITDmut', 'Crenolanib', 'FLT3')

In [None]:
df = get_scores(df, dat, 'Treatment', '5-Aza + Crenolanib', 'Control', 'Primary FLT3-ITDmut', '5-Aza + Crenolanib', 'HMA + FLT3')

### CD105 TRC105

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Primary/CIBERSORTx_TRC105_CD105_Primary.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('IgG', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'TRC105', 'Control', 'Primary', 'TRC105', 'CD105')

### XPO1 KPT

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Primary/CIBERSORTx_KPT_XPO1_Primary.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[-1] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = np.where(dat.obs['Treatment'] == 'KPT', 'Selinexor', 'DMSO')
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, 'Treatment', 'Selinexor', 'Control', 'Primary', 'KPT330', 'XPO1')

### HDAC/NAMPT AR-42 KPT-9274

In [None]:
dat = load_fractions("Data/Fig5_Literature_DrugResponse/Primary/CIBERSORTx_AR42KPT9274_HDAC8NAMPT_Primary.csv")
dat.X = (dat.X.transpose() / dat.X.sum(axis=1)).transpose()
dat.obs['Treatment'] = [x[0] for x in dat.obs.index.str.split('_')]
dat.obs['Treatment'] = dat.obs['Treatment'].str.replace('DMSO', 'Control')

In [None]:
newdat = dat.to_df()
newdat['PC2'] = (dat.to_df() * PC2_loadings).sum(axis=1)
dat = ad.AnnData(newdat, dat.obs)
del(newdat)

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'AR-42', cluster_B = 'Control', cohort = 'Primary', drug = 'AR-42', target = 'HDAC8')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'KPT-9274', cluster_B = 'Control', cohort = 'Primary', drug = 'KPT-9274', target = 'NAMPT')

In [None]:
df = get_scores(df, dat, classification = 'Treatment', cluster_A = 'AR-42+KPT-9274', cluster_B = 'Control', cohort = 'Primary', drug = 'AR-42+KPT-9274', target = 'HDAC8 + NAMPT')

# Export Dataframe 

In [None]:
# Save the primary-sample results table accumulated by the get_scores calls above.
df.to_csv('Data/Fig5_Literature_DrugResponse/Drug_Primary_pvalues.csv')