**Aims:** Extract results from the CRISPRi screen processing outputs and generate plots for the manuscript.

- [ ] A table of the V3 data including the parent veh vs DNAPKi, KO veh vs DNAPKi, and parent veh vs KO veh comparisons (Table S3) - I will use this to replot the stuff in Fig 5
- [ ] Some kind of QC plot for the V3 data - a PCA plot would be great if it looks okay and I'll put that in a supp figure

### Set up Python session

In [1]:
# !pip uninstall -y screenpro2
# !pip install git+https://github.com/ArcInstitute/screenpro2.git@dev
# # !pip install ScreenPro2

In [1]:
from glob import glob

import numpy as np
import pandas as pd 
import anndata as ad
import scanpy as sc

import screenpro as scp
import blitzgsea as blitz

import matplotlib.pyplot as plt

# Notebook-wide scanpy figure defaults: 150 dpi, SVG output format,
# 3x3 figure size, 'RdGy' colormap, white facecolor, frames switched off.
# NOTE(review): rcParams['figure.dpi'] is assigned again (140) in the next cell.
sc.settings.set_figure_params(
    dpi=150, format='svg', frameon=False, figsize=(3, 3), color_map='RdGy', 
    facecolor='white', 
    vector_friendly=True
)

In [2]:
import datetime

import matplotlib
import matplotlib.ticker as ticker

from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import font_manager as fm
from matplotlib import rcParams, rc_context

from screenpro.plotting._utils import almost_black, dark2


# Render figures through the cairo backend.
matplotlib.use('cairo')

# Register every .ttf file found in the environment's font folder so that
# Helvetica can be selected below.
# NOTE(review): this is an absolute, user-specific path; on another machine
# the folder is presumably missing and no extra fonts get registered.
font_files = fm.findSystemFonts(
    fontpaths='/home/abea/miniconda3/envs/screenpro2/fonts/',
    fontext='ttf',
)
for font_file in font_files:
    fm.fontManager.addfont(font_file)

# Handy check of which font names are registered:
# {f.name for f in matplotlib.font_manager.fontManager.ttflist}

# Entries are applied in the order listed here.
rcParams.update({
    'font.sans-serif': 'Helvetica',
    'font.family': ['Helvetica'],
    'figure.dpi': 140,
    # Type 42 (TrueType) embedding for the PDF and PS writers.
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})

In [3]:
# Short treatment codes -> display names. The first five entries are single
# agents; the 'Pi*' entries are the PARPi combinations.
drug_names = dict(
    Pi='PARPi',
    Ri='ATRi',
    Wi='WEE1i',
    Mi='ATMi',
    Ki='DNAPKi',
    PiRi='PARPi+ATRi',
    PiWi='PARPi+WEE1i',
    PiMi='PARPi+ATMi',
    PiKi='PARPi+DNAPKi',
)

In [15]:
### get result tables: one phenotype-score table per phenotype of a run
def getAnnotatedTables(screen, run_name, threshold):
    """Collect the phenotype-score tables of one screen run.

    For every phenotype name listed by
    ``screen.listPhenotypeScores(run_name=run_name)`` the table returned by
    ``screen.getPhenotypeScores`` (using the ``'ttest pvalue'`` column and the
    given ``threshold``) is re-indexed by
    ``['sgID_AB', 'target', 'transcript']``. The previous index is moved into
    a column first; if that column comes out named ``'index'`` it is renamed
    to ``'sgID_AB'``.

    Parameters
    ----------
    screen : object exposing ``listPhenotypeScores`` and ``getPhenotypeScores``
    run_name : name of the run whose phenotypes are collected
    threshold : forwarded unchanged to ``getPhenotypeScores``

    Returns
    -------
    dict mapping phenotype name -> re-indexed DataFrame

    NOTE(review): the original header said these are scores "with out negative
    controls"; nothing in this function filters them - confirm upstream.
    """
    index_levels = ['sgID_AB', 'target', 'transcript']
    tables = {}

    for phenotype_name in screen.listPhenotypeScores(run_name=run_name):
        scores = screen.getPhenotypeScores(
            run_name=run_name,
            phenotype_name=phenotype_name,
            threshold=threshold,
            pvalue_col='ttest pvalue',
        )
        tables[phenotype_name] = (
            scores
            .reset_index()
            .rename(columns={'index': 'sgID_AB'})
            .set_index(index_levels)
        )

    return tables


### get annotated result table: rho and gamma scores, side by side
def get_annotated_result_table(screen, run_name, name_it = '', threshold=6):
    """Build one wide table from the rho/gamma phenotype tables of a run.

    Fetches all tables via ``getAnnotatedTables`` and keeps those whose
    phenotype name contains ``'rho'`` or ``'gamma'``. The kept tables are
    concatenated column-wise, each under the top-level column label
    ``name_it + <phenotype name>``.

    Parameters
    ----------
    screen, run_name, threshold : forwarded to ``getAnnotatedTables``
    name_it : prefix prepended to every phenotype name (default: no prefix)

    Returns
    -------
    DataFrame produced by ``pd.concat(..., axis=1)`` over the kept tables.
    If no phenotype name matches, ``pd.concat`` receives an empty mapping and
    is expected to raise.
    """
    all_tables = getAnnotatedTables(screen, threshold=threshold, run_name=run_name)

    selected = {}
    for key, table in all_tables.items():
        if 'rho' in key or 'gamma' in key:
            selected[name_it + key] = table

    return pd.concat(selected, axis=1)

In [16]:
### pathway analysis
# Locations of the local PAGER installation and its human annotation folder.
# NOTE(review): both are absolute, user-specific paths, and `pager_dir` is not
# referenced anywhere else in the visible part of this notebook.
pager_dir = "/home/abea/tools/pager/"
pager_annotation_path = '/home/abea/tools/pager/annotations/human'

# Gene-set library read from the MSigDB v7.4 C5 GO-BP symbols GMT file;
# passed as `gmt` to run_rho_gsea_directional further down.
c5_gobp_gmt = blitz.enrichr.read_gmt(
    f'{pager_annotation_path}/msigdb_v7.4_c5.go.bp/c5.go.bp.v7.4.symbols.gmt'
)


def run_rho_gsea_directional(df,var_col,gmt,min_size=15,max_size=150):
    """Run blitzgsea on one score column of a phenotype table.

    Rows whose ``target`` equals ``"negative_control"`` are removed, then
    ``var_col`` is selected and the index is moved into columns. The
    ``'transcript'`` and ``'sgID_AB'`` columns are dropped, so (as used in
    this notebook, where those three names are index levels) the signature
    keeps ``target`` plus the score.

    Parameters
    ----------
    df : DataFrame with ``target``, ``transcript`` and ``sgID_AB`` available
        after ``reset_index`` (i.e. as index levels)
    var_col : name of the score column to rank on
    gmt : gene-set library handed to ``blitz.gsea`` as ``library``
    min_size, max_size : gene-set size limits forwarded to ``blitz.gsea``

    Returns
    -------
    (signature, result) : the signature frame passed to ``blitz.gsea`` and
    whatever ``blitz.gsea`` returned for it.
    """
    targeting_rows = df.query('target!="negative_control"')

    signature = (
        targeting_rows[var_col]
        .reset_index()
        .drop(columns=['transcript','sgID_AB'])
        .copy()
    )

    gsea_kwargs = dict(
        signature=signature,
        library=gmt,
        min_size=min_size,
        max_size=max_size,
        verbose=True,
    )
    result = blitz.gsea(**gsea_kwargs)

    return signature, result

___

### Load Data
Load the Screen Processing guide-level and gene-level result tables.

In [6]:
# Read the saved AnnData file for the CRISPRi v3 screen.
# NOTE(review): `crispri_v3_adata` is not used again in the visible part of
# this notebook.
crispri_v3_adata = sc.read_h5ad('screens/A549_PRDX1_CRISPRi_v3.h5ad.gz')

In [7]:
# Load the saved screen object that the result tables below are built from.
# NOTE(review): `_read_screen_pkl` is underscore-prefixed (private) in
# screenpro and presumably unpickles the file - only load trusted files and
# confirm the helper still exists when upgrading screenpro.
crispri_v3_screen = scp.load._read_screen_pkl('screens/A549_PRDX1_CRISPRi_v3_screens')

In [8]:
# Show which runs are stored on the screen object; these keys are the
# `run_name` values used when building the result tables.
crispri_v3_screen.phenotypes.keys()

dict_keys(['compare_reps_parent', 'compare_reps_PRDX1KO', 'compare_reps_vehicle'])

___

In [9]:
# pd.concat(),axis=1).dropna()

In [10]:
# Same display as the earlier `phenotypes.keys()` cell, repeated here next to
# the table-building step that uses these run names.
crispri_v3_screen.phenotypes.keys()

dict_keys(['compare_reps_parent', 'compare_reps_PRDX1KO', 'compare_reps_vehicle'])

In [20]:
# One wide table holding the rho/gamma tables of all three runs side by side.
# Each run's columns are prefixed ('PRDX1KO::', 'parent::', 'vehicle::') so the
# top-level column labels stay unique; rows are then sorted on index level 1.
crispri_v3_screen_result_tables = pd.concat(
    [
        get_annotated_result_table(
            crispri_v3_screen, run_name=run_name, name_it=prefix
        )
        for run_name, prefix in (
            ('compare_reps_PRDX1KO', 'PRDX1KO::'),
            ('compare_reps_parent', 'parent::'),
            ('compare_reps_vehicle', 'vehicle::'),
        )
    ],
    axis=1,
).sort_index(level=[1], ascending=[True])

In [22]:
# List the distinct top-level column labels (prefix + phenotype name) of the
# combined table; the GSEA loop below iterates over exactly these labels.
crispri_v3_screen_result_tables.columns.get_level_values(0).unique()

Index(['PRDX1KO::gamma:vehicle_vs_T0', 'PRDX1KO::rho:DNAPKi_vs_vehicle',
       'parent::gamma:vehicle_vs_T0', 'parent::rho:DNAPKi_vs_vehicle',
       'vehicle::gamma:parent_vs_T0', 'vehicle::rho:PRDX1KO_vs_parent'],
      dtype='object')

In [24]:
# GO-BP GSEA per comparison, keyed by the top-level column label.
# Comparisons whose label contains 'gamma' are skipped, so only the rho
# comparisons of the combined table are analysed.
crispri_v3_screen_gsea_results = {}

for comparison in crispri_v3_screen_result_tables.columns.get_level_values(0).unique():

    if 'gamma' in comparison:
        continue

    print(comparison)

    # Rows with any missing value in this comparison's columns are dropped
    # before the 'score' column is turned into the GSEA signature.
    _, res = run_rho_gsea_directional(
        crispri_v3_screen_result_tables[comparison].dropna(),
        var_col='score',
        gmt=c5_gobp_gmt,
    )

    crispri_v3_screen_gsea_results[comparison] = res

PRDX1KO::rho:DNAPKi_vs_vehicle
Use cached anchor parameters


Enrichment : 100%|██████████| 7481/7481 [00:01<00:00, 6382.62it/s]
  pvals_corrected = -np.expm1(ntests * np.log1p(-pvals))


parent::rho:DNAPKi_vs_vehicle
Use cached anchor parameters


Enrichment : 100%|██████████| 7481/7481 [00:01<00:00, 6462.04it/s]
  pvals_corrected = -np.expm1(ntests * np.log1p(-pvals))


vehicle::rho:PRDX1KO_vs_parent
Use cached anchor parameters


Enrichment : 100%|██████████| 7481/7481 [00:01<00:00, 6625.21it/s]
  pvals_corrected = -np.expm1(ntests * np.log1p(-pvals))


### save to file

**TODO:** Decide how we want to organize the results for the manuscript tables.

In [25]:
# with pd.ExcelWriter('CRISPRi-PRDX1-DDRi-screens.xlsx', engine='openpyxl') as writer:
#     screen.adata.obs.to_excel(writer, sheet_name='sample sheet')
#     screen.adata.to_df(layer='raw_counts').astype(int).T.to_excel(writer, sheet_name='raw counts')
#     screen.adata.to_df(layer='seq_depth_norm').astype(int).T.to_excel(writer, sheet_name='normalized counts')
#     getAnnotatedTable(screen_parent, threshold=2).to_excel(writer, sheet_name='parent screen')
#     getAnnotatedTable(screen_PRDX1KO, threshold=2).to_excel(writer, sheet_name='PRDX1KO screen')
#     getAnnotatedTable(screen_vehicle, threshold=2)[['rho:PRDX1KO_vs_parent']].to_excel(writer, sheet_name='vehicle screen')

Table S3

In [28]:
# Write Table S3 as a two-sheet workbook.
# NOTE(review): the relative 'tables/' folder is not created here - presumably
# it must already exist next to the notebook.
with pd.ExcelWriter('tables/Table-S3.xlsx', engine='openpyxl') as writer:
    # Sheet 1: the combined gene-level phenotype table of all three runs.
    crispri_v3_screen_result_tables.to_excel(writer, sheet_name='Gene Level Phenotypes')
    # Sheet 2: the per-comparison GSEA results placed side by side, with the
    # comparison label as the top-level column.
    pd.concat(crispri_v3_screen_gsea_results,axis=1).to_excel(writer, sheet_name='GO GSEA Analysis')

# 

In [167]:
%reload_ext watermark

In [168]:
%watermark

Last updated: 2024-09-25T00:59:44.332049-07:00

Python implementation: CPython
Python version       : 3.11.9
IPython version      : 8.27.0

Compiler    : GCC 11.2.0
OS          : Linux
Release     : 5.15.0-119-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 64
Architecture: 64bit



In [169]:
%watermark --iversions

pandas    : 1.5.3
matplotlib: 3.6.2
scanpy    : 1.10.3
blitzgsea : 1.3.47
screenpro : 0.4.15
numpy     : 1.26.4
anndata   : 0.10.9



___