**Aims:** Extract results from the CRISPRi screen processing outputs and generate plots for the manuscript.

- [x] PCA plot of CRISPRi screen rho results
- [ ] ...

### Setup python session

In [1]:
# !pip uninstall -y screenpro2
# !pip install git+https://github.com/ArcInstitute/screenpro2.git@dev
# # !pip install ScreenPro2

In [2]:
from glob import glob

import numpy as np
import pandas as pd 
import anndata as ad
import scanpy as sc

import screenpro as scp
import blitzgsea as blitz

import matplotlib.pyplot as plt

sc.settings.set_figure_params(
    dpi=150, format='svg', frameon=False, figsize=(3, 3), color_map='RdGy', 
    facecolor='white', 
    vector_friendly=True
)

In [3]:
import datetime

import matplotlib
import matplotlib.ticker as ticker

from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import font_manager as fm
from matplotlib import rcParams, rc_context

from screenpro.plotting._utils import almost_black, dark2


matplotlib.use('cairo')

font_files = fm.findSystemFonts(fontpaths='/home/abea/miniconda3/envs/screenpro2/fonts/', fontext='ttf')

for font_file in font_files:
    fm.fontManager.addfont(font_file)


# {f.name for f in matplotlib.font_manager.fontManager.ttflist}

rcParams['font.sans-serif'] = 'Helvetica'
rcParams['font.family'] = ['Helvetica']
rcParams['figure.dpi'] = 140

rcParams['pdf.fonttype'] = 42
rcParams['ps.fonttype'] = 42

In [4]:
drug_names = {
    'Pi': 'PARPi',
    'Ri': 'ATRi',
    'Wi': 'WEE1i',
    'Mi': 'ATMi',
    'Ki': 'DNAPKi',
    'PiRi': 'PARPi+ATRi',
    'PiWi': 'PARPi+WEE1i',
    'PiMi': 'PARPi+ATMi',
    'PiKi': 'PARPi+DNAPKi'
}

In [14]:
### extract result tables from screen object
def extract_result_tables(screen):
    result_tables = []

    for phenotype_name in screen.listPhenotypeScores(run_name='compare_guides'):
        if 'rho' in phenotype_name:
            result_tables.append((phenotype_name,
                screen.getPhenotypeScores(
                    run_name='compare_guides', phenotype_name=phenotype_name,
                    pvalue_col = 'ttest pvalue',
                    threshold=6
                ).query('target!="negative_control"').set_index(['target','transcript'])
            ))
        
    result_tables = dict(result_tables)

    return result_tables


### get result tables: gene level scores with out negative controls
def getAnnotatedTables(screen, run_name, threshold):
    return dict([
        (phenotype_name, 
         screen.getPhenotypeScores(
             run_name=run_name,phenotype_name=phenotype_name,threshold=threshold, pvalue_col='ttest pvalue'
         ).query('target!="negative_control"').set_index(['target','transcript']))
        for phenotype_name in screen.listPhenotypeScores(run_name=run_name)
    ])


### get annotated result table: rho scores
def get_annotated_result_table(screen, run_name, threshold=6):
    return pd.concat(dict([
        (drug_names[k.split(':')[1].split('_vs_')[0]],table) for k, table in 
        getAnnotatedTables(screen,threshold=threshold,run_name=run_name).items()
        if 'rho' in k
    ]),axis=1).dropna()

In [7]:
### pathway analysis
pager_dir = "/home/abea/tools/pager/"
pager_annotation_path = '/home/abea/tools/pager/annotations/human'

c5_gobp_gmt = blitz.enrichr.read_gmt(
    f'{pager_annotation_path}/msigdb_v7.4_c5.go.bp/c5.go.bp.v7.4.symbols.gmt'
)


def run_rho_gsea_directional(df,var_col,gmt,min_size=15,max_size=150):
    signature = df[var_col].reset_index().drop(columns='transcript').copy()

    result = blitz.gsea(
        signature=signature,
        library=gmt,
        min_size=min_size,
        max_size=max_size,
        verbose=True
    )
    
    return signature, result

___

### Load Data
Load Screen Processing guide level and gene level result tables.

In [8]:
crispri_v3_screen = scp.load._read_screen_pkl('screens/A549_PRDX1_CRISPRi_v3_screens')

In [15]:
crispri_v3_screen.phenotypes.keys()

dict_keys(['compare_reps_parent', 'compare_reps_PRDX1KO', 'compare_reps_vehicle'])

In [19]:
crispri_v3_screen.adata.var

Unnamed: 0,sgID_A,sgID_B,target_A,protospacer_A,target_B,protospacer_B,targetType,target,sequence
A1BG_+_58858964.23-P1|A1BG_-_58858788.23-P1,A1BG_+_58858964.23-P1,A1BG_-_58858788.23-P1,A1BG,CTCCGGGCGACGTGGAGTG,A1BG,GGGCACCCAGGAGCGGTAG,gene,A1BG,CTCCGGGCGACGTGGAGTG;GGGCACCCAGGAGCGGTAG
A1BG_-_58858991.23-P1|A1BG_-_58858950.23-P1,A1BG_-_58858991.23-P1,A1BG_-_58858950.23-P1,A1BG,TCCACGTCGCCCGGAGCTG,A1BG,GCAGCGCAGGACGGCATCT,gene,A1BG,TCCACGTCGCCCGGAGCTG;GCAGCGCAGGACGGCATCT
A1BG_-_58858915.23-P1|A1BG_-_58858922.23-P1,A1BG_-_58858915.23-P1,A1BG_-_58858922.23-P1,A1BG,AGCAGCTCGAAGGTGACGT,A1BG,CGAAGGTGACGTCGGGGAT,gene,A1BG,AGCAGCTCGAAGGTGACGT;CGAAGGTGACGTCGGGGAT
A1BG_-_58864840.23-P2|A1BG_-_58864822.23-P2,A1BG_-_58864840.23-P2,A1BG_-_58864822.23-P2,A1BG,CCGGTGCAGTGAGTGTCTG,A1BG,ATGATGGTCGCGCTCACTC,gene,A1BG,CCGGTGCAGTGAGTGTCTG;ATGATGGTCGCGCTCACTC
A1BG_+_58864705.23-P2|A1BG_+_58864644.23-P2,A1BG_+_58864705.23-P2,A1BG_+_58864644.23-P2,A1BG,GGGAACAGGAGCCTTACGG,A1BG,CAGCCATATGTGAGTGCAG,gene,A1BG,GGGAACAGGAGCCTTACGG;CAGCCATATGTGAGTGCAG
...,...,...,...,...,...,...,...,...,...
non-targeting_03783|non-targeting_02098,non-targeting_03783,non-targeting_02098,negative_control,GAGCTAGGATCGAACGACA,negative_control,ATCAAAATCGACGCGTCTA,negative_control,negative_control,GAGCTAGGATCGAACGACA;ATCAAAATCGACGCGTCTA
non-targeting_03785|non-targeting_00377,non-targeting_03785,non-targeting_00377,negative_control,CTACTCCGCCCCGCGGGAG,negative_control,GTAGTGCGCTTCCCCGAAG,negative_control,negative_control,CTACTCCGCCCCGCGGGAG;GTAGTGCGCTTCCCCGAAG
non-targeting_03786|non-targeting_00360,non-targeting_03786,non-targeting_00360,negative_control,TGGCCGTTCATGGGACCGG,negative_control,TCGAGGACGCACCCCATAA,negative_control,negative_control,TGGCCGTTCATGGGACCGG;TCGAGGACGCACCCCATAA
non-targeting_03786|non-targeting_02100,non-targeting_03786,non-targeting_02100,negative_control,TGGCCGTTCATGGGACCGG,negative_control,CGGGGGCCACGGCGGTAAG,negative_control,negative_control,TGGCCGTTCATGGGACCGG;CGGGGGCCACGGCGGTAAG


In [16]:
get_annotated_result_table(crispri_v3_screen,run_name='compare_reps_parent')

KeyError: "None of ['transcript'] are in the columns"

### save to file

? how we want to organize the results for the manuscript tables (!)

In [None]:
with pd.ExcelWriter('CRISPRi-PRDX1-DDRi-screens.xlsx', engine='openpyxl') as writer:
    screen.adata.obs.to_excel(writer, sheet_name='sample sheet')
    screen.adata.to_df(layer='raw_counts').astype(int).T.to_excel(writer, sheet_name='raw counts')
    screen.adata.to_df(layer='seq_depth_norm').astype(int).T.to_excel(writer, sheet_name='normalized counts')
    getAnnotatedTable(screen_parent, threshold=2).to_excel(writer, sheet_name='parent screen')
    getAnnotatedTable(screen_PRDX1KO, threshold=2).to_excel(writer, sheet_name='PRDX1KO screen')
    getAnnotatedTable(screen_vehicle, threshold=2)[['rho:PRDX1KO_vs_parent']].to_excel(writer, sheet_name='vehicle screen')

# 

In [22]:
%reload_ext watermark

In [23]:
%watermark

Last updated: 2024-09-24T00:07:00.695852-07:00

Python implementation: CPython
Python version       : 3.11.9
IPython version      : 8.27.0

Compiler    : GCC 11.2.0
OS          : Linux
Release     : 5.15.0-119-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 64
Architecture: 64bit



In [24]:
%watermark --iversions

screenpro : 0.4.15
numpy     : 1.26.4
scanpy    : 1.10.3
matplotlib: 3.6.2
pandas    : 1.5.3
anndata   : 0.10.9
blitzgsea : 1.3.47



___