In [1]:
# Optional environment setup, intentionally left disabled.
# Uncomment the lines below to reinstall ScreenPro2 (PyPI release, or the dev branch from GitHub).
# !pip uninstall -y screenpro2
# # !pip install git+https://github.com/ArcInstitute/screenpro2.git@dev
# !pip install ScreenPro2

In [2]:
# Short condition codes (as they appear in the sample names) mapped to
# readable drug labels. Single agents first, then the PARPi combinations.
drug_names = dict(
    Pi='PARPi',
    Ri='ATRi',
    Wi='WEE1i',
    Mi='ATMi',
    Ki='DNAPKi',
    PiRi='PARPi+ATRi',
    PiWi='PARPi+WEE1i',
    PiMi='PARPi+ATMi',
    PiKi='PARPi+DNAPKi',
)

In [3]:
from glob import glob

import numpy as np
import pandas as pd 
import anndata as ad
import scanpy as sc

import screenpro as scp

import matplotlib.pyplot as plt

# scanpy figure defaults: small square SVG figures on a white background,
# no frame, with the 'RdGy' colour map.
sc.settings.set_figure_params(
    figsize=(3, 3),
    dpi=150,
    format='svg',
    color_map='RdGy',
    facecolor='white',
    frameon=False,
    vector_friendly=True,
)

In [4]:
import datetime

import matplotlib
import matplotlib.ticker as ticker

from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import font_manager as fm
from matplotlib import rcParams, rc_context

from screenpro.plotting._utils import almost_black, dark2


import os
import warnings

# Select the cairo backend for rendering.
matplotlib.use('cairo')

# Directory holding the extra font files to register with matplotlib.
# The default is the original author's environment; set the
# SCREENPRO_FONT_DIR environment variable to point somewhere else instead of
# editing this cell.
FONT_DIR = os.environ.get(
    'SCREENPRO_FONT_DIR',
    '/home/abea/miniconda3/envs/screenpro2/fonts/',
)

font_files = fm.findSystemFonts(fontpaths=FONT_DIR, fontext='ttf')

if not font_files:
    # Make a missing or empty font directory visible up front; the rcParams
    # below still request Helvetica, which may not be installed.
    warnings.warn(
        f"No font files found under {FONT_DIR!r}; 'Helvetica' may be unavailable. "
        "Set SCREENPRO_FONT_DIR to a directory containing the .ttf files."
    )

# Register every discovered font file with matplotlib's font manager.
for font_file in font_files:
    fm.fontManager.addfont(font_file)


# To inspect the registered font names:
# {f.name for f in matplotlib.font_manager.fontManager.ttflist}

rcParams['font.sans-serif'] = 'Helvetica'
rcParams['font.family'] = ['Helvetica']
# NOTE(review): sc.settings.set_figure_params(dpi=150) is called in an earlier
# cell; this assignment presumably overrides that figure dpi - confirm intended.
rcParams['figure.dpi'] = 140

# Font type 42 (TrueType) for PDF/PS output, per the matplotlib rcParams docs.
rcParams['pdf.fonttype'] = 42
rcParams['ps.fonttype'] = 42

## Step 1: FASTQ processing


In [5]:
# Load the sgRNA library annotation table (tab-separated text, .gz file).
# index_col=False keeps the first column as data rather than as the index.
library_table = pd.read_csv(
    'CRISPRi_v2_human_librarytable.txt.gz',
    sep='\t',
    low_memory=False,
    index_col=False,
)

In [6]:
# Sample names are the FASTQ file names (directory and '.fastq.gz' removed),
# collected from the 'fastq' folder and sorted alphabetically.
samples = sorted(
    fastq_path.split('/')[-1].replace('.fastq.gz', '')
    for fastq_path in glob('fastq/A549_CRISPRi*.fastq.gz')
)

In [7]:
# Display the sorted list of sample names.
samples

['A549_CRISPRi_DMSO_rep1',
 'A549_CRISPRi_DMSO_rep2',
 'A549_CRISPRi_Ki_rep1',
 'A549_CRISPRi_Ki_rep2',
 'A549_CRISPRi_Mi_rep1',
 'A549_CRISPRi_Mi_rep2',
 'A549_CRISPRi_PiKi_rep1',
 'A549_CRISPRi_PiKi_rep2',
 'A549_CRISPRi_PiMi_rep1',
 'A549_CRISPRi_PiMi_rep2',
 'A549_CRISPRi_PiRi_rep1',
 'A549_CRISPRi_PiRi_rep2',
 'A549_CRISPRi_PiWi_rep1',
 'A549_CRISPRi_PiWi_rep2',
 'A549_CRISPRi_Pi_rep1',
 'A549_CRISPRi_Pi_rep2',
 'A549_CRISPRi_Ri_rep1',
 'A549_CRISPRi_Ri_rep2',
 'A549_CRISPRi_T0_rep1',
 'A549_CRISPRi_T0_rep2',
 'A549_CRISPRi_Wi_rep1',
 'A549_CRISPRi_Wi_rep2']

### Run `GuideCounter`

In [8]:
# Guide counter configured for a Cas9 library with a single-guide design.
counter = scp.GuideCounter(cas_type = 'cas9', library_type = 'single_guide_design')

In [9]:
# Display the library annotation table (sgId, sublibrary, gene, transcripts, sequence).
library_table

Unnamed: 0,sgId,sublibrary,gene,transcripts,sequence
0,A1BG_-_58858617.23-P1,h3_top5,A1BG,P1,GGAGACCCAGCGCTAACCAG
1,A1BG_-_58858788.23-P1,h3_top5,A1BG,P1,GGGGCACCCAGGAGCGGTAG
2,A1BG_+_58858964.23-P1,h3_top5,A1BG,P1,GCTCCGGGCGACGTGGAGTG
3,A1BG_-_58858630.23-P1,h3_top5,A1BG,P1,GAACCAGGGGTGCCCAAGGG
4,A1BG_+_58858549.23-P1,h3_top5,A1BG,P1,GGCGAGGAACCGCCCAGCAA
...,...,...,...,...,...
209065,non-targeting_03785,h7_supp5,negative_control,na,GCTACTCCGCCCCGCGGGAG
209066,non-targeting_03786,h7_supp5,negative_control,na,GTGGCCGTTCATGGGACCGG
209067,non-targeting_03787,h7_supp5,negative_control,na,GAACTCTGTAGAAGGGACCG
209068,non-targeting_03788,h7_supp5,negative_control,na,GCTCGACAGCGACTGAAGAG


In [10]:
# Load the CRISPRi v2 library table into the counter. The commented-out line
# is an alternative library file kept for reference.
counter.load_library(
    'CRISPRi_v2_human_librarytable.txt.gz',
    # 'JR_V3lib_top2_v2.csv', sep = ',',
    sep='\t',
    index_col=False,
    low_memory=False,
    verbose=True,
)

Trimming protospacer sequences in 'protospacer' column.
Library table successfully loaded.
total # of cas9 sgRNAs: 205648


In [11]:
# Build the guide-by-sample counts matrix from the FASTQ files in 'fastq'.
# The recorded output reports "count file exists ..." for each sample;
# write='force' is left disabled (presumably it would force recounting - confirm).
counter.get_counts_matrix(
    fastq_dir='fastq',
    samples=samples,
    trim_first_g=True,
    verbose=True,
    # write='force',
)

[1;32mA549_CRISPRi_DMSO_rep1[0m
count file exists ...
% mapped reads 88.63374797857534
[1;32mA549_CRISPRi_DMSO_rep2[0m
count file exists ...
% mapped reads 80.83309342989601
[1;32mA549_CRISPRi_Ki_rep1[0m
count file exists ...
% mapped reads 90.24498424671202
[1;32mA549_CRISPRi_Ki_rep2[0m
count file exists ...
% mapped reads 89.39757479089117
[1;32mA549_CRISPRi_Mi_rep1[0m
count file exists ...
% mapped reads 89.78796940558229
[1;32mA549_CRISPRi_Mi_rep2[0m
count file exists ...
% mapped reads 89.46854195668756
[1;32mA549_CRISPRi_PiKi_rep1[0m
count file exists ...
% mapped reads 90.54211297399893
[1;32mA549_CRISPRi_PiKi_rep2[0m
count file exists ...
% mapped reads 89.53799107315274
[1;32mA549_CRISPRi_PiMi_rep1[0m
count file exists ...
% mapped reads 89.40129422987994
[1;32mA549_CRISPRi_PiMi_rep2[0m
count file exists ...
% mapped reads 89.56277272210386
[1;32mA549_CRISPRi_PiRi_rep1[0m
count file exists ...
% mapped reads 88.53951639574869
[1;32mA549_CRISPRi_PiRi_rep2

In [12]:
# Total counts per sample, expressed in millions.
counter.counts_mat.sum().div(10**6)

A549_CRISPRi_DMSO_rep1     48.718589
A549_CRISPRi_DMSO_rep2     28.086767
A549_CRISPRi_Ki_rep1       37.695912
A549_CRISPRi_Ki_rep2       23.865067
A549_CRISPRi_Mi_rep1       32.910419
A549_CRISPRi_Mi_rep2       46.517719
A549_CRISPRi_PiKi_rep1     37.236937
A549_CRISPRi_PiKi_rep2     78.800999
A549_CRISPRi_PiMi_rep1     37.714290
A549_CRISPRi_PiMi_rep2    108.729121
A549_CRISPRi_PiRi_rep1     43.599360
A549_CRISPRi_PiRi_rep2     19.454159
A549_CRISPRi_PiWi_rep1     40.225769
A549_CRISPRi_PiWi_rep2     37.929204
A549_CRISPRi_Pi_rep1       45.441083
A549_CRISPRi_Pi_rep2       33.736242
A549_CRISPRi_Ri_rep1       54.166237
A549_CRISPRi_Ri_rep2       32.538540
A549_CRISPRi_T0_rep1       48.168472
A549_CRISPRi_T0_rep2       33.978428
A549_CRISPRi_Wi_rep1       86.448619
A549_CRISPRi_Wi_rep2       34.393649
dtype: float64

In [13]:
# Package the counts held by `counter` into an AnnData object.
adata = counter.build_counts_anndata()

In [14]:
# Sample names follow 'A549_CRISPRi_<condition>_rep<N>'.
# The condition is the second-to-last underscore-separated field.
adata.obs['condition'] = adata.obs.index.str.split('_').str[-2]
# Parse every digit after 'rep' instead of only the last character, so a
# name ending in 'rep10' gives 10 rather than 0.
adata.obs['replicate'] = (
    adata.obs.index.str.extract(r'rep(\d+)$', expand=False).astype(int)
)
# adata.obs['pop_doublings'] = [1 if cond != 'T0' else 0 for cond in adata.obs['condition']]

# Label each guide as targeting a gene or as a negative control, based on its target.
adata.var['targetType'] = ['gene' if t != 'negative_control' else 'negative_control' for t in adata.var.target]

In [15]:
# Display the per-sample annotations (condition, replicate).
adata.obs

Unnamed: 0,condition,replicate
A549_CRISPRi_DMSO_rep1,DMSO,1
A549_CRISPRi_DMSO_rep2,DMSO,2
A549_CRISPRi_Ki_rep1,Ki,1
A549_CRISPRi_Ki_rep2,Ki,2
A549_CRISPRi_Mi_rep1,Mi,1
A549_CRISPRi_Mi_rep2,Mi,2
A549_CRISPRi_PiKi_rep1,PiKi,1
A549_CRISPRi_PiKi_rep2,PiKi,2
A549_CRISPRi_PiMi_rep1,PiMi,1
A549_CRISPRi_PiMi_rep2,PiMi,2


In [16]:
# Save the counts AnnData to disk with gzip compression; the next section reads this file back.
adata.write_h5ad('A549_CRISPRi_screens.h5ad.gz', compression='gzip')

### load counts and metadata

In [17]:
# Reload the saved counts so the analysis can start from the file on disk.
adata = ad.read_h5ad('A549_CRISPRi_screens.h5ad.gz')

# Disabled: alternative sample metadata taken from an external `meta` table
# (`meta` is not defined anywhere in this notebook).
# adata.obs = meta 
# adata.obs.treatment = adata.obs.treatment.str.replace('-','T0')
# adata.obs = adata.obs.rename(columns={'treatment':'condition','rep':'replicate'})

# adata.obs['pop_doublings'] = meta['pop doublings'].replace('-',np.nan).astype(float)

In [18]:
# Make sure the replicate column is integer-typed after the reload from disk.
adata.obs['replicate'] = adata.obs['replicate'].astype(int)

# Look up each guide's transcript annotation in the library table, keyed by sgId.
adata.var['transcript'] = (
    library_table
    .set_index('sgId')
    .loc[adata.var.index, 'transcripts']
)

___
<!-- - filter low counts -->

In [19]:
# Keep a copy of `adata` taken before it is passed to `scp.PooledScreens` below.
# NOTE(review): presumably a safeguard in case filtering/normalization changes `adata` - confirm.
adata0 = adata.copy()

___

## Step 2: Phenotype calculation

- [x] Run `calculateDrugScreen` with `score_level="compare_reps"` for guide-level analysis
- [x] Run `calculateDrugScreen` with `score_level="compare_guides"` for gene-level analysis

### run phenoscore

In [20]:
# Wrap the counts in a PooledScreens object (two replicates per condition).
screen = scp.PooledScreens(adata, n_reps=2, verbose=True)

# Low-count filtering runs first, then count normalization; the recorded
# output reports a pseudocount and sequencing-depth normalization.
screen.filterLowCounts()
screen.countNormalization()

104515 variables with less than 1 reads (filter_type: 'all')
Pseudocount added to counts.
Counts normalized by sequencing depth.


In [21]:
# Guide-level analysis (score_level='compare_reps').
# Per the recorded output, DMSO and each treated condition are compared with
# T0, and each treated condition is also compared with DMSO.
screen.calculateDrugScreen(
    t0='T0',
    untreated='DMSO',
    treated=[
        'Pi',
        'Ri',
        'PiRi',
        'Mi',
        'PiMi',
        'Wi',
        'PiWi',
        'Ki',
        'PiKi',
    ],
    score_level='compare_reps',
    var_names=['target', 'transcript'],
    # count_filter_type='either',
)



	DMSO vs T0




	Pi vs T0
	Pi vs DMSO




	Ri vs T0
	Ri vs DMSO




	PiRi vs T0
	PiRi vs DMSO




	Mi vs T0
	Mi vs DMSO




	PiMi vs T0
	PiMi vs DMSO




	Wi vs T0
	Wi vs DMSO




	PiWi vs T0
	PiWi vs DMSO




	Ki vs T0
	Ki vs DMSO




	PiKi vs T0
	PiKi vs DMSO


In [22]:
# Gene-level analysis (score_level='compare_guides'), collapsed on 'target'.
# keep_top_n=3 presumably limits scoring to three guides per target - confirm
# against the ScreenPro2 documentation.
screen.calculateDrugScreen(
    t0='T0',
    untreated='DMSO',
    treated=[
        'Pi',
        'Ri',
        'PiRi',
        'Mi',
        'PiMi',
        'Wi',
        'PiWi',
        'Ki',
        'PiKi',
    ],
    score_level='compare_guides',
    var_names=['target', 'transcript'],
    collapse_var='target',
    keep_top_n=3,
    # count_filter_type='either',
    # run_name='compare_guides_top_3',
)



	DMSO vs T0




	Pi vs T0
	Pi vs DMSO




	Ri vs T0
	Ri vs DMSO




	PiRi vs T0
	PiRi vs DMSO




	Mi vs T0
	Mi vs DMSO




	PiMi vs T0
	PiMi vs DMSO




	Wi vs T0
	Wi vs DMSO




	PiWi vs T0
	PiWi vs DMSO




	Ki vs T0
	Ki vs DMSO




	PiKi vs T0
	PiKi vs DMSO


___

In [25]:
# Save the screen object; the recorded output shows it written to "A549_CRISPRi_screens.pkl".
# NOTE(review): `_write_screen_pkl` has a leading underscore (private helper), so it may change between ScreenPro2 versions.
scp.load._write_screen_pkl(screen,'A549_CRISPRi_screens')

Object successfully saved to "A549_CRISPRi_screens.pkl"


___

# Session information

In [34]:
# Load (or reload) the watermark extension that provides the %watermark magic.
%reload_ext watermark

In [35]:
# Record the timestamp, Python/IPython versions and machine details of this run.
%watermark

Last updated: 2024-09-22T00:29:45.118497-07:00

Python implementation: CPython
Python version       : 3.11.9
IPython version      : 8.27.0

Compiler    : GCC 11.2.0
OS          : Linux
Release     : 5.15.0-119-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 64
Architecture: 64bit



In [36]:
# Record the versions of the imported packages.
%watermark --iversions

matplotlib: 3.6.2
pandas    : 1.5.3
anndata   : 0.10.9
scanpy    : 1.10.3
screenpro : 0.4.14
numpy     : 1.26.4
blitzgsea : 1.3.47



___