In [1]:
import sys
import pandas as pd
from scipy import stats
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns
# from venn import venn 
# from upsetplot import from_contents,UpSet
from matplotlib.backends.backend_pdf import PdfPages
# import stringdb

from utils import *

In [51]:
# Short treatment codes (e.g. the `Pi` / `PiRi` values seen in the `treatment`
# column of `.var`) -> drug labels.
# Single agents come first, followed by the PARPi combinations.
drug_names = dict(
    Pi='PARPi',
    Ri='ATRi',
    Wi='WEE1i',
    Mi='ATMi',
    Ki='DNAPKi',
    PiRi='PARPi+ATRi',
    PiWi='PARPi+WEE1i',
    PiMi='PARPi+ATMi',
    PiKi='PARPi+DNAPKi',
)

In [5]:
# import scanpy as sc
import anndata as ad

In [6]:
import screenpro

In [7]:
from screenpro.load import loadScreenProcessingData, write_adata_pkl

In [8]:
def mergeColumnIndex(df_in):
    """Return a copy of `df_in` with multi-level column labels flattened.

    Each column tuple of a `pandas.MultiIndex` is collapsed into one string by
    joining its levels with a single space, e.g. ``('gamma', 'rep1')`` becomes
    ``'gamma rep1'``. Level values are converted with `str` first, so
    non-string levels (ints, floats, ...) no longer raise `TypeError`.

    Frames whose columns are not a `MultiIndex` are returned as an unchanged
    copy; joining a plain string label would otherwise insert a space between
    every character of the label.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input frame; it is never modified.

    Returns
    -------
    pandas.DataFrame
        A copy of `df_in` with single-level column labels.
    """
    df = df_in.copy()
    if isinstance(df.columns, pd.MultiIndex):
        # `MultiIndex.map` hands each column to the callable as a tuple of levels.
        df.columns = df.columns.map(lambda levels: ' '.join(map(str, levels)))
    return df

In [9]:
def getScreenProcessingAnnData(data_in):
    """Build an `AnnData` object from a ScreenProcessing dataset.

    Parameters
    ----------
    data_in : mapping
        Must provide the entries ``'counts'``, ``'library'``, ``'phenotypes'``,
        ``'transcript scores'`` and ``'gene scores'``. The container is copied
        with ``.copy()`` before it is read.
        NOTE(review): assumes ``'counts'`` has a two-level, unnamed column index
        (treatment, replicate) -- confirm against the loader.

    Returns
    -------
    anndata.AnnData
        ``obs`` is the library table, ``var`` has one row per counts column
        (labelled ``sample_<n>``), ``X`` holds the counts aligned to ``obs``,
        and ``obsm['phenotypes']`` holds the phenotype table with flattened
        column labels. The transcript- and gene-level score tables are attached
        as the plain attributes ``transcript_scores`` and ``gene_scores``.
    """
    dataset = data_in.copy()
    counts = dataset['counts']

    # One row per counts column: the two column levels become the
    # `treatment` / `replicate` fields and rows are relabelled sample_0, sample_1, ...
    sample_table = (
        counts.columns
        .to_frame()
        .rename(columns={0: 'treatment', 1: 'replicate'})
        .reset_index(drop=True)
        .rename(index=lambda s: f'sample_{str(s)}')
    )

    adata = ad.AnnData(var=sample_table, obs=dataset['library'])

    # Reorder the count rows to follow the obs index before taking raw values.
    adata.X = counts.loc[adata.obs.index, :].to_numpy()

    adata.obsm['phenotypes'] = mergeColumnIndex(dataset['phenotypes'])

    # The score tables are kept as-is on ad-hoc attributes rather than in `.uns`.
    adata.transcript_scores = dataset['transcript scores']
    adata.gene_scores = dataset['gene scores']

    return adata

___

### CRISPRi PRDX1-KD datasets

In [74]:
# Counts come from the saved h5ad file; the phenotype scores are read from the
# companion Excel sheet (first column as index, two header rows) and attached
# as the `gene_scores` attribute.
Ci_prdx1kd_adata = ad.read_h5ad(
    'CRISPRi-PRDX1-screens/results/adata_counts.h5ad.gz'
)
Ci_prdx1kd_adata.gene_scores = pd.read_excel(
    'CRISPRi-PRDX1-screens/results/phenotype_scores.xlsx',
    index_col=0,
    header=[0, 1],
)

### CRISPRi datasets

In [26]:
# Load the ScreenProcessing results for the CRISPRi screen (PiKiMiRiWi, 2 replicates).
# The result is passed to `getScreenProcessingAnnData`, which reads its
# 'counts', 'library', 'phenotypes', 'transcript scores' and 'gene scores' entries.
Ci = loadScreenProcessingData('CRISPRi/Analysis/PiKiMiRiWi/PiKiMiRiWi_2reps')

In [27]:
# Convert the loaded CRISPRi dataset into an AnnData object.
Ci_adata = getScreenProcessingAnnData(Ci)

In [28]:
# Drug label -> phenotype name for the CRISPRi screen: 'gamma' maps to itself,
# and the treatments below are numbered rho1..rho9 in the order listed.
# NOTE(review): presumably these are the ScreenProcessing phenotype labels --
# confirm against the phenotype table columns.
Ci_adata.comparisons = {
    'gamma': 'gamma',
    **{
        treatment: f'rho{rank}'
        for rank, treatment in enumerate(
            [
                'PARPi',
                'DNAPKi',
                'ATMi',
                'ATRi',
                'WEE1i',
                'PARPi+DNAPKi',
                'PARPi+ATMi',
                'PARPi+ATRi',
                'PARPi+WEE1i',
            ],
            start=1,
        )
    },
}

### CRISPRa datasets

Make `AnnData` objects

In [13]:
# Load the ScreenProcessing results for the two CRISPRa screens:
# K562 (PiRi analysis) and A549 (single replicate, Pi/Wi/Ri analysis).
Ca_k562 = loadScreenProcessingData('CRISPRa/Analysis/k562/k562a_PiRi')
Ca_A549 = loadScreenProcessingData('CRISPRa/Analysis/single_replicate_A549/PiRiWi_1stRep/CRISPRa_PiWiRi')

In [14]:
# Convert the loaded K562 CRISPRa dataset into an AnnData object.
Ca_k562_adata = getScreenProcessingAnnData(Ca_k562)

In [32]:
# Show the sample table (treatment and replicate per sample) for the K562 screen.
Ca_k562_adata.var

Unnamed: 0,treatment,replicate
sample_0,DMSO,rep1
sample_1,DMSO,rep2
sample_2,Pi,rep1
sample_3,Pi,rep2
sample_4,PiRi,rep1
sample_5,PiRi,rep2
sample_6,T0,rep1
sample_7,T0,rep2


In [41]:
# Drug label -> phenotype name for the K562 CRISPRa screen: 'gamma' maps to
# itself, and the treatments below are numbered rho1, rho2 in the order listed.
# NOTE(review): presumably these are the ScreenProcessing phenotype labels --
# confirm against the phenotype table columns.
Ca_k562_adata.comparisons = {
    'gamma': 'gamma',
    **{
        treatment: f'rho{rank}'
        for rank, treatment in enumerate(['PARPi', 'PARPi+ATRi'], start=1)
    },
}

___

In [15]:
# Convert the loaded A549 CRISPRa dataset into an AnnData object.
Ca_A549_adata = getScreenProcessingAnnData(Ca_A549)

In [42]:
# Show the sample table (treatment and replicate per sample) for the A549 screen.
Ca_A549_adata.var

Unnamed: 0,treatment,replicate
sample_0,DMSO,Rep1
sample_1,Pi,Rep1
sample_2,PiRi,Rep1
sample_3,PiWi,Rep1
sample_4,Ri,Rep1
sample_5,T0,Rep1
sample_6,Wi,Rep1


In [53]:
# Drug label -> phenotype name for the A549 CRISPRa screen: 'gamma' maps to
# itself, and the treatments below are numbered rho1..rho5 in the order listed.
# NOTE(review): presumably these are the ScreenProcessing phenotype labels --
# confirm against the phenotype table columns.
Ca_A549_adata.comparisons = {
    'gamma': 'gamma',
    **{
        treatment: f'rho{rank}'
        for rank, treatment in enumerate(
            [
                'PARPi',
                'ATRi',
                'WEE1i',
                'PARPi+ATRi',
                'PARPi+WEE1i',
            ],
            start=1,
        )
    },
}

### Save data

In [49]:
# Create the output directory from Python instead of a shell escape so the cell
# also works where `mkdir -p` is unavailable; `exist_ok=True` keeps re-runs harmless.
import os

os.makedirs('datasets', exist_ok=True)

In [78]:
# Pickle the PRDX1-KD screen; the helper appends ".pkl" to the given path (see the message below).
write_adata_pkl(Ci_prdx1kd_adata,'datasets/CRISPRi_PRDX1-KD')

Object successfully saved to "datasets/CRISPRi_PRDX1-KD.pkl"


In [55]:
# Pickle the remaining three screens; `write_adata_pkl` reports each saved path.
for screen_adata, out_stem in (
    (Ci_adata, 'datasets/CRISPRi'),
    (Ca_A549_adata, 'datasets/CRISPRa_A549'),
    (Ca_k562_adata, 'datasets/CRISPRa_k562'),
):
    write_adata_pkl(screen_adata, out_stem)

Object successfully saved to "datasets/CRISPRi.pkl"
Object successfully saved to "datasets/CRISPRa_A549.pkl"
Object successfully saved to "datasets/CRISPRa_k562.pkl"


In [47]:
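# Alternative export to h5ad (disabled; the datasets are written as pickles above).
# NOTE(review): the extra attributes set in this notebook (`gene_scores`,
# `transcript_scores`, `comparisons`) are presumably not stored by h5ad -- confirm before re-enabling.
# Ci_adata.write('datasets/CRISPRi.h5ad')
# Ca_A549_adata.write_h5ad('datasets/CRISPRa_A549.h5ad')
# Ca_k562_adata.write_h5ad('datasets/CRISPRa_k562.h5ad')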

# 

In [54]:
from watermark import watermark

# Record the run environment: first the default watermark report, then the
# versions of the modules imported into this notebook's globals.
print(watermark())
print('_' * 80)
print(watermark(iversions=True, globals_=globals()))

Last updated: 2023-08-14T12:44:37.094437-07:00

Python implementation: CPython
Python version       : 3.9.16
IPython version      : 8.14.0

Compiler    : GCC 11.3.0
OS          : Linux
Release     : 3.10.0-957.27.2.el7.x86_64
Machine     : x86_64
Processor   : x86_64
CPU cores   : 64
Architecture: 64bit

________________________________________________________________________________
matplotlib: 3.7.2
pandas    : 2.0.3
seaborn   : 0.12.2
sys       : 3.9.16 | packaged by conda-forge | (main, Feb  1 2023, 21:39:03) 
[GCC 11.3.0]
screenpro : 0.2.1
numpy     : 1.24.4
anndata   : 0.9.1
scipy     : 1.11.1

