In [1]:
import sys
import pandas as pd
from scipy import stats
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns
# from venn import venn 
# from upsetplot import from_contents,UpSet
from matplotlib.backends.backend_pdf import PdfPages
# import stringdb

from utils import *

In [2]:
import scanpy as sc
import anndata as ad

In [3]:
import screenpro

In [4]:
from screenpro.load import loadScreenProcessingData, write_adata_pkl

### CRISPRi datasets

In [5]:
def mergeColumnIndex(df_in):
    """Return a copy of `df_in` whose multi-level column labels are flattened.

    Each column's tuple of level labels is joined with a single space
    (e.g. ``('rho', 'rep1')`` becomes ``'rho rep1'``). The input frame is
    not modified.

    Args:
        df_in: DataFrame, normally with `pd.MultiIndex` columns.

    Returns:
        A copy of `df_in` with a flat column index. A frame whose columns
        are already flat is returned as an unchanged copy.
    """
    df = df_in.copy()
    # Only tuple labels can be joined; joining a flat string label would
    # split it into space-separated characters.
    if isinstance(df.columns, pd.MultiIndex):
        # str() each label so non-string levels (e.g. integer replicate
        # numbers) do not raise TypeError in ' '.join.
        df.columns = df.columns.map(lambda labels: ' '.join(map(str, labels)))
    return df

In [6]:
def getScreenProcessingAnnData(data_in):
    """Make an `AnnData` object from a ScreenProcessing dataset.

    Args:
        data_in: mapping that provides the tables 'counts', 'library',
            'phenotypes', 'transcript scores' and 'gene scores'.
            'counts' and 'phenotypes' must be row-indexed by the labels of
            'library'; their columns are presumably two-level headers
            (treatment, replicate for 'counts') -- confirm against the loader.

    Returns:
        An `AnnData` whose
        - `obs` is the library table,
        - `var` has one row per count column, named ``sample_<i>``,
        - `X` is the counts matrix in library row order,
        - `obsm['phenotypes']` is the phenotype table with flattened column
          names, in library row order.
        The transcript- and gene-level score tables are attached as the
        attributes `transcript_scores` and `gene_scores`.

    Raises:
        KeyError: if a library row label is missing from 'counts' or
            'phenotypes'.
    """
    data = data_in.copy()

    # Unnamed column levels come out of `to_frame()` as 0 and 1; give them
    # readable names and replace the row labels with sample_0, sample_1, ...
    sample_table = (
        data['counts'].columns.to_frame()
        .rename(columns={0:'treatment',1:'replicate'})
        .reset_index(drop=True)
        .rename(index=lambda s: f'sample_{str(s)}')
    )

    adata = ad.AnnData(
        var = sample_table,
        obs=data['library']
    )

    # Reorder rows to follow `obs` so that X lines up with the library table.
    adata.X = data['counts'].loc[adata.obs.index,:].to_numpy()

    # Align phenotypes the same way as the counts: anndata rejects an `obsm`
    # DataFrame whose index does not equal `obs_names` element-wise.
    adata.obsm['phenotypes'] = mergeColumnIndex(data['phenotypes']).loc[adata.obs.index,:]

    # NOTE(review): these are ad-hoc Python attributes, not standard AnnData
    # fields; presumably they are not carried through `.copy()` or
    # `.write_h5ad()` -- confirm before relying on them downstream.
    adata.transcript_scores = data['transcript scores']
    adata.gene_scores = data['gene scores']

    return adata

___

In [8]:
# Load the CRISPRi ScreenProcessing results. `Ci` is later passed to
# getScreenProcessingAnnData, which reads its 'counts', 'library',
# 'phenotypes', 'transcript scores' and 'gene scores' entries.
Ci = loadScreenProcessingData('CRISPRi/Analysis/PiKiMiRiWi/PiKiMiRiWi_2reps')

In [17]:
# Wrap the loaded CRISPRi tables in a single AnnData container.
Ci_adata = getScreenProcessingAnnData(Ci)

In [18]:
# Map each treatment condition to its phenotype label: the n-th treatment
# (1-based, in the order listed) is assigned 'rho<n>'.
Ci_adata.comparisons = {
    treatment: f'rho{position}'
    for position, treatment in enumerate(
        (
            'PARPi',
            'DNAPKi',
            'ATMi',
            'ATRi',
            'WEE1i',
            'PARPi+DNAPKi',
            'PARPi+ATMi',
            'PARPi+ATRi',
            'PARPi+WEE1i',
        ),
        start=1,
    )
}

Make `AnnData` objects for the CRISPRa datasets (K562 and A549).

In [19]:
# Load the ScreenProcessing results of the two CRISPRa screens.
# Judging by the path, the A549 dataset is a single-replicate analysis.
Ca_k562 = loadScreenProcessingData('CRISPRa/Analysis/k562/k562a_PiRi')
Ca_A549 = loadScreenProcessingData('CRISPRa/Analysis/single_replicate_A549/PiRiWi_1stRep/CRISPRa_PiWiRi')

In [20]:
# Wrap the loaded K562 CRISPRa tables in an AnnData container.
Ca_k562_adata = getScreenProcessingAnnData(Ca_k562)

In [21]:
# Wrap the loaded A549 CRISPRa tables in an AnnData container.
Ca_A549_adata = getScreenProcessingAnnData(Ca_A549)

In [22]:
# Inspect the sample annotations (treatment and replicate per count column).
Ca_k562_adata.var

Unnamed: 0,treatment,replicate
sample_0,DMSO,rep1
sample_1,DMSO,rep2
sample_2,Pi,rep1
sample_3,Pi,rep2
sample_4,PiRi,rep1
sample_5,PiRi,rep2
sample_6,T0,rep1
sample_7,T0,rep2


In [23]:
# Inspect the sample annotations (treatment and replicate per count column).
Ca_A549_adata.var

Unnamed: 0,treatment,replicate
sample_0,DMSO,Rep1
sample_1,Pi,Rep1
sample_2,PiRi,Rep1
sample_3,PiWi,Rep1
sample_4,Ri,Rep1
sample_5,T0,Rep1
sample_6,Wi,Rep1


### Save data

In [24]:
# Make sure the output directory exists (`-p`: no error if it is already there).
!mkdir -p datasets

In [25]:
# Save each screen's AnnData object to its own file. Every call must pass the
# object that matches the output name (previously all three wrote Ci_adata).
write_adata_pkl(Ci_adata,'datasets/CRISPRi')
write_adata_pkl(Ca_A549_adata,'datasets/CRISPRa_A549')
write_adata_pkl(Ca_k562_adata,'datasets/CRISPRa_k562')

Object successfully saved to "datasets/CRISPRi.pkl"
Object successfully saved to "datasets/CRISPRa_A549.pkl"
Object successfully saved to "datasets/CRISPRa_k562.pkl"


In [47]:
# Ci_adata.write('datasets/CRISPRi.h5ad')
# Ca_A549_adata.write_h5ad('datasets/CRISPRa_A549.h5ad')
# Ca_k562_adata.write_h5ad('datasets/CRISPRa_k562.h5ad')

### Session info

In [26]:
from watermark import watermark

# Report the interpreter and machine details first, then a separator line,
# then the versions of the packages imported into this notebook's globals.
print(watermark())
print('_' * 80)
print(watermark(iversions=True, globals_=globals()))

Last updated: 2023-07-18T14:09:14.468214-07:00

Python implementation: CPython
Python version       : 3.9.16
IPython version      : 8.14.0

Compiler    : GCC 11.2.0
OS          : Linux
Release     : 3.10.0-957.27.2.el7.x86_64
Machine     : x86_64
Processor   : x86_64
CPU cores   : 64
Architecture: 64bit

________________________________________________________________________________
pandas    : 2.0.3
matplotlib: 3.7.2
sys       : 3.9.16 (main, Jan 11 2023, 16:05:54) 
[GCC 11.2.0]
scipy     : 1.11.1
seaborn   : 0.12.2
numpy     : 1.24.4
scanpy    : 1.9.3
screenpro : 0.2.1
anndata   : 0.9.1

