In [1]:
import sys
import pandas as pd
from scipy import stats
from functools import reduce
import matplotlib.pyplot as plt
import seaborn as sns
# from venn import venn 
# from upsetplot import from_contents,UpSet
from matplotlib.backends.backend_pdf import PdfPages
# import stringdb

from utils import *

In [2]:
# Short treatment codes -> full inhibitor names. Combination treatments use
# the concatenated single-agent codes as key and join the names with '+'.
drug_names = dict(
    Pi='PARPi',
    Ri='ATRi',
    Wi='WEE1i',
    Mi='ATMi',
    Ki='DNAPKi',
    PiRi='PARPi+ATRi',
    PiWi='PARPi+WEE1i',
    PiMi='PARPi+ATMi',
    PiKi='PARPi+DNAPKi',
)

In [3]:
# import scanpy as sc
import anndata as ad

In [4]:
import screenpro as scp

In [5]:
from screenpro.load import loadScreenProcessingData, write_adata_pkl

In [6]:
def mergeColumnIndex(df_in):
    """Return a copy of `df_in` with multi-level column labels flattened.

    Every column label tuple is collapsed into a single string by joining its
    level values with one space, e.g. ``('a', 'x')`` becomes ``'a x'``.

    Parameters
    ----------
    df_in : pandas.DataFrame
        Input table. It is copied, never modified.

    Returns
    -------
    pandas.DataFrame
        Copy of `df_in`. Columns are flattened when they form a MultiIndex and
        are left unchanged otherwise.
    """
    df = df_in.copy()
    # Only MultiIndex labels are tuples. Joining a plain string label would
    # split it into characters ('ab' -> 'a b'), so flat columns are kept as is.
    if isinstance(df.columns, pd.MultiIndex):
        # Cast each level value to str so numeric levels do not make join raise.
        df.columns = df.columns.map(lambda levels: ' '.join(map(str, levels)))
    return df

In [7]:
def convertScreenProcessing_to_ScreenPro(data_in):
    """Build a `ScreenPro` object from a ScreenProcessing dataset.

    Parameters
    ----------
    data_in : str, os.PathLike, or mapping
        Either a path that is handed to `loadScreenProcessingData`, or an
        already loaded dataset. A loaded dataset is copied with its own
        `.copy()` method and must provide the keys 'counts', 'library',
        'phenotypes', 'transcript scores' and 'gene scores'.

    Returns
    -------
    scp.ScreenPro
        Screen wrapping an `AnnData` object whose `obs` is the library table,
        whose `var` describes the count columns, and whose `X` holds the
        counts. Its `phenotypes` mapping is updated with the keys
        'phenotypes', 'transcript_scores' and 'gene_scores'.
    """
    import os  # local import: `os` is not among the notebook's top-level imports

    # isinstance (rather than an exact type check) also accepts str subclasses
    # and pathlib.Path; os.fspath returns str input unchanged.
    if isinstance(data_in, (str, os.PathLike)):
        data = loadScreenProcessingData(os.fspath(data_in))
    else:
        data = data_in.copy()

    counts = data['counts']

    # One row per counts column. Unnamed column levels come out of to_frame()
    # as 0 and 1 and are renamed here; rows are relabelled sample_0, sample_1, ...
    sample_table = (
        counts.columns.to_frame()
        .rename(columns={0: 'treatment', 1: 'replicate'})
        .reset_index(drop=True)
        .rename(index=lambda s: f'sample_{s}')
    )

    adata = ad.AnnData(var=sample_table, obs=data['library'])

    # Select count rows in the order of the library table so X lines up with obs.
    adata.X = counts.loc[adata.obs.index, :].to_numpy()

    screen = scp.ScreenPro(adata)
    screen.phenotypes.update({
        'phenotypes': mergeColumnIndex(data['phenotypes']),
        "transcript_scores": data['transcript scores'],
        "gene_scores": data['gene scores']
    })

    return screen

___

### CRISPRi PRDX1-KD datasets

In [8]:
# Read the PRDX1-KD CRISPRi screen from its gzipped .h5ad file
# (named "adata_counts", so presumably the count matrix — verify).
Ci_prdx1kd_adata = ad.read_h5ad('CRISPRi-PRDX1-screens/results/adata_counts.h5ad.gz')

In [9]:
# Wrap the loaded AnnData object in a ScreenPro object.
Ci_prdx1kd = scp.ScreenPro(Ci_prdx1kd_adata)

In [10]:
# Store the phenotype score sheet under the 'gene_scores' key. The sheet is
# read with its first two rows as a two-level column header and its first
# column as the row index.
Ci_prdx1kd.phenotypes.update(
    gene_scores=pd.read_excel(
        'CRISPRi-PRDX1-screens/results/phenotype_scores.xlsx',
        header=[0, 1],
        index_col=0,
    )
)

### CRISPRi datasets

In [11]:
# Load the CRISPRi ScreenProcessing results at this path as a ScreenPro object.
Ci = convertScreenProcessing_to_ScreenPro('CRISPRi/Analysis/PiKiMiRiWi/PiKiMiRiWi_2reps')

In [12]:
# Readable comparison label -> phenotype name ('gamma', 'rho1' ... 'rho9')
# for the CRISPRi screen, listed as (label, phenotype) pairs.
Ci.comparisons = dict([
    ('gamma', 'gamma'),
    ('PARPi', 'rho1'),
    ('DNAPKi', 'rho2'),
    ('ATMi', 'rho3'),
    ('ATRi', 'rho4'),
    ('WEE1i', 'rho5'),
    ('PARPi+DNAPKi', 'rho6'),
    ('PARPi+ATMi', 'rho7'),
    ('PARPi+ATRi', 'rho8'),
    ('PARPi+WEE1i', 'rho9'),
])

### CRISPRa datasets

Make `AnnData`-backed `ScreenPro` objects for the CRISPRa screens

In [14]:
# Load the two CRISPRa screens (K562 and A549) as ScreenPro objects.
Ca_k562 = convertScreenProcessing_to_ScreenPro(
    'CRISPRa/Analysis/k562/k562a_PiRi'
)
Ca_A549 = convertScreenProcessing_to_ScreenPro(
    'CRISPRa/Analysis/single_replicate_A549/PiRiWi_1stRep/CRISPRa_PiWiRi'
)

# Readable comparison label -> phenotype name, as (label, phenotype) pairs.
# The rho numbering is specific to each screen.
Ca_k562.comparisons = dict([
    ('gamma', 'gamma'),
    ('PARPi', 'rho1'),
    ('PARPi+ATRi', 'rho2'),
])

Ca_A549.comparisons = dict([
    ('gamma', 'gamma'),
    ('PARPi', 'rho1'),
    ('ATRi', 'rho2'),
    ('WEE1i', 'rho3'),
    ('PARPi+ATRi', 'rho4'),
    ('PARPi+WEE1i', 'rho5'),
])

___

In [15]:
# Display the per-sample table (treatment, replicate) of the K562 CRISPRa screen.
Ca_k562.adata.var

Unnamed: 0,treatment,replicate
sample_0,DMSO,rep1
sample_1,DMSO,rep2
sample_2,Pi,rep1
sample_3,Pi,rep2
sample_4,PiRi,rep1
sample_5,PiRi,rep2
sample_6,T0,rep1
sample_7,T0,rep2


In [17]:
# Display the per-sample table (treatment, replicate) of the A549 CRISPRa screen.
Ca_A549.adata.var

Unnamed: 0,treatment,replicate
sample_0,DMSO,Rep1
sample_1,Pi,Rep1
sample_2,PiRi,Rep1
sample_3,PiWi,Rep1
sample_4,Ri,Rep1
sample_5,T0,Rep1
sample_6,Wi,Rep1


### Save data

In [18]:
import os

# Create the output directory in pure Python instead of shelling out to
# `mkdir -p`, so the cell does not depend on that shell command being available.
# exist_ok=True keeps re-runs from failing when the directory already exists.
os.makedirs('datasets', exist_ok=True)

In [19]:
# Save the PRDX1-KD CRISPRi screen; the path is given without extension and
# the message printed by write_adata_pkl reports the file with a '.pkl' suffix.
write_adata_pkl(Ci_prdx1kd,'datasets/CRISPRi_PRDX1-KD')

Object successfully saved to "datasets/CRISPRi_PRDX1-KD.pkl"


In [None]:
# Save the CRISPRi screen and both CRISPRa screens under datasets/.
# NOTE(review): the recorded output lists only CRISPRi.pkl and
# CRISPRa_A549.pkl — confirm that the CRISPRa_k562 file was written.
write_adata_pkl(Ci,'datasets/CRISPRi')
write_adata_pkl(Ca_A549,'datasets/CRISPRa_A549')
write_adata_pkl(Ca_k562,'datasets/CRISPRa_k562')

Object successfully saved to "datasets/CRISPRi.pkl"
Object successfully saved to "datasets/CRISPRa_A549.pkl"


In [None]:
# Ci_adata.write('datasets/CRISPRi.h5ad')
# Ca_A549_adata.write_h5ad('datasets/CRISPRa_A549.h5ad')
# Ca_k562_adata.write_h5ad('datasets/CRISPRa_k562.h5ad')

# 

In [None]:
from watermark import watermark

# Print two watermark reports separated by an 80-character rule: the default
# report, then one requested with iversions=True for this notebook's globals.
print(
    watermark(),
    '_' * 80,
    watermark(iversions=True, globals_=globals()),
    sep='\n',
)