In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import matplotlib.patches as mplpatches
import plotnine as p9

In [2]:
sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()
sc.settings.set_figure_params(dpi=200)



scanpy==1.8.2 anndata==0.7.8 umap==0.5.2 numpy==1.21.5 scipy==1.7.3 pandas==1.3.5 scikit-learn==1.0.2 statsmodels==0.13.2 python-igraph==0.9.9 pynndescent==0.5.6


In [3]:
T0_adata=sc.read_h5ad(output_d+'/T0_OAK/Anndata/OAK1to20_GEXwLineage_raw.h5ad')

In [4]:
LineageName=T0_adata.var.index[T0_adata.var['feature_types']== "CRISPR Guide Capture"]

LineageData=sc.get.obs_df(T0_adata,keys=LineageName.tolist())

TopSup=LineageData.apply(lambda row: row.nlargest(2).values[0],axis=1)

df_lineage=TopSup.to_frame()

df_lineage.columns=['nUMI_TopSupLineage']

df_lineage['nUMI_secondary']=LineageData.apply(lambda row: row.nlargest(2).values[-1],axis=1)

df_lineage['second/top']=df_lineage['nUMI_secondary']/df_lineage['nUMI_TopSupLineage']

df_lineage['Lineage_assigned']=LineageData.idxmax(axis=1)*(df_lineage['second/top']<=0.5)

In [5]:
T0_adata.obs=T0_adata.obs.merge(df_lineage[['Lineage_assigned']],how="left",left_index=True,right_index=True)

In [6]:
T0_GEX_adata_filtered=T0_adata[T0_adata.obs['Lineage_assigned']!='',T0_adata.var["feature_types"] == "Gene Expression"]

In [7]:
T0_GEX_adata_filtered

View of AnnData object with n_obs × n_vars = 34829 × 36601
    obs: 'aliquot', 'cell_bc', 'batch', 'Lineage_assigned'
    var: 'gene_ids', 'feature_types'

In [8]:
T0_GEX_adata_filtered.obs['sample']='D0'

Trying to set attribute `.obs` of view, copying.


In [11]:
T3_GEX_adata_filtered=sc.read_h5ad(output_d+'/T3_OAK/Anndata/OAK1to12_GEXwLineage_filtered.h5ad')

In [12]:
T3_GEX_adata_filtered

AnnData object with n_obs × n_vars = 40554 × 36601
    obs: 'aliquot', 'cell_bc', 'batch', 'nUMI_TopSupLineage', 'nUMI_secondary', 'second/top', 'Lineage_assigned'
    var: 'gene_ids', 'feature_types'

In [14]:
T3_GEX_adata_filtered.obs['sample']='D10'

In [15]:
T5_clone_adata=sc.read_h5ad(output_d+'/T5_control/Anndata/AggD20andClone1_all.h5ad')

In [16]:
T5_clone_GEX_adata_filtered=T5_clone_adata[T5_clone_adata.obs['num_features']==1.0,T5_clone_adata.var["feature_types"] == "Gene Expression"]

In [17]:
T5_clone_GEX_adata_filtered

View of AnnData object with n_obs × n_vars = 19917 × 36601
    obs: 'sample', 'cell_bc', 'batch', 'cell_barcode', 'num_features', 'feature_call', 'num_umis'
    var: 'gene_ids', 'feature_types'

In [18]:
T5_clone_GEX_adata_filtered.obs['Lineage_assigned']=T5_clone_GEX_adata_filtered.obs['feature_call']

Trying to set attribute `.obs` of view, copying.


In [20]:
combined_adata=T0_GEX_adata_filtered.concatenate(T3_GEX_adata_filtered.concatenate(T5_clone_GEX_adata_filtered))

In [21]:
combined_adata

AnnData object with n_obs × n_vars = 95300 × 36601
    obs: 'aliquot', 'cell_bc', 'batch', 'Lineage_assigned', 'sample', 'percent_MAPK', 'nUMI_TopSupLineage', 'nUMI_secondary', 'second/top', 'cell_barcode', 'num_features', 'feature_call', 'num_umis'
    var: 'gene_ids', 'feature_types'

In [22]:
combined_adata.obs['is_GENBC0283767']='Others'
combined_adata.obs.loc[combined_adata.obs['Lineage_assigned']=='GENBC0283767','is_GENBC0283767'] = 'GENBC0283767' 

In [23]:
sc.pp.calculate_qc_metrics(combined_adata, inplace=True)

In [24]:
sc.pp.normalize_total(combined_adata, target_sum=1e4)

normalizing counts per cell
    finished (0:00:02)


In [25]:
sc.pp.log1p(combined_adata)

In [26]:
combined_adata.raw = combined_adata

In [41]:
combined_adata.write(output_d+'/Anndata/T0T3T5Clone1_combined.h5ad')

... storing 'aliquot' as categorical
... storing 'cell_bc' as categorical
... storing 'Lineage_assigned' as categorical
... storing 'sample' as categorical
... storing 'is_GENBC0283767' as categorical


In [4]:
combined_adata=sc.read_h5ad(output_d+'/Anndata/T0T3T5Clone1_combined.h5ad')

In [5]:
combined_adata.obs

Unnamed: 0,aliquot,cell_bc,batch,Lineage_assigned,sample,percent_MAPK,nUMI_TopSupLineage,nUMI_secondary,second/top,cell_barcode,...,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,MAPKactivity_score,MAPK_scaled
AAACCCACACATGAAA-1-0-0,1,AAACCCACACATGAAA-1,0,GENBC0211263,D0,0.001251,,,,,...,4380,8.385032,11190.0,9.322865,15.263628,20.545130,28.302055,43.404826,0.398060,8.773926
AAACCCATCAGGAGAC-1-0-0,1,AAACCCATCAGGAGAC-1,0,GENBC0253909,D0,0.000561,,,,,...,3290,8.098947,7126.0,8.871646,18.692113,24.571990,32.837496,48.512489,0.090546,0.669834
AAACGCTAGTCGGCAA-1-0-0,1,AAACGCTAGTCGGCAA-1,0,GENBC0218015,D0,0.000382,,,,,...,3400,8.131825,7846.0,8.967887,24.789702,30.040785,37.114453,50.573541,0.049208,-0.672717
AAACGCTGTGGTGATG-1-0-0,1,AAACGCTGTGGTGATG-1,0,GENBC0218015,D0,0.000146,,,,,...,2955,7.991592,6836.0,8.830104,25.965477,31.729081,40.184318,55.354008,-0.081940,-2.417376
AAAGAACCACCAGGTC-1-0-0,1,AAAGAACCACCAGGTC-1,0,GENBC0253909,D0,0.000289,,,,,...,3068,8.029107,6910.0,8.840870,22.981187,28.798842,37.134588,52.677279,-0.025097,-1.956690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGTTGTCCCATACC-1-1-1-1,,TTTGTTGTCCCATACC-1,1,GENBC0283767,Resist_clone1,0.000219,,,,TTTGTTGTCCCATACC-1,...,1710,7.444833,4571.0,8.427706,31.634216,43.163422,55.020783,71.844235,0.001507,-2.013376
TTTGTTGTCCGACGGT-1-1-1-1,,TTTGTTGTCCGACGGT-1,1,GENBC0283767,Resist_clone1,0.000857,,,,TTTGTTGTCCGACGGT-1,...,4031,8.302018,10496.0,9.258844,22.665777,30.087652,38.509909,51.819741,0.206676,2.883851
TTTGTTGTCGCCTCTA-1-1-1-1,,TTTGTTGTCGCCTCTA-1,1,GENBC0283767,Resist_clone1,0.000400,,,,TTTGTTGTCGCCTCTA-1,...,3547,8.174139,10006.0,9.211040,25.624625,34.589246,44.003598,57.685389,0.118595,0.416379
TTTGTTGTCGGTATGT-1-1-1-1,,TTTGTTGTCGGTATGT-1,1,GENBC0283767,Resist_clone1,0.000574,,,,TTTGTTGTCGGTATGT-1,...,3142,8.052933,6964.0,8.848653,23.305572,30.370477,39.129810,53.604250,0.106872,0.394702


In [7]:
pd.crosstab(combined_adata.obs['aliquot'],combined_adata.obs['sample'])

sample,D0,D10
aliquot,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1744,3468
2,1571,2635
3,1974,3129
4,2067,3257
5,1822,3655
6,1674,3573
7,1717,3387
8,1734,3447
9,1749,3391
10,1608,3273


In [10]:
combined_adata.obs[combined_adata.obs['sample']=='D20']

Unnamed: 0,aliquot,cell_bc,batch,Lineage_assigned,sample,percent_MAPK,nUMI_TopSupLineage,nUMI_secondary,second/top,cell_barcode,...,n_genes_by_counts,log1p_n_genes_by_counts,total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,MAPKactivity_score,MAPK_scaled
AAACCCAAGTCCCGGT-1-0-1-1,,AAACCCAAGTCCCGGT-1,1,GENBC0253909,D20,0.000370,,,,AAACCCAAGTCCCGGT-1,...,3440,8.143517,10822.0,9.289429,25.559046,34.540750,44.880798,60.035114,0.111873,0.969553
AAACCCAGTGTCCTAA-1-0-1-1,,AAACCCAGTGTCCTAA-1,1,GENBC0226882,D20,0.000107,,,,AAACCCAGTGTCCTAA-1,...,3955,8.282989,9335.0,9.141633,17.975362,24.745581,32.779861,47.327263,-0.135862,-2.841741
AAACCCATCATACGGT-1-0-1-1,,AAACCCATCATACGGT-1,1,GENBC0218015,D20,0.000321,,,,AAACCCATCATACGGT-1,...,4973,8.511980,15587.0,9.654257,22.929364,30.069930,38.846475,52.069032,0.018301,-0.114581
AAACCCATCGGTGAAG-1-0-1-1,,AAACCCATCGGTGAAG-1,1,GENBC0205301,D20,0.001265,,,,AAACCCATCGGTGAAG-1,...,3528,8.168770,7908.0,8.975757,19.802731,26.681841,35.217501,49.228629,0.265447,7.303456
AAACGAAAGATTACCC-1-0-1-1,,AAACGAAAGATTACCC-1,1,GENBC0257789,D20,0.000124,,,,AAACGAAAGATTACCC-1,...,3122,8.046549,8044.0,8.992806,24.453008,32.695177,42.478866,57.521134,-0.026565,-2.688538
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TTTGGTTTCTAGAACC-3-0-1-1,,TTTGGTTTCTAGAACC-3,1,GENBC0211263,D20,0.000154,,,,TTTGGTTTCTAGAACC-3,...,4782,8.472823,12966.0,9.470163,18.910998,25.620855,33.826932,47.601419,-0.098741,-2.278543
TTTGTTGAGAGTTGTA-3-0-1-1,,TTTGTTGAGAGTTGTA-3,1,GENBC0278394,D20,0.000294,,,,TTTGTTGAGAGTTGTA-3,...,3951,8.281977,10202.0,9.230437,21.172319,28.415997,37.257401,51.842776,0.021348,-0.114011
TTTGTTGAGGAGGCAG-3-0-1-1,,TTTGTTGAGGAGGCAG-3,1,GENBC0218015,D20,0.001154,,,,TTTGTTGAGGAGGCAG-3,...,2001,7.601902,3465.0,8.150757,20.461760,27.417027,36.998557,55.353535,0.230367,4.099583
TTTGTTGCAATAACGA-3-0-1-1,,TTTGTTGCAATAACGA-3,1,GENBC0226882,D20,0.000000,,,,TTTGTTGCAATAACGA-3,...,1887,7.543273,3915.0,8.272826,26.615581,35.453384,45.951469,63.320562,-0.168331,-4.240706


In [12]:
combined_adata.obs[['aliquot','cell_bc','Lineage_assigned','sample']].to_csv('T0T3T5Clone1_combined_scLineage.csv')