# Assemble BCell Data and Assign Isotypes

PseudoCode:  

* Download CertPro B cell anndata objects (all-b-cell and memory b w/ clusters)
* Assemble into a single object with memory B clusters
* Perform Isotype assignment
     * Save cell-level table of isotype assignments for each cutoff
* Make pseudobulks by B cell L3 type and isotype
     * Save H5ad
* Check All cell types for positivity against IgH isotype thresholds
    * Save table of percent cells positive per L3 celltype per IgH

## Set Up

In [1]:
!! python --version

['Python 3.7.12']

In [2]:
import sys
sys.path.insert(0, '/home/jupyter/hisepy/build/lib')

In [3]:
import hisepy as hp

In [4]:
import anndata as ad
import scanpy as sc
import pandas as pd 
import numpy as np
import re
import matplotlib

import h5py
import statistics as stats
import os



In [5]:
import matplotlib.pyplot as plt
from itertools import combinations 

In [6]:
def plotdim(w=7,h=7):
    """Set matplotlib's default figure size (``figure.figsize``) to ``[w, h]``."""
    figsize = [w, h]
    matplotlib.rcParams['figure.figsize'] = figsize

In [7]:
outdir = './_output/certpro'

In [8]:
# Create the output directory, including any missing parent (e.g. ./_output).
# os.mkdir only creates the last path component and fails when the parent
# directory does not exist yet; os.makedirs builds the whole chain.
if not os.path.exists(outdir):
    print("Creating directory {}".format(outdir))
    os.makedirs(outdir, exist_ok=True)

## Download Data

In [9]:
os.getcwd()

'/home/jupyter/ra-longitudinal/scRNA/Aim3_switched-followup'

In [10]:
fid_alldata = '609f7543-d4d5-41e9-a3d2-8e50c3e7c61d'
fid_bmem_clusters = '89859cec-8d36-47fd-a165-f0f34ae90284'  # 15.4gb
fid_in = [fid_alldata,fid_bmem_clusters]

In [13]:
hp.cache_files(file_ids = fid_in)

downloading fileID: 609f7543-d4d5-41e9-a3d2-8e50c3e7c61d
downloading fileID: 89859cec-8d36-47fd-a165-f0f34ae90284
Files have been successfully downloaded!


## Read and Format Data

### Full object

In [14]:
adata_all_raw = sc.read_h5ad("/home/jupyter/cache/609f7543-d4d5-41e9-a3d2-8e50c3e7c61d/ALTRA_certPro_scRNA_141_samples_combined_adata.h5ad")



In [15]:
adata_all_raw.obs.columns

Index(['barcodes', 'batch_id', 'cell_name', 'cell_uuid', 'chip_id',
       'hto_barcode', 'hto_category', 'n_genes', 'n_mito_umis', 'n_reads',
       'n_umis', 'original_barcodes', 'pbmc_sample_id', 'pool_id', 'well_id',
       'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid',
       'subject.biologicalSex', 'subject.race', 'subject.ethnicity',
       'subject.birthYear', 'sample.visitName', 'sample.drawDate',
       'sample.diseaseStatesRecordedAtVisit', 'sample.daysSinceFirstVisit',
       'file.id', 'subset_grp', 'predicted_doublet', 'doublet_score',
       'AIFI_L1', 'AIFI_L1_score', 'AIFI_L2', 'AIFI_L2_score', 'AIFI_L3',
       'AIFI_L3_score', 'n_genes_by_counts', 'log1p_n_genes_by_counts',
       'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes',
       'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes',
       'pct_counts_in_top_500_genes', 'total_counts_mito',
       'log1p_total_counts_mito', 'pct_counts_mito', 'leiden_harmony_2',


In [16]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(adata_all_raw.obs.loc[adata_all_raw.obs.AIFI_L1 == 'B cell',].AIFI_L3_new.value_counts())

Core naive B cell                       109207
Core memory B cell                       38623
Transitional B cell                      15358
CD27+ effector B cell                     6556
ISG+ naive B cell                         6302
CD27- effector B cell                     6221
Type 2 polarized memory B cell            3267
Plasma cell                               2050
CD95 memory B cell                        2002
Early memory B cell                       1451
Activated memory B cell                    350
Core naive CD8 T cell                       97
Early memory B cell_uk1                     83
CM CD4 T cell                               59
T2MBC_uk1                                   47
Activated memory B cell_uk1                 32
KLRF1- GZMB+ CD27- memory CD4 T cell         9
GZMB- CD27+ EM CD4 T cell                    9
Core naive CD4 T cell                        6
KLRF1- GZMB+ CD27- EM CD8 T cell             5
GZMK- CD56dim NK cell                        5
ILC          

### B Mem object

In [17]:
adata_bmem = sc.read_h5ad("/home/jupyter/cache/89859cec-8d36-47fd-a165-f0f34ae90284/ALTRA_scRNA_Bmem_cells_filtered_certPro.h5ad")

In [18]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(adata_bmem.obs.AIFI_L3_new.value_counts())

Core memory B cell                38569
CD27+ effector B cell              6453
CD27- effector B cell              6213
Type 2 polarized memory B cell     3264
CD95 memory B cell                 1999
Early memory B cell                1448
Activated memory B cell             349
Name: AIFI_L3_new, dtype: int64


### Subset All B

In [19]:
b_types = [
    'Core naive B cell','Core memory B cell','Transitional B cell','CD27+ effector B cell',
    'ISG+ naive B cell','CD27- effector B cell','Type 2 polarized memory B cell',
    'Plasma cell','CD95 memory B cell','Early memory B cell','Activated memory B cell',
    'Activated memory B cell_uk1'
]
bmem_types = [
    'Core memory B cell','CD27+ effector B cell','CD27- effector B cell','Type 2 polarized memory B cell',
    'CD95 memory B cell','Early memory B cell','Activated memory B cell'
]
bother_types = [
    'Core naive B cell','Transitional B cell','ISG+ naive B cell','Plasma cell'
]
bnaive_types = [
    'Core naive B cell','Transitional B cell','ISG+ naive B cell'
]
all([x in adata_all_raw.obs.AIFI_L3_new.values for x in b_types])

True

In [20]:
# Bmem object counts
adata_bmem.obs.AIFI_L3_new.value_counts()

Core memory B cell                38569
CD27+ effector B cell              6453
CD27- effector B cell              6213
Type 2 polarized memory B cell     3264
CD95 memory B cell                 1999
Early memory B cell                1448
Activated memory B cell             349
Name: AIFI_L3_new, dtype: int64

In [21]:
adata_allb = adata_all_raw[adata_all_raw.obs.AIFI_L3_new.isin(b_types),].copy()

In [22]:
# All-cell object, B cell counts
adata_allb.obs.AIFI_L3_new.value_counts()

Core naive B cell                 109212
Core memory B cell                 38623
Transitional B cell                15358
CD27+ effector B cell               6556
ISG+ naive B cell                   6302
CD27- effector B cell               6221
Type 2 polarized memory B cell      3267
Plasma cell                         2053
CD95 memory B cell                  2002
Early memory B cell                 1451
Activated memory B cell              350
Activated memory B cell_uk1           32
Name: AIFI_L3_new, dtype: int64

In [23]:
# keep bmem cells only if in bmem object, keep all other types
bc_mem = adata_bmem.obs.barcodes.values.tolist()
print(len(bc_mem))
# Select the non-memory B cell barcodes with a boolean mask rather than a
# per-row Python loop. `Series[i]` with an integer i is label-based on integer
# indexes (and only falls back to positional lookup on other index types), so
# the mask is both unambiguous and much faster on ~200k rows.
is_other = adata_allb.obs.AIFI_L3_new.isin(bother_types).values
bc_other = adata_allb.obs.barcodes.values[is_other].tolist()
print(len(bc_other))
bc_keep = bc_mem + bc_other
print(len(bc_keep))

58295
132925
191220


In [24]:
adata_allb = adata_allb[adata_allb.obs.barcodes.isin(bc_keep),].copy()

In [25]:
adata_allb

AnnData object with n_obs × n_vars = 191220 × 33538
    obs: 'barcodes', 'batch_id', 'cell_name', 'cell_uuid', 'chip_id', 'hto_barcode', 'hto_category', 'n_genes', 'n_mito_umis', 'n_reads', 'n_umis', 'original_barcodes', 'pbmc_sample_id', 'pool_id', 'well_id', 'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid', 'subject.biologicalSex', 'subject.race', 'subject.ethnicity', 'subject.birthYear', 'sample.visitName', 'sample.drawDate', 'sample.diseaseStatesRecordedAtVisit', 'sample.daysSinceFirstVisit', 'file.id', 'subset_grp', 'predicted_doublet', 'doublet_score', 'AIFI_L1', 'AIFI_L1_score', 'AIFI_L2', 'AIFI_L2_score', 'AIFI_L3', 'AIFI_L3_score', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', 'leiden_harmony_2', 'doublets_manual', 'AIFI_L3_new', 'St

In [26]:
# All-cell object, B cell counts
adata_allb.obs.AIFI_L3_new.value_counts()

Core naive B cell                 109212
Core memory B cell                 38569
Transitional B cell                15358
CD27+ effector B cell               6453
ISG+ naive B cell                   6302
CD27- effector B cell               6213
Type 2 polarized memory B cell      3264
Plasma cell                         2053
CD95 memory B cell                  1999
Early memory B cell                 1448
Activated memory B cell              349
Name: AIFI_L3_new, dtype: int64

### All B Cell Count Check

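Verify how the per-sample B cell count was calculated: the `total_b_counts` variable stored in the memory B cell object's obs should equal the number of cells with `AIFI_L1 == 'B cell'` per sample in the all-cell object.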

In [27]:
all_b_df = adata_all_raw[adata_all_raw.obs.AIFI_L1.isin(['B cell']),].obs.copy()

In [28]:
all_b_df.groupby('sample.sampleKitGuid').size()

sample.sampleKitGuid
KT00052    1432
KT00055     880
KT00056    1136
KT00057     970
KT00058     912
           ... 
KT04905     936
KT04906    1727
KT04924    1621
KT04933    2400
KT04937     907
Length: 141, dtype: int64

In [29]:
df_b_counts = adata_bmem.obs.loc[:,['sample.sampleKitGuid','total_b_counts']].drop_duplicates().sort_values(by = 'sample.sampleKitGuid')

In [30]:
# Compare the per-sample B cell counts recomputed from the all-cell object with
# the precomputed total_b_counts. `.values` strips the groupby index, so the
# comparison is made position-by-position rather than by sample id.
# NOTE(review): this relies on the groupby result and df_b_counts (sorted by
# 'sample.sampleKitGuid' when it was built) listing samples in the same order — confirm.
bcount_check = all_b_df.groupby('sample.sampleKitGuid').size().values == df_b_counts.total_b_counts
bcount_check.all()

True

### Add BMem Cluster Labels

In [31]:
adata_allb.obs['index'] = adata_allb.obs.index

In [32]:
adata_allb.obs.shape

(191220, 61)

In [33]:
adata_allb

AnnData object with n_obs × n_vars = 191220 × 33538
    obs: 'barcodes', 'batch_id', 'cell_name', 'cell_uuid', 'chip_id', 'hto_barcode', 'hto_category', 'n_genes', 'n_mito_umis', 'n_reads', 'n_umis', 'original_barcodes', 'pbmc_sample_id', 'pool_id', 'well_id', 'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid', 'subject.biologicalSex', 'subject.race', 'subject.ethnicity', 'subject.birthYear', 'sample.visitName', 'sample.drawDate', 'sample.diseaseStatesRecordedAtVisit', 'sample.daysSinceFirstVisit', 'file.id', 'subset_grp', 'predicted_doublet', 'doublet_score', 'AIFI_L1', 'AIFI_L1_score', 'AIFI_L2', 'AIFI_L2_score', 'AIFI_L3', 'AIFI_L3_score', 'n_genes_by_counts', 'log1p_n_genes_by_counts', 'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes', 'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes', 'pct_counts_in_top_500_genes', 'total_counts_mito', 'log1p_total_counts_mito', 'pct_counts_mito', 'leiden_harmony_2', 'doublets_manual', 'AIFI_L3_new', 'St

In [34]:
adata_bmem.obs.columns

Index(['barcodes', 'batch_id', 'cell_name', 'cell_uuid', 'chip_id',
       'hto_barcode', 'hto_category', 'n_genes', 'n_mito_umis', 'n_reads',
       'n_umis', 'original_barcodes', 'pbmc_sample_id', 'pool_id', 'well_id',
       'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid',
       'subject.biologicalSex', 'subject.race', 'subject.ethnicity',
       'subject.birthYear', 'sample.visitName', 'sample.drawDate',
       'sample.diseaseStatesRecordedAtVisit', 'sample.daysSinceFirstVisit',
       'file.id', 'subset_grp', 'predicted_doublet', 'doublet_score',
       'AIFI_L1', 'AIFI_L1_score', 'AIFI_L2', 'AIFI_L2_score', 'AIFI_L3',
       'AIFI_L3_score', 'n_genes_by_counts', 'log1p_n_genes_by_counts',
       'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes',
       'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes',
       'pct_counts_in_top_500_genes', 'total_counts_mito',
       'log1p_total_counts_mito', 'pct_counts_mito', 'leiden_harmony_2',


In [35]:
cluster_vals = adata_bmem.obs[['barcodes','leiden_0_8','leiden_0_8_anno']].copy()
cluster_vals.head()

Unnamed: 0,barcodes,leiden_0_8,leiden_0_8_anno
9291,2f3d3192226811eea184aec535c48243,4,CD27+ effector B cell
9292,b5aa7f4c120211eb859a46b4e30ed972,5,CD27+ effector B cell
9293,ef4fc95e429511eca55b3612ac783560,4,CD27+ effector B cell
9294,456f1d76428711ecbe4f8ede1e1ebd6b,1,CD27+ effector B cell
9295,f53ac52e429c11ec80811e9b3f1bfe57,4,CD27+ effector B cell


In [36]:
adata_allb.obs = adata_allb.obs.join(cluster_vals.set_index('barcodes'), on='barcodes', how='left')

In [37]:
adata_allb.obs = adata_allb.obs.join(df_b_counts.set_index('sample.sampleKitGuid'), on='sample.sampleKitGuid', how='left')

In [38]:
adata_allb.obs.shape

(191220, 64)

In [39]:
adata_allb.obs.columns

Index(['barcodes', 'batch_id', 'cell_name', 'cell_uuid', 'chip_id',
       'hto_barcode', 'hto_category', 'n_genes', 'n_mito_umis', 'n_reads',
       'n_umis', 'original_barcodes', 'pbmc_sample_id', 'pool_id', 'well_id',
       'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid',
       'subject.biologicalSex', 'subject.race', 'subject.ethnicity',
       'subject.birthYear', 'sample.visitName', 'sample.drawDate',
       'sample.diseaseStatesRecordedAtVisit', 'sample.daysSinceFirstVisit',
       'file.id', 'subset_grp', 'predicted_doublet', 'doublet_score',
       'AIFI_L1', 'AIFI_L1_score', 'AIFI_L2', 'AIFI_L2_score', 'AIFI_L3',
       'AIFI_L3_score', 'n_genes_by_counts', 'log1p_n_genes_by_counts',
       'total_counts', 'log1p_total_counts', 'pct_counts_in_top_50_genes',
       'pct_counts_in_top_100_genes', 'pct_counts_in_top_200_genes',
       'pct_counts_in_top_500_genes', 'total_counts_mito',
       'log1p_total_counts_mito', 'pct_counts_mito', 'leiden_harmony_2',


In [40]:
adata_allb.obs.leiden_0_8.value_counts()

0     9125
1     8796
2     7260
3     6379
4     6202
5     5948
6     3377
7     3353
8     3080
9     2543
10    2174
11      58
Name: leiden_0_8, dtype: int64

In [41]:
adata_allb.obs.total_b_counts.describe()

count    191220.000000
mean       1687.208284
std         788.327508
min         181.000000
25%        1176.000000
50%        1554.000000
75%        2117.000000
max        4453.000000
Name: total_b_counts, dtype: float64

In [42]:
adata_allb.obs.total_b_counts.isnull().any()

False

In [43]:
adata_allb.obs.head()

Unnamed: 0,barcodes,batch_id,cell_name,cell_uuid,chip_id,hto_barcode,hto_category,n_genes,n_mito_umis,n_reads,...,BMI,CMV_Status_Subj,age_conv,bmi_conv,file.batchID,status,index,leiden_0_8,leiden_0_8_anno,total_b_counts
9291,2f3d3192226811eea184aec535c48243,B159,regulable_integral_leafwing,2f3d3192226811eea184aec535c48243,B159-P1C1,TTCCGCCTCTCTTTG,singlet,1457,170,19343,...,20.079601,,,,B159,ALTRA_healthy,9291,4,CD27+ effector B cell,2365
9292,b5aa7f4c120211eb859a46b4e30ed972,B026,fungiform_punctual_ram,b5aa7f4c120211eb859a46b4e30ed972,B026-P2C2,TGTCTTTCCTGCCAG,singlet,2478,282,25517,...,23.6996,Negative,58.0,25.648918,B026,at_risk,9292,5,CD27+ effector B cell,1484
9293,ef4fc95e429511eca55b3612ac783560,B087,semisolemn_tyrannical_tarsier,ef4fc95e429511eca55b3612ac783560,B087-P2C2,TTCCGCCTCTCTTTG,singlet,1988,266,31449,...,26.82061,Negative,,,B087,at_risk,9293,4,CD27+ effector B cell,1298
9294,456f1d76428711ecbe4f8ede1e1ebd6b,B087,dubnium_weakminded_termite,456f1d76428711ecbe4f8ede1e1ebd6b,B087-P2C3,CTCCTCTGCAATTAC,singlet,1823,208,29279,...,40.538375,Positive,46.0,42.650542,B087,at_risk,9294,1,CD27+ effector B cell,1290
9295,f53ac52e429c11ec80811e9b3f1bfe57,B087,bosky_reliable_robin,f53ac52e429c11ec80811e9b3f1bfe57,B087-P2C2,TTCCGCCTCTCTTTG,singlet,1825,354,26537,...,26.82061,Negative,,,B087,at_risk,9295,4,CD27+ effector B cell,1298


Check a random sample to verify accuracy

In [44]:
adata_allb.obs[adata_allb.obs.barcodes == '456f1d76428711ecbe4f8ede1e1ebd6b']

Unnamed: 0,barcodes,batch_id,cell_name,cell_uuid,chip_id,hto_barcode,hto_category,n_genes,n_mito_umis,n_reads,...,BMI,CMV_Status_Subj,age_conv,bmi_conv,file.batchID,status,index,leiden_0_8,leiden_0_8_anno,total_b_counts
9294,456f1d76428711ecbe4f8ede1e1ebd6b,B087,dubnium_weakminded_termite,456f1d76428711ecbe4f8ede1e1ebd6b,B087-P2C3,CTCCTCTGCAATTAC,singlet,1823,208,29279,...,40.538375,Positive,46.0,42.650542,B087,at_risk,9294,1,CD27+ effector B cell,1290


In [45]:
adata_bmem.obs[adata_bmem.obs.barcodes == '456f1d76428711ecbe4f8ede1e1ebd6b']

Unnamed: 0,barcodes,batch_id,cell_name,cell_uuid,chip_id,hto_barcode,hto_category,n_genes,n_mito_umis,n_reads,...,pct_counts_ribo,total_counts_hb,log1p_total_counts_hb,pct_counts_hb,leiden_0_5,leiden_0_8,leiden_1,leiden_1_2,swit_effect_leiden,leiden_0_8_anno
9294,456f1d76428711ecbe4f8ede1e1ebd6b,B087,dubnium_weakminded_termite,456f1d76428711ecbe4f8ede1e1ebd6b,B087-P2C3,CTCCTCTGCAATTAC,singlet,1823,208,29279,...,38.676448,0,0.0,0.0,0,1,1,1,CD27+ effector B cell,CD27+ effector B cell


### Scanpy Processing

In [46]:
# confirm X contains raw counts
adata_allb.X[1:30,1:30].todense()

matrix([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
         0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1,
         0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1,
         0, 0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

In [47]:
# save raw and re-normalize
# Always start from the raw counts so that re-running this cell does not store
# already normalized/scaled values under 'X_raw' or normalize scaled data.
# NOTE(review): assumes a pre-existing 'X_raw' layer holds the raw counts — confirm.
if 'X_raw' in adata_allb.layers:
    adata_allb.X = adata_allb.layers['X_raw'].copy()
else:
    adata_allb.layers['X_raw'] = adata_allb.X.copy()
# Each processing stage is snapshotted into its own layer; X ends up scaled.
sc.pp.normalize_total(adata_allb, target_sum=10000)
adata_allb.layers['X_norm'] = adata_allb.X.copy()
sc.pp.log1p(adata_allb)
adata_allb.layers['X_norm_log1p'] = adata_allb.X.copy()
sc.pp.scale(adata_allb)
adata_allb.layers['X_scale'] = adata_allb.X.copy()

## Variables

In [48]:
ig_genes = ['IGHG1','IGHG2','IGHG3','IGHG4','IGHA1','IGHA2','IGHM','IGHD','IGHE']

## All Cell Isotype Threshold Proportions

Check IGH UMI counts for all cell types (including non-B cells). 
* Apply same thresholds to see what percent of cells per celltype are 'positive' for IGH
* Output table for downstream visualization

In [49]:
temp_cols = adata_all_raw.obs.columns.tolist()
temp_cols.extend(ig_genes)

In [50]:
# Keep the raw counts in a layer and store depth-normalized counts (target sum
# 10,000 per cell) in 'X_norm'. Restarting from 'X_raw' when it already exists
# keeps the cell idempotent: a re-run no longer overwrites the raw layer with
# normalized values.
# NOTE(review): assumes a pre-existing 'X_raw' layer holds the raw counts — confirm.
if 'X_raw' in adata_all_raw.layers:
    adata_all_raw.X = adata_all_raw.layers['X_raw'].copy()
else:
    adata_all_raw.layers['X_raw'] = adata_all_raw.X.copy()
sc.pp.normalize_total(adata_all_raw, target_sum=10000)
adata_all_raw.layers['X_norm'] = adata_all_raw.X.copy()

In [51]:
df_all_cells = sc.get.obs_df(adata_all_raw, keys = temp_cols, layer='X_norm')

In [52]:
def pct_gt_cutoff(cutoff):
    """Build an aggregation function giving the percent of values above ``cutoff``.

    The returned callable is renamed to ``pct_gt_cutoff_<cutoff>`` so that
    several cutoffs can be passed to one ``groupby().agg()`` call and still
    produce distinct, readable column labels.

    Params
    ------
    cutoff: numeric
        Values strictly greater than this are counted as positive.

    Returns
    -------
    function
        Takes a pandas Series (or numpy array) ``x`` and returns the percent
        (0-100) of its entries that are ``> cutoff``. Returns NaN for an empty
        input instead of raising ZeroDivisionError.
    """
    def pct_gt_cutoff_(x):
        ntot = len(x)
        if ntot == 0:
            # A percentage is undefined for an empty group
            return float('nan')
        # Vectorized count of entries above the cutoff (NaN compares as False)
        ngt = (x > cutoff).sum()
        pct_gt = ngt/ntot*100
        return pct_gt
    pct_gt_cutoff_.__name__ = 'pct_gt_cutoff_{}'.format(cutoff)
    return pct_gt_cutoff_

In [54]:
def percentile(n):
    """Return an aggregation function computing the ``n`` quantile of its input.

    ``n`` is a fraction (e.g. 0.5 for the median). The returned function is
    named ``percentile_<NN>`` (``n`` expressed as a zero-padded percent) so it
    yields a readable column label when used in ``groupby().agg()``.
    """
    label = 'percentile_%02.0f' % (n*100)

    def percentile_(x):
        quantile_value = x.quantile(n)
        return quantile_value

    percentile_.__name__ = label
    return percentile_

In [55]:
fun_names =  [pct_gt_cutoff(0.5), pct_gt_cutoff(2), 'size']
fun_dict = {x: fun_names for x in ig_genes}
all_cell_iso_prop = df_all_cells.groupby(['AIFI_L3_new']).agg(fun_dict)

In [56]:
type(all_cell_iso_prop)

pandas.core.frame.DataFrame

In [57]:
all_cell_iso_prop.columns = [' '.join(col).strip() for col in all_cell_iso_prop.columns.values]

In [58]:
all_cell_iso_prop.columns

Index(['IGHG1 pct_gt_cutoff_0.5', 'IGHG1 pct_gt_cutoff_2', 'IGHG1 size',
       'IGHG2 pct_gt_cutoff_0.5', 'IGHG2 pct_gt_cutoff_2', 'IGHG2 size',
       'IGHG3 pct_gt_cutoff_0.5', 'IGHG3 pct_gt_cutoff_2', 'IGHG3 size',
       'IGHG4 pct_gt_cutoff_0.5', 'IGHG4 pct_gt_cutoff_2', 'IGHG4 size',
       'IGHA1 pct_gt_cutoff_0.5', 'IGHA1 pct_gt_cutoff_2', 'IGHA1 size',
       'IGHA2 pct_gt_cutoff_0.5', 'IGHA2 pct_gt_cutoff_2', 'IGHA2 size',
       'IGHM pct_gt_cutoff_0.5', 'IGHM pct_gt_cutoff_2', 'IGHM size',
       'IGHD pct_gt_cutoff_0.5', 'IGHD pct_gt_cutoff_2', 'IGHD size',
       'IGHE pct_gt_cutoff_0.5', 'IGHE pct_gt_cutoff_2', 'IGHE size'],
      dtype='object')

In [59]:
all_cell_iso_prop.head()

Unnamed: 0_level_0,IGHG1 pct_gt_cutoff_0.5,IGHG1 pct_gt_cutoff_2,IGHG1 size,IGHG2 pct_gt_cutoff_0.5,IGHG2 pct_gt_cutoff_2,IGHG2 size,IGHG3 pct_gt_cutoff_0.5,IGHG3 pct_gt_cutoff_2,IGHG3 size,IGHG4 pct_gt_cutoff_0.5,...,IGHA2 size,IGHM pct_gt_cutoff_0.5,IGHM pct_gt_cutoff_2,IGHM size,IGHD pct_gt_cutoff_0.5,IGHD pct_gt_cutoff_2,IGHD size,IGHE pct_gt_cutoff_0.5,IGHE pct_gt_cutoff_2,IGHE size
AIFI_L3_new,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ASDC,0.566572,0.0,353,0.0,0.0,353,0.0,0.0,353,0.283286,...,353,32.294618,2.832861,353,14.447592,0.283286,353,0.0,0.0,353
ASDC_uk1_B,0.0,0.0,164,0.0,0.0,164,0.0,0.0,164,0.0,...,164,68.292683,26.829268,164,19.512195,0.0,164,0.0,0.0,164
Activated memory B cell,28.0,13.714286,350,26.571429,12.571429,350,25.714286,13.142857,350,8.857143,...,350,78.285714,76.285714,350,54.0,45.428571,350,2.285714,1.714286,350
Activated memory B cell_uk1,25.0,15.625,32,40.625,15.625,32,65.625,46.875,32,18.75,...,32,100.0,100.0,32,84.375,71.875,32,0.0,0.0,32
Adaptive NK cell,0.275957,0.213867,14495,0.13108,0.124181,14495,0.296654,0.248362,14495,0.262159,...,14495,2.44912,1.848913,14495,0.310452,0.25526,14495,0.034495,0.027596,14495


### Save

Save out table for plotting and further summary

In [60]:
# Write next to the other outputs by reusing `outdir` instead of repeating the
# path literal, so changing the output directory in one place is enough.
all_cell_iso_prop.to_csv(os.path.join(outdir, "check_igh_normcounts_alltypes.csv"))

## Isotype Classification

### Functions

In [61]:
# 1) https://stackoverflow.com/questions/26784164/pandas-multiprocessing-apply
# 2) https://stackoverflow.com/questions/5442910/how-to-use-multiprocessing-pool-map-with-multiple-arguments
from multiprocessing import Pool
from itertools import repeat
from functools import partial

# 3) https://stackoverflow.com/questions/817087/call-a-function-with-argument-list-in-python
def mywrapper(func, args):
    """Call ``func`` with the items of ``args`` unpacked as positional arguments."""
    result = func(*args)
    return result

def parallelize_row_args(df, df_func, args=None, num_of_processes=8):
    ''' Apply a dataframe function to row-chunks of ``df`` in parallel.

    Adapted code found in resource #1 with starmap for arguments (resource #2).
    ``df`` is split into consecutive row chunks, ``df_func`` is run on each
    chunk in a worker process and the per-chunk results are concatenated in
    the original row order.

    Params
    ------
    df pandas.DataFrame
        A dataframe object we wish to parallelize row operations on
    df_func function
        A function that takes a dataframe as the first argument and returns a
        pandas object (Series/DataFrame) for that chunk. It must be picklable,
        since it is sent to worker processes.
    args list, optional
        Extra positional arguments passed unchanged to every ``df_func`` call
        after the chunk. ``None`` or an empty list means no extra arguments.
    num_of_processes int, default 8
        Upper bound on the number of worker processes / chunks. Fewer are used
        when ``df`` has fewer rows, so no worker receives an empty chunk.

    Returns
    -------
    pandas object
        ``pd.concat`` of the per-chunk results.
    '''
    n_rows = df.shape[0]
    # Never create more chunks than rows: an empty chunk can make df_func
    # return a differently-shaped object and break the final concat.
    n_chunks = max(1, min(int(num_of_processes), n_rows))

    # Split by row position so the result does not depend on index labels
    row_chunks = np.array_split(np.arange(n_rows), n_chunks)
    data_split = [df.iloc[rows] for rows in row_chunks]

    # The context manager releases the worker processes even if df_func raises
    with Pool(n_chunks) as pool:
        if args is None or len(args) == 0:
            results = pool.map(df_func, data_split)
        else:
            # Pair every chunk with the same extra arguments: (chunk, arg1, arg2, ...)
            results = pool.starmap(df_func, zip(data_split, *[repeat(x) for x in args]))

    return pd.concat(results)


In [62]:
def count_pos(df, ig_genes = ig_genes):
    """Count, per row, how many of the ``ig_genes`` columns are positive.

    Params
    ------
    df: pandas DataFrame
        One row per cell with a boolean column for every gene in ``ig_genes``.
    ig_genes: list of str
        Columns to count over.

    Returns
    -------
    pandas Series
        Row-wise sum of the selected columns (number of True flags for
        boolean input), indexed like ``df``.
    """
    # Vectorized row sum; gives the same counts as summing each row in a
    # Python-level apply, and returns an (empty) Series for an empty frame.
    res = df[list(ig_genes)].sum(axis = 1)
    return res

def concat_pos(df, ig_genes = ig_genes):
    """Build, per row, a ';'-joined string of the genes flagged positive.

    Genes appear in the order given by ``ig_genes``; a row with no positive
    gene gets an empty string.

    Params
    ------
    df: pandas DataFrame
        One row per cell with a boolean column for every gene in ``ig_genes``.
    ig_genes: list of str
        Columns to inspect, in output order.

    Returns
    -------
    pandas Series of str, indexed like ``df``.
    """
    genes = list(ig_genes)

    def _is_true(value):
        # An identity test (`value is True`) misses numpy booleans, which is
        # what rows of an all-boolean frame contain; accept both bool types
        # while still rejecting non-boolean values such as NaN or counts.
        return isinstance(value, (bool, np.bool_)) and bool(value)

    labels = [
        ";".join(g for g, flag in zip(genes, row) if _is_true(flag))
        for row in df[genes].itertuples(index=False, name=None)
    ]
    res = pd.Series(labels, index=df.index, dtype=object)
    return res

def get_is_ig(df, ig, other_ig):
    """Flag rows where column ``ig`` is set and none of the ``other_ig`` columns are.

    Returns a Series (one value per row of ``df``) produced by a row-wise apply.
    """
    def _only_target(row):
        target_hit = row[ig]
        if not target_hit:
            # Same short-circuit result as `row[ig] and ...`
            return target_hit
        return not any(row[other_ig])

    res = df.apply(_only_target, axis = 1)
    return res

def get_is_nonswitched(df, nonswitched, switched):
    """Flag rows with at least one ``nonswitched`` column set and no ``switched`` column set.

    Returns a Series (one value per row of ``df``) produced by a row-wise apply.
    """
    def _row_is_nonswitched(row):
        has_nonswitched = any(row[nonswitched])
        if not has_nonswitched:
            return False
        return not any(row[switched])

    res = df.apply(_row_is_nonswitched, axis = 1)
    return res

def determine_isotype(df, gene_order_arr, pos_str_col = 'concat_pos', verbose = True):
    """Assign one isotype label per row by priority order.

    Each entry of ``gene_order_arr`` is tried in turn. Rows that are still
    'undetermined' and whose ``pos_str_col`` string matches the entry receive
    that entry's label; earlier entries therefore take precedence.

    Params
    ------
    df: pandas DataFrame
        Must contain ``pos_str_col``.
    gene_order_arr: list
        Priority-ordered entries. A str entry matches rows whose positive-gene
        string contains that gene (the gene is treated as a regex pattern). A
        list/tuple entry matches rows containing ALL of its genes and is
        labeled with the genes joined by ';'.
    pos_str_col: str, default 'concat_pos'
        Column holding the string of positive genes per row.
    verbose: bool, default True
        Print the number of newly labeled rows for every entry.

    Returns
    -------
    list of str
        One label per row, in row order; 'undetermined' when no entry matched.
    """
    pos_str = df[pos_str_col]
    n_rows = df.shape[0]
    # Work on positional numpy arrays: integer lookups on a pandas Series are
    # label-based for integer indexes and would pick the wrong rows (or raise).
    res = np.array(["undetermined"] * n_rows, dtype=object)

    def _contains(pattern):
        # Missing strings count as "no match"
        return pos_str.str.contains(pattern, na=False).to_numpy(dtype=bool)

    for gene in gene_order_arr:
        if not isinstance(gene, str):
            members = list(gene)
            if len(members) == 0:
                # Nothing to match for an empty combination
                continue
            pat_found = np.ones(n_rows, dtype=bool)
            for g in members:
                pat_found &= _contains(g)
            gene = ";".join(members)
        else:
            pat_found = _contains(gene)

        # Only rows not claimed by a higher-priority entry can be labeled
        pat_found &= (res == "undetermined")
        n_found = int(pat_found.sum())
        if verbose:
            print("{} matches for gene {}".format(n_found, gene))

        res[pat_found] = gene
    return res.tolist()
            

def classify_ig_df(
    df,
    key = 'barcodes',
    ig_cutoffs = {
       'IGHG1':1,
       'IGHG2':1,
       'IGHG3':1,
       'IGHG4':1,
       'IGHA1':1,
       'IGHA2':1,
       'IGHM':1,
       'IGHD':1,
       'IGHE':1
    },
    switched = ['IGHG1', 'IGHG2', 'IGHG3', 'IGHG4', 'IGHA1', 'IGHA2','IGHE'],
    nonswitched = ['IGHM', 'IGHD'],
    gene_order_arr = [["IGHM","IGHD"], "IGHM","IGHD","IGHG3","IGHG1","IGHA1","IGHG2","IGHG4","IGHE","IGHA2"],
    keep_normcounts = True,
    num_of_processes = 8,
    verbose = True
):
    
    ''' Classify Ig status for wide dataframe of Ig Norm Counts

    A gene is called positive in a cell when its value is at or above (gte) its
    cutoff. From the positive calls the function derives:

    * switched_status: 'nonswitched' when at least one nonswitched gene is
      positive (whether or not switched genes are also positive); 'switched'
      when no nonswitched gene is positive and at least one switched gene is;
      'undetermined' when no gene is positive.
    * isotype: the first entry of gene_order_arr matched by the cell's positive
      genes (see determine_isotype); 'undetermined' when none match.

    NOTE(review): an earlier description stated that switched calls require a
    single switched isotype above threshold and that nonswitched calls require
    all switched isotypes below threshold. The implementation does not enforce
    this; it resolves cells with multiple positive genes by the rules above —
    confirm which is intended.

    Algorithm developed with input from Ziyuan He and Marla Glass. Thresholds may be dataset specific but
    defaults were applied in normalized counts from a 3' scRNA dataset (ALTRA)

    Params
    ------
    df: pandas DataFrame of normalized ig counts
        Dataframe with rows as cells and columns as features, eg as extracted from an AnnData object 
        using sc.get.obs_df(). Must contain a key column (e.g. barcodes) in addition to columns for
        all ig_genes.
    key: str, default 'barcodes'
        Column in df that is the unique row key
    ig_cutoffs: dict
        Dictionary with names as ig genes and values as cutoff values in the same units (eg normalized counts) 
        as Ig values in df. 
    switched: list, default ['IGHG1', 'IGHG2', 'IGHG3', 'IGHG4', 'IGHA1', 'IGHA2','IGHE'],
    nonswitched: list, default ['IGHM', 'IGHD'],
    gene_order_arr: list
        Priority-ordered isotype entries passed to determine_isotype. A str matches cells
        positive for that gene; a list matches cells positive for all listed genes.
    keep_normcounts: bool, default True
        If True the original counts in df will be retained in output. 
    num_of_processes: int, default 8
        Number of worker processes used for the row-wise helper computations.
    verbose: bool, default True
        Print progress messages, including the per-isotype match counts.

    Returns
    -------
    pandas DataFrame
        One row per row of df with the key column, the original Ig counts (if keep_normcounts),
        one boolean '<gene>_pos' column per gene, and the columns 'n_pos', 'concat_pos',
        'switched_status' and 'isotype'.

    Raises
    ------
    KeyError
        If a gene in switched/nonswitched has no cutoff, or key is not a column of df.
    '''
    ig_genes = switched.copy()
    ig_genes.extend(nonswitched)
    
    missing_keys = [x for x in ig_genes if x not in ig_cutoffs.keys()]
    if len(missing_keys) > 0:
        raise KeyError("Missing ig cuttoffs for the following: {}".format(", ".join(missing_keys)))
    
    if key not in df.columns:
        raise KeyError("Missing key column '{}' in data frame".format(key))
                                                                           
    if verbose:
        print("Binarizing Ig detection based on cutoffs")
    bool_df = pd.DataFrame({x: df[x]>= ig_cutoffs[x] for x in ig_genes})
    newcols = [x+"_pos" for x in ig_genes]
    bool_df['n_pos'] =  parallelize_row_args(bool_df, count_pos, args = [ig_genes], num_of_processes= num_of_processes)
    bool_df['concat_pos'] =  parallelize_row_args(bool_df, concat_pos, args = [ig_genes], num_of_processes= num_of_processes)
    bool_df['switched_status'] = 'undetermined'
    
    # Determine switch status
    if verbose:
        print("Determining switched status")
    # Positional boolean masks taken straight from the binarized gene columns.
    # Using masks (not index labels) keeps the assignment correct even when the
    # index has duplicate labels, and an empty gene list simply matches nothing.
    is_nonswitched = bool_df[nonswitched].any(axis=1).values
    is_switched = np.logical_and(np.logical_not(is_nonswitched), bool_df[switched].any(axis=1).values)
    bool_df.loc[is_nonswitched, 'switched_status'] = 'nonswitched'
    bool_df.loc[is_switched, 'switched_status'] = 'switched'

    
    # predict isotype
    if verbose:
        print("Predicting isotypes")
    # Forward the caller's verbosity (it was previously forced on)
    bool_df['isotype'] = determine_isotype(bool_df, gene_order_arr = gene_order_arr, pos_str_col = 'concat_pos', verbose = verbose)

    # Format for output: boolean gene columns become '<gene>_pos' so they do not
    # collide with the count columns kept from df
    bool_df.rename(columns={ig_genes[i]: newcols[i] for i in range(len(ig_genes))}, inplace=True)

    keep_cols = [key]
    if keep_normcounts is True:
        keep_cols.extend(ig_genes)
        
    res = pd.concat([df[keep_cols], bool_df], axis=1)
        
    return res

### Determine Class Switch and Isotype

In [63]:
# extract key marker data to flat dataframe
keepcols = adata_allb.obs.columns.tolist()
keepcols.extend(ig_genes)
df = sc.get.obs_df(adata_allb, keys = keepcols, layer='X_norm')
print(df.shape)
df.head()

(191220, 73)


Unnamed: 0,barcodes,batch_id,cell_name,cell_uuid,chip_id,hto_barcode,hto_category,n_genes,n_mito_umis,n_reads,...,total_b_counts,IGHG1,IGHG2,IGHG3,IGHG4,IGHA1,IGHA2,IGHM,IGHD,IGHE
9291,2f3d3192226811eea184aec535c48243,B159,regulable_integral_leafwing,2f3d3192226811eea184aec535c48243,B159-P1C1,TTCCGCCTCTCTTTG,singlet,1457,170,19343,...,2365,0.0,0.0,0.0,0.0,0.0,0.0,56.094929,2.157497,0.0
9292,b5aa7f4c120211eb859a46b4e30ed972,B026,fungiform_punctual_ram,b5aa7f4c120211eb859a46b4e30ed972,B026-P2C2,TGTCTTTCCTGCCAG,singlet,2478,282,25517,...,1484,1.184975,3.554924,1.184975,0.0,0.0,0.0,0.0,0.0,0.0
9293,ef4fc95e429511eca55b3612ac783560,B087,semisolemn_tyrannical_tarsier,ef4fc95e429511eca55b3612ac783560,B087-P2C2,TTCCGCCTCTCTTTG,singlet,1988,266,31449,...,1298,0.0,0.0,0.0,0.0,1.372307,1.372307,27.446136,0.0,0.0
9294,456f1d76428711ecbe4f8ede1e1ebd6b,B087,dubnium_weakminded_termite,456f1d76428711ecbe4f8ede1e1ebd6b,B087-P2C3,CTCCTCTGCAATTAC,singlet,1823,208,29279,...,1290,0.0,0.0,0.0,0.0,0.0,0.0,40.580616,14.047136,0.0
9295,f53ac52e429c11ec80811e9b3f1bfe57,B087,bosky_reliable_robin,f53ac52e429c11ec80811e9b3f1bfe57,B087-P2C2,TTCCGCCTCTCTTTG,singlet,1825,354,26537,...,1298,3.183699,0.0,0.0,0.0,1.59185,0.0,17.510347,6.367399,0.0


In [64]:
pd.set_option('display.max_columns', None)
df.groupby(['AIFI_L3_new']).describe()

Unnamed: 0_level_0,n_genes,n_genes,n_genes,n_genes,n_genes,n_genes,n_genes,n_genes,n_mito_umis,n_mito_umis,n_mito_umis,n_mito_umis,n_mito_umis,n_mito_umis,n_mito_umis,n_mito_umis,n_reads,n_reads,n_reads,n_reads,n_reads,n_reads,n_reads,n_reads,n_umis,n_umis,n_umis,n_umis,n_umis,n_umis,n_umis,n_umis,subject.birthYear,subject.birthYear,subject.birthYear,subject.birthYear,subject.birthYear,subject.birthYear,subject.birthYear,subject.birthYear,sample.daysSinceFirstVisit,sample.daysSinceFirstVisit,sample.daysSinceFirstVisit,sample.daysSinceFirstVisit,sample.daysSinceFirstVisit,sample.daysSinceFirstVisit,sample.daysSinceFirstVisit,sample.daysSinceFirstVisit,doublet_score,doublet_score,doublet_score,doublet_score,doublet_score,doublet_score,doublet_score,doublet_score,AIFI_L1_score,AIFI_L1_score,AIFI_L1_score,AIFI_L1_score,AIFI_L1_score,AIFI_L1_score,AIFI_L1_score,AIFI_L1_score,AIFI_L2_score,AIFI_L2_score,AIFI_L2_score,AIFI_L2_score,AIFI_L2_score,AIFI_L2_score,AIFI_L2_score,AIFI_L2_score,AIFI_L3_score,AIFI_L3_score,AIFI_L3_score,AIFI_L3_score,AIFI_L3_score,AIFI_L3_score,AIFI_L3_score,AIFI_L3_score,n_genes_by_counts,n_genes_by_counts,n_genes_by_counts,n_genes_by_counts,n_genes_by_counts,n_genes_by_counts,n_genes_by_counts,n_genes_by_counts,log1p_n_genes_by_counts,log1p_n_genes_by_counts,log1p_n_genes_by_counts,log1p_n_genes_by_counts,log1p_n_genes_by_counts,log1p_n_genes_by_counts,log1p_n_genes_by_counts,log1p_n_genes_by_counts,total_counts,total_counts,total_counts,total_counts,total_counts,total_counts,total_counts,total_counts,log1p_total_counts,log1p_total_counts,log1p_total_counts,log1p_total_counts,log1p_total_counts,log1p_total_counts,log1p_total_counts,log1p_total_counts,pct_counts_in_top_50_genes,pct_counts_in_top_50_genes,pct_counts_in_top_50_genes,pct_counts_in_top_50_genes,pct_counts_in_top_50_genes,pct_counts_in_top_50_genes,pct_counts_in_top_50_genes,pct_counts_in_top_50_genes,pct_counts_in_top_100_genes,pct_counts_in_top_100_genes,pct_counts_in_top_100_genes,p
ct_counts_in_top_100_genes,pct_counts_in_top_100_genes,pct_counts_in_top_100_genes,pct_counts_in_top_100_genes,pct_counts_in_top_100_genes,pct_counts_in_top_200_genes,pct_counts_in_top_200_genes,pct_counts_in_top_200_genes,pct_counts_in_top_200_genes,pct_counts_in_top_200_genes,pct_counts_in_top_200_genes,pct_counts_in_top_200_genes,pct_counts_in_top_200_genes,pct_counts_in_top_500_genes,pct_counts_in_top_500_genes,pct_counts_in_top_500_genes,pct_counts_in_top_500_genes,pct_counts_in_top_500_genes,pct_counts_in_top_500_genes,pct_counts_in_top_500_genes,pct_counts_in_top_500_genes,total_counts_mito,total_counts_mito,total_counts_mito,total_counts_mito,total_counts_mito,total_counts_mito,total_counts_mito,total_counts_mito,log1p_total_counts_mito,log1p_total_counts_mito,log1p_total_counts_mito,log1p_total_counts_mito,log1p_total_counts_mito,log1p_total_counts_mito,log1p_total_counts_mito,log1p_total_counts_mito,pct_counts_mito,pct_counts_mito,pct_counts_mito,pct_counts_mito,pct_counts_mito,pct_counts_mito,pct_counts_mito,pct_counts_mito,anti_ccp3_finalCombined,anti_ccp3_finalCombined,anti_ccp3_finalCombined,anti_ccp3_finalCombined,anti_ccp3_finalCombined,anti_ccp3_finalCombined,anti_ccp3_finalCombined,anti_ccp3_finalCombined,days_to_conversion,days_to_conversion,days_to_conversion,days_to_conversion,days_to_conversion,days_to_conversion,days_to_conversion,days_to_conversion,BMI,BMI,BMI,BMI,BMI,BMI,BMI,BMI,age_conv,age_conv,age_conv,age_conv,age_conv,age_conv,age_conv,age_conv,bmi_conv,bmi_conv,bmi_conv,bmi_conv,bmi_conv,bmi_conv,bmi_conv,bmi_conv,total_b_counts,total_b_counts,total_b_counts,total_b_counts,total_b_counts,total_b_counts,total_b_counts,total_b_counts,IGHG1,IGHG1,IGHG1,IGHG1,IGHG1,IGHG1,IGHG1,IGHG1,IGHG2,IGHG2,IGHG2,IGHG2,IGHG2,IGHG2,IGHG2,IGHG2,IGHG3,IGHG3,IGHG3,IGHG3,IGHG3,IGHG3,IGHG3,IGHG3,IGHG4,IGHG4,IGHG4,IGHG4,IGHG4,IGHG4,IGHG4,IGHG4,IGHA1,IGHA1,IGHA1,IGHA1,IGHA1,IGHA1,IGHA1,IGHA1,IGHA2,IGHA2,IGHA2,IGHA2,IGHA2,IGHA2,IGHA2,IGHA2,IGHM,IGHM,IGHM,IGHM,I
GHM,IGHM,IGHM,IGHM,IGHD,IGHD,IGHD,IGHD,IGHD,IGHD,IGHD,IGHD,IGHE,IGHE,IGHE,IGHE,IGHE,IGHE,IGHE,IGHE
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
AIFI_L3_new,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Unnamed: 87_level_2,Unnamed: 88_level_2,Unnamed: 89_level_2,Unnamed: 90_level_2,Unnamed: 91_level_2,Unnamed: 92_level_2,Unnamed: 93_level_2,Unnamed: 94_level_2,Unnamed: 95_level_2,Unnamed: 96_level_2,Unnamed: 97_level_2,Unnamed: 98_level_2,Unnamed: 99_level_2,Unnamed: 
100_level_2,Unnamed: 101_level_2,Unnamed: 102_level_2,Unnamed: 103_level_2,Unnamed: 104_level_2,Unnamed: 105_level_2,Unnamed: 106_level_2,Unnamed: 107_level_2,Unnamed: 108_level_2,Unnamed: 109_level_2,Unnamed: 110_level_2,Unnamed: 111_level_2,Unnamed: 112_level_2,Unnamed: 113_level_2,Unnamed: 114_level_2,Unnamed: 115_level_2,Unnamed: 116_level_2,Unnamed: 117_level_2,Unnamed: 118_level_2,Unnamed: 119_level_2,Unnamed: 120_level_2,Unnamed: 121_level_2,Unnamed: 122_level_2,Unnamed: 123_level_2,Unnamed: 124_level_2,Unnamed: 125_level_2,Unnamed: 126_level_2,Unnamed: 127_level_2,Unnamed: 128_level_2,Unnamed: 129_level_2,Unnamed: 130_level_2,Unnamed: 131_level_2,Unnamed: 132_level_2,Unnamed: 133_level_2,Unnamed: 134_level_2,Unnamed: 135_level_2,Unnamed: 136_level_2,Unnamed: 137_level_2,Unnamed: 138_level_2,Unnamed: 139_level_2,Unnamed: 140_level_2,Unnamed: 141_level_2,Unnamed: 142_level_2,Unnamed: 143_level_2,Unnamed: 144_level_2,Unnamed: 145_level_2,Unnamed: 146_level_2,Unnamed: 147_level_2,Unnamed: 148_level_2,Unnamed: 149_level_2,Unnamed: 150_level_2,Unnamed: 151_level_2,Unnamed: 152_level_2,Unnamed: 153_level_2,Unnamed: 154_level_2,Unnamed: 155_level_2,Unnamed: 156_level_2,Unnamed: 157_level_2,Unnamed: 158_level_2,Unnamed: 159_level_2,Unnamed: 160_level_2,Unnamed: 161_level_2,Unnamed: 162_level_2,Unnamed: 163_level_2,Unnamed: 164_level_2,Unnamed: 165_level_2,Unnamed: 166_level_2,Unnamed: 167_level_2,Unnamed: 168_level_2,Unnamed: 169_level_2,Unnamed: 170_level_2,Unnamed: 171_level_2,Unnamed: 172_level_2,Unnamed: 173_level_2,Unnamed: 174_level_2,Unnamed: 175_level_2,Unnamed: 176_level_2,Unnamed: 177_level_2,Unnamed: 178_level_2,Unnamed: 179_level_2,Unnamed: 180_level_2,Unnamed: 181_level_2,Unnamed: 182_level_2,Unnamed: 183_level_2,Unnamed: 184_level_2,Unnamed: 185_level_2,Unnamed: 186_level_2,Unnamed: 187_level_2,Unnamed: 188_level_2,Unnamed: 189_level_2,Unnamed: 190_level_2,Unnamed: 191_level_2,Unnamed: 192_level_2,Unnamed: 193_level_2,Unnamed: 194_level_2,Unnamed: 
195_level_2,Unnamed: 196_level_2,Unnamed: 197_level_2,Unnamed: 198_level_2,Unnamed: 199_level_2,Unnamed: 200_level_2,Unnamed: 201_level_2,Unnamed: 202_level_2,Unnamed: 203_level_2,Unnamed: 204_level_2,Unnamed: 205_level_2,Unnamed: 206_level_2,Unnamed: 207_level_2,Unnamed: 208_level_2,Unnamed: 209_level_2,Unnamed: 210_level_2,Unnamed: 211_level_2,Unnamed: 212_level_2,Unnamed: 213_level_2,Unnamed: 214_level_2,Unnamed: 215_level_2,Unnamed: 216_level_2,Unnamed: 217_level_2,Unnamed: 218_level_2,Unnamed: 219_level_2,Unnamed: 220_level_2,Unnamed: 221_level_2,Unnamed: 222_level_2,Unnamed: 223_level_2,Unnamed: 224_level_2,Unnamed: 225_level_2,Unnamed: 226_level_2,Unnamed: 227_level_2,Unnamed: 228_level_2,Unnamed: 229_level_2,Unnamed: 230_level_2,Unnamed: 231_level_2,Unnamed: 232_level_2,Unnamed: 233_level_2,Unnamed: 234_level_2,Unnamed: 235_level_2,Unnamed: 236_level_2,Unnamed: 237_level_2,Unnamed: 238_level_2,Unnamed: 239_level_2,Unnamed: 240_level_2,Unnamed: 241_level_2,Unnamed: 242_level_2,Unnamed: 243_level_2,Unnamed: 244_level_2,Unnamed: 245_level_2,Unnamed: 246_level_2,Unnamed: 247_level_2,Unnamed: 248_level_2,Unnamed: 249_level_2,Unnamed: 250_level_2,Unnamed: 251_level_2,Unnamed: 252_level_2,Unnamed: 253_level_2,Unnamed: 254_level_2,Unnamed: 255_level_2,Unnamed: 256_level_2,Unnamed: 257_level_2,Unnamed: 258_level_2,Unnamed: 259_level_2,Unnamed: 260_level_2,Unnamed: 261_level_2,Unnamed: 262_level_2,Unnamed: 263_level_2,Unnamed: 264_level_2,Unnamed: 265_level_2,Unnamed: 266_level_2,Unnamed: 267_level_2,Unnamed: 268_level_2,Unnamed: 269_level_2,Unnamed: 270_level_2,Unnamed: 271_level_2,Unnamed: 272_level_2,Unnamed: 273_level_2,Unnamed: 274_level_2,Unnamed: 275_level_2,Unnamed: 276_level_2,Unnamed: 277_level_2,Unnamed: 278_level_2,Unnamed: 279_level_2,Unnamed: 280_level_2,Unnamed: 281_level_2,Unnamed: 282_level_2,Unnamed: 283_level_2,Unnamed: 284_level_2,Unnamed: 285_level_2,Unnamed: 286_level_2,Unnamed: 287_level_2,Unnamed: 288_level_2
Activated memory B cell,349.0,1776.936963,530.823429,623.0,1431.0,1733.0,2009.0,3766.0,349.0,256.25788,141.180703,18.0,152.0,233.0,324.0,799.0,349.0,21207.765043,10961.228264,3080.0,14459.0,18777.0,25884.0,102508.0,349.0,6124.412607,2438.758232,1197.0,4575.0,5904.0,7263.0,19984.0,349.0,1969.661891,14.607322,1941.0,1958.0,1969.0,1979.0,2000.0,349.0,183.627507,307.776413,0.0,0.0,0.0,216.0,1144.0,349.0,0.039925,0.039892,0.004254,0.018151,0.027422,0.049691,0.439803,349.0,1.0,8.919768e-14,1.0,1.0,1.0,1.0,1.0,349.0,0.857843,0.211395,0.048791,0.805765,0.965778,0.995677,0.999989,349.0,1.0,6.21008e-08,0.999999,1.0,1.0,1.0,1.0,349.0,1776.936963,530.823429,623.0,1431.0,1733.0,2009.0,3766.0,349.0,7.438599,0.303284,6.43615,7.266827,7.458186,7.60589,8.234034,349.0,6124.412607,2438.758232,1197.0,4575.0,5904.0,7263.0,19984.0,349.0,8.642138,0.405293,7.088409,8.428581,8.683555,8.890686,9.902737,349.0,40.994874,3.464471,31.912501,38.614561,41.123847,43.403094,51.446837,349.0,55.480938,4.316648,41.582337,52.658327,55.795148,58.691207,68.33782,349.0,65.388973,4.233237,50.802412,62.900562,65.665236,68.270481,76.043069,349.0,77.53189,4.105466,63.487683,75.215187,77.802198,80.363771,89.724311,349.0,256.25788,141.180703,18.0,152.0,233.0,324.0,799.0,349.0,5.390051,0.599841,2.944439,5.030438,5.455321,5.783825,6.684612,349.0,4.174237,1.629989,0.54562,2.989691,3.973408,5.028478,9.198672,344.0,361.770349,629.45013,0.0,22.75,69.0,302.0,4277.0,175.0,-348.28,302.919902,-1144.0,-621.5,-279.0,-95.0,0.0,349.0,27.499295,5.330036,15.464216,24.268078,27.679501,29.187624,43.242856,175.0,49.422857,13.60162,23.0,35.5,49.0,62.0,76.0,175.0,28.3086,5.036593,22.359629,24.550265,28.675467,31.161374,42.650542,349.0,1521.722063,552.977032,551.0,1158.0,1401.0,1841.0,4453.0,349.0,0.855634,1.939776,0.0,0.0,0.0,1.250782,15.485869,349.0,0.732693,1.611692,0.0,0.0,0.0,1.16877,12.460196,349.0,0.716693,1.652874,0.0,0.0,0.0,0.792833,13.844936,349.0,0.189014,0.731417,0.0,0.0,0.0,0.0,7.352941,349.0,2.606704,4.836987,0.0,0.0,0
.0,3.243068,29.761906,349.0,0.808474,2.117407,0.0,0.0,0.0,0.0,11.894648,349.0,24.214037,20.607513,0.0,3.345227,24.191111,34.686092,132.106857,349.0,3.772951,5.435934,0.0,0.0,1.546073,5.778677,34.805889,349.0,0.140121,1.164886,0.0,0.0,0.0,0.0,15.485869
CD27+ effector B cell,6453.0,1981.489695,572.355466,489.0,1590.0,1912.0,2297.0,4914.0,6453.0,327.312568,185.140315,0.0,199.0,288.0,415.0,1896.0,6453.0,25921.3479,14259.04299,2590.0,17477.0,23004.0,30646.0,200243.0,6453.0,7324.586859,2849.191782,1018.0,5466.0,6877.0,8717.0,29081.0,6453.0,1972.468929,15.605088,1941.0,1961.0,1974.0,1987.0,2000.0,6453.0,151.040291,237.002856,0.0,0.0,0.0,224.0,1144.0,6453.0,0.028429,0.025807,0.003581,0.015119,0.022087,0.033051,0.474654,6453.0,1.0,2.577047e-07,0.999979,1.0,1.0,1.0,1.0,6453.0,0.843712,0.193465,0.028866,0.764352,0.92654,0.988659,1.0,6453.0,1.0,8.544643e-08,0.999995,1.0,1.0,1.0,1.0,6453.0,1981.489695,572.355466,489.0,1590.0,1912.0,2297.0,4914.0,6453.0,7.550896,0.290158,6.194405,7.372118,7.556428,7.739794,8.500047,6453.0,7324.586859,2849.191782,1018.0,5466.0,6877.0,8717.0,29081.0,6453.0,8.828007,0.382208,6.926577,8.606485,8.836083,9.073145,10.277875,6453.0,41.368352,3.814097,26.927729,38.865637,41.495468,43.929912,54.883907,6453.0,56.197497,4.800988,35.117967,53.183024,56.56343,59.549128,72.071636,6453.0,65.677056,4.587684,44.065336,62.792063,65.992147,68.893457,81.994192,6453.0,77.180175,4.252482,59.37931,74.492926,77.410515,80.145921,100.0,6453.0,327.312568,185.140315,0.0,199.0,288.0,415.0,1896.0,6453.0,5.642564,0.575598,0.0,5.298317,5.666427,6.030685,7.548029,6453.0,4.438735,1.662291,0.0,3.249476,4.228433,5.382739,9.976526,6356.0,428.256923,828.566359,0.0,32.0,56.0,395.0,4342.0,3224.0,-300.723015,276.391302,-1144.0,-516.0,-257.0,-95.0,0.0,6416.0,27.152333,5.715534,15.464216,22.862534,26.042241,29.283079,43.242856,3224.0,45.969603,15.264439,23.0,35.0,46.0,58.0,76.0,3224.0,28.452922,5.715364,22.359629,23.143062,26.194977,31.161374,42.650542,6453.0,1596.34728,640.182945,181.0,1176.0,1556.0,2037.0,4453.0,6453.0,1.382558,2.103033,0.0,0.0,0.0,2.073828,35.746201,6453.0,1.042537,1.878886,0.0,0.0,0.0,1.553398,26.778883,6453.0,1.208306,2.169046,0.0,0.0,0.0,1.702707,24.285992,6453.0,0.355017,0.956245,0.0,0.0,0.0,0.0,24.820055,6453.0,
2.580724,4.77422,0.0,0.0,0.887705,3.250975,93.530785,6453.0,0.604635,2.139607,0.0,0.0,0.0,0.0,34.887409,6453.0,26.404444,20.213171,0.0,8.14996,26.87269,38.505096,159.897659,6453.0,2.673513,4.316989,0.0,0.0,1.234111,3.796027,70.796463,6453.0,0.023721,0.43815,0.0,0.0,0.0,0.0,15.957447
CD27- effector B cell,6213.0,1617.78883,468.226358,332.0,1304.0,1558.0,1867.0,4499.0,6213.0,211.776276,122.707417,1.0,127.0,186.0,266.0,1269.0,6213.0,17546.799131,9505.665011,819.0,11963.0,15705.0,21141.0,218241.0,6213.0,5141.347497,1976.39144,630.0,3843.0,4850.0,6132.0,26599.0,6213.0,1963.755995,15.684203,1941.0,1949.0,1961.0,1977.0,2000.0,6213.0,86.166747,180.754301,0.0,0.0,0.0,106.0,1144.0,6213.0,0.026985,0.022557,0.002492,0.013661,0.021099,0.032684,0.493333,6213.0,1.0,3.861188e-06,0.999698,1.0,1.0,1.0,1.0,6213.0,0.905823,0.184898,0.026922,0.922774,0.994129,0.999509,1.0,6213.0,1.0,6.280686e-08,0.999996,1.0,1.0,1.0,1.0,6213.0,1617.78883,468.226358,332.0,1304.0,1558.0,1867.0,4499.0,6213.0,7.348356,0.289594,5.808142,7.173958,7.3518,7.532624,8.411833,6213.0,5141.347497,1976.39144,630.0,3843.0,4850.0,6132.0,26599.0,6213.0,8.475594,0.379234,6.447306,8.254269,8.48694,8.721439,10.188666,6213.0,40.304978,3.303769,27.445803,38.121259,40.340159,42.46988,62.5,6213.0,54.166129,4.191959,37.506948,51.46084,54.285087,57.038262,70.59952,6213.0,64.315632,4.14739,47.359644,61.571065,64.547261,67.15046,82.446809,6213.0,77.34375,4.17193,60.867398,74.789248,77.527216,79.928315,100.0,6213.0,211.776276,122.707417,1.0,127.0,186.0,266.0,1269.0,6213.0,5.204673,0.57823,0.693147,4.85203,5.231109,5.587249,7.146772,6213.0,4.071794,1.556945,0.054318,2.951776,3.844474,4.9981,9.985072,6184.0,581.851552,982.299594,0.0,33.0,58.0,752.0,4342.0,2271.0,-266.983708,266.327188,-1144.0,-401.0,-170.0,-7.0,0.0,6158.0,28.288796,5.112339,15.464216,24.383571,28.15666,31.100305,43.242856,2271.0,55.653897,15.704821,23.0,45.0,62.0,67.0,76.0,2271.0,29.232896,5.455832,22.359629,25.648918,28.675467,33.567347,42.650542,6213.0,1781.185096,989.784367,181.0,1133.0,1500.0,2290.0,4453.0,6213.0,1.897854,3.08063,0.0,0.0,0.0,2.730003,35.985607,6213.0,1.108259,1.863766,0.0,0.0,0.0,1.912229,20.746887,6213.0,2.57209,3.390985,0.0,0.0,1.700102,3.964321,31.13879,6213.0,0.508155,1.311296,0.0,0.0,0.0,0.0,27.274981,6213.0,2.839839,4.
820449,0.0,0.0,1.324503,3.881234,67.502411,6213.0,0.504476,2.350054,0.0,0.0,0.0,0.0,92.5,6213.0,23.708239,22.641396,0.0,1.665002,21.291092,38.336773,199.782852,6213.0,9.30226,11.726217,0.0,0.0,5.208333,14.260249,117.30854,6213.0,0.059982,0.770292,0.0,0.0,0.0,0.0,22.315893
CD95 memory B cell,1999.0,2677.537269,648.137992,1013.0,2226.5,2578.0,3041.5,4896.0,1999.0,388.026513,246.789067,7.0,234.0,328.0,469.5,2294.0,1999.0,35899.770885,21143.056198,3320.0,23202.0,30634.0,42212.0,283042.0,1999.0,10188.971486,4179.152243,2306.0,7438.0,9151.0,11875.5,32677.0,1999.0,1966.653327,16.154917,1941.0,1953.0,1963.0,1980.0,2000.0,1999.0,162.183592,266.537797,0.0,0.0,0.0,182.0,1144.0,1999.0,0.034154,0.034316,0.002797,0.016045,0.024044,0.038354,0.321429,1999.0,1.0,5.426247e-15,1.0,1.0,1.0,1.0,1.0,1999.0,0.87886,0.199382,0.021298,0.860044,0.975816,0.995852,0.999987,1999.0,1.0,5.830174e-09,1.0,1.0,1.0,1.0,1.0,1999.0,2677.537269,648.137992,1013.0,2226.5,2578.0,3041.5,4896.0,1999.0,7.864457,0.239351,6.921658,7.708635,7.855157,8.020435,8.496378,1999.0,10188.971486,4179.152243,2306.0,7438.0,9151.0,11875.5,32677.0,1999.0,9.155866,0.377415,7.743703,8.914492,9.121728,9.382317,10.394457,1999.0,35.783444,3.240142,23.431882,33.669901,35.882016,37.917361,52.441938,1999.0,49.132675,4.329232,32.780663,46.381443,49.292856,52.017575,63.113571,1999.0,59.178629,4.28372,43.362596,56.546193,59.413143,62.156194,72.559558,1999.0,71.407781,3.772285,57.032103,69.015275,71.544908,74.086888,83.454387,1999.0,388.026513,246.789067,7.0,234.0,328.0,469.5,2294.0,1999.0,5.797087,0.586,2.079442,5.459586,5.796058,6.153795,7.738488,1999.0,3.798224,1.579752,0.126812,2.694905,3.551816,4.688365,9.875965,1943.0,468.856408,930.688376,0.0,27.0,64.0,343.0,4342.0,1002.0,-288.406188,296.428285,-1144.0,-417.0,-163.0,-92.0,0.0,1981.0,27.856727,5.152048,15.464216,24.46964,27.022574,30.101657,43.242856,1002.0,53.394212,16.732821,23.0,36.0,58.0,67.0,76.0,1002.0,28.656007,4.874477,22.359629,25.648918,26.571405,31.161374,42.650542,1999.0,1451.790895,668.788547,181.0,936.0,1327.0,1744.0,4453.0,1999.0,2.463799,5.343011,0.0,0.0,0.0,3.512175,135.989883,1999.0,0.940539,2.973155,0.0,0.0,0.0,0.810737,72.09182,1999.0,1.604794,2.900218,0.0,0.0,0.0,2.175805,32.194023,1999.0,1.322076,4.11182,0.0,0.0,0.0,1.284439,9
9.697884,1999.0,6.066151,10.916736,0.0,0.0,1.930129,8.449149,241.58667,1999.0,2.760906,5.580138,0.0,0.0,0.0,3.225598,77.306908,1999.0,6.164883,19.205975,0.0,0.0,0.0,2.177322,218.767136,1999.0,0.564991,3.489058,0.0,0.0,0.0,0.0,79.701302,1999.0,0.541348,2.409484,0.0,0.0,0.0,0.0,36.938534
Core memory B cell,38569.0,1578.945656,416.045756,257.0,1313.0,1563.0,1825.0,4472.0,38569.0,218.243382,120.817641,0.0,135.0,195.0,274.0,1762.0,38569.0,19207.525707,9885.131829,1131.0,13553.0,17614.0,22732.0,239079.0,38569.0,5401.886541,1788.544333,562.0,4280.0,5313.0,6404.0,23315.0,38569.0,1968.902305,15.378986,1941.0,1955.0,1968.0,1980.0,2000.0,38569.0,145.491068,257.520274,0.0,0.0,0.0,175.0,1144.0,38569.0,0.03101,0.032804,0.002797,0.01402,0.021819,0.035261,0.517766,38569.0,0.999978,0.003368234,0.361504,1.0,1.0,1.0,1.0,38569.0,0.911483,0.153654,0.027694,0.907432,0.977879,0.994894,0.999993,38569.0,1.0,9.674718e-08,0.999991,1.0,1.0,1.0,1.0,38569.0,1578.945656,416.045756,257.0,1313.0,1563.0,1825.0,4472.0,38569.0,7.328225,0.280319,5.55296,7.180831,7.355002,7.509883,8.405815,38569.0,5401.886541,1788.544333,562.0,4280.0,5313.0,6404.0,23315.0,38569.0,8.534847,0.363948,6.33328,8.361942,8.5781,8.764834,10.056895,38569.0,43.721758,3.673713,24.855097,41.360072,43.741288,46.13164,68.825301,38569.0,58.446362,4.493537,31.878623,55.617569,58.646953,61.499781,81.132828,38569.0,67.706541,4.332962,41.425162,64.990878,67.905452,70.666918,91.415663,38569.0,79.505598,4.174764,59.590556,76.975537,79.591521,82.001726,100.0,38569.0,218.243382,120.817641,0.0,135.0,195.0,274.0,1762.0,38569.0,5.242762,0.5728,0.0,4.912655,5.278115,5.616771,7.474772,38569.0,3.99568,1.547126,0.0,2.904181,3.766751,4.854229,9.988563,37677.0,380.133291,738.302434,0.0,27.0,56.0,301.0,4342.0,17293.0,-339.349563,297.549594,-1144.0,-552.0,-264.0,-95.0,0.0,38361.0,27.145897,5.453261,15.464216,23.243408,26.042241,28.905868,43.242856,17293.0,50.248713,15.380962,23.0,36.0,49.0,62.0,76.0,17293.0,28.565544,5.747513,22.359629,24.550265,26.194977,31.161374,42.650542,38569.0,1493.709559,601.172776,181.0,1126.0,1391.0,1744.0,4453.0,38569.0,1.534584,2.778676,0.0,0.0,0.0,2.146844,49.407116,38569.0,0.821159,1.781157,0.0,0.0,0.0,1.391208,58.910164,38569.0,1.017423,2.056345,0.0,0.0,0.0,1.629726,40.705563,38569.0,0.457858,1.447153,0.
0,0.0,0.0,0.0,35.874439,38569.0,3.392756,5.680082,0.0,0.0,0.0,4.719207,176.151764,38569.0,1.215388,3.206389,0.0,0.0,0.0,0.0,66.765579,38569.0,16.040968,18.96755,0.0,0.0,7.673419,28.809219,231.425095,38569.0,1.641727,3.405577,0.0,0.0,0.0,2.154708,84.415581,38569.0,0.181388,1.238965,0.0,0.0,0.0,0.0,37.819801
Core naive B cell,109212.0,1325.340494,333.421511,281.0,1107.0,1299.0,1508.0,4713.0,109212.0,169.621534,89.357939,0.0,109.0,153.0,211.0,1635.0,109212.0,14138.701654,7288.908409,796.0,10216.0,12822.0,16148.25,198097.0,109212.0,4031.624904,1281.640316,509.0,3246.0,3909.0,4642.0,24934.0,109212.0,1968.543869,15.752987,1941.0,1954.0,1967.0,1984.0,2000.0,109212.0,169.002271,267.216735,0.0,0.0,0.0,238.0,1144.0,109212.0,0.018172,0.019741,0.001391,0.008499,0.01332,0.021286,0.640379,109212.0,0.999971,0.004348449,0.160375,1.0,1.0,1.0,1.0,109212.0,0.934047,0.13482,0.005429,0.946363,0.989105,0.997637,0.999999,109212.0,1.0,1.010014e-07,0.999985,1.0,1.0,1.0,1.0,109212.0,1325.340494,333.421511,281.0,1107.0,1299.0,1508.0,4713.0,109212.0,7.159135,0.251744,5.641907,7.010312,7.17012,7.319202,8.458292,109212.0,4031.624904,1281.640316,509.0,3246.0,3909.0,4642.0,24934.0,109212.0,8.254203,0.315461,6.234411,8.085487,8.271293,8.443116,10.124028,109212.0,41.55156,3.394708,24.878523,39.319263,41.533033,43.760311,62.317328,109212.0,56.604583,4.302341,33.481922,53.825229,56.745073,59.524601,76.952563,109212.0,66.690441,4.207188,45.318681,63.981318,66.846901,69.56182,85.789474,109212.0,79.750142,4.058428,61.934066,77.307267,79.657727,82.054646,100.0,109212.0,169.621534,89.357939,0.0,109.0,153.0,211.0,1635.0,109212.0,5.011172,0.522932,0.0,4.70048,5.036953,5.356586,7.40001,109212.0,4.168561,1.542947,0.0,3.084608,3.980144,5.063291,9.99258,106216.0,584.984644,1100.233352,0.0,27.0,58.0,539.0,4342.0,57524.0,-319.835808,298.449252,-1144.0,-517.0,-257.0,-92.0,0.0,108423.0,27.724216,5.237828,15.464216,23.6996,26.82061,30.101657,43.242856,57524.0,48.975541,15.474989,23.0,35.0,49.0,66.0,76.0,57524.0,27.839061,5.013792,22.359629,24.550265,25.66073,29.187624,42.650542,109212.0,1734.274988,800.296748,181.0,1216.0,1566.0,2287.0,4453.0,109212.0,0.055454,0.493956,0.0,0.0,0.0,0.0,41.968716,109212.0,0.062545,0.4372,0.0,0.0,0.0,0.0,13.818517,109212.0,0.230525,0.920456,0.0,0.0,0.0,0.0,23.130301,109212.0,0.022529,0.28
6479,0.0,0.0,0.0,0.0,24.286581,109212.0,0.074983,0.543983,0.0,0.0,0.0,0.0,39.664021,109212.0,0.027091,0.287274,0.0,0.0,0.0,0.0,14.684288,109212.0,38.525269,19.050043,0.0,25.43882,35.496956,48.060417,323.753357,109212.0,11.720381,7.620049,0.0,6.249675,10.518934,15.929909,88.495575,109212.0,0.067676,0.614485,0.0,0.0,0.0,0.0,21.447721
Early memory B cell,1448.0,1584.896409,413.853207,542.0,1315.25,1527.0,1791.5,3951.0,1448.0,222.494475,123.096616,32.0,141.0,195.0,274.25,1264.0,1448.0,17730.479282,9503.231926,1458.0,12341.75,15828.5,20425.5,124189.0,1448.0,5104.042818,1876.633043,1092.0,3942.5,4761.0,5762.0,16381.0,1448.0,1968.236188,15.994626,1941.0,1954.0,1968.0,1980.0,2000.0,1448.0,128.944061,223.859441,0.0,0.0,0.0,175.0,1144.0,1448.0,0.027912,0.023247,0.003027,0.014901,0.021919,0.03364,0.372477,1448.0,1.0,1.314095e-13,1.0,1.0,1.0,1.0,1.0,1448.0,0.656533,0.254998,0.018358,0.468356,0.708503,0.876853,0.999292,1448.0,1.0,9.639862e-08,0.999997,1.0,1.0,1.0,1.0,1448.0,1584.896409,413.853207,542.0,1315.25,1527.0,1791.5,3951.0,1448.0,7.337255,0.250409,6.297109,7.182542,7.331715,7.491366,8.281977,1448.0,5104.042818,1876.633043,1092.0,3942.5,4761.0,5762.0,16381.0,1448.0,8.480782,0.332589,6.996681,8.279824,8.468423,8.659213,9.703938,1448.0,41.545626,3.10352,28.886688,39.434065,41.541938,43.533038,54.592423,1448.0,56.038423,3.909066,38.902249,53.508771,56.138542,58.682879,69.328537,1448.0,65.628917,3.791743,47.658792,63.178619,65.771703,68.206338,78.415614,1448.0,78.009206,3.498277,60.093365,75.867083,78.216164,80.228875,96.153846,1448.0,222.494475,123.096616,32.0,141.0,195.0,274.25,1264.0,1448.0,5.277408,0.515857,3.496508,4.955827,5.278115,5.617679,7.142827,1448.0,4.310483,1.544601,0.929954,3.218029,4.079474,5.21253,9.98774,1422.0,398.671589,800.406059,0.0,27.0,51.0,293.0,4342.0,709.0,-311.12835,292.113232,-1144.0,-516.0,-259.0,-92.0,0.0,1439.0,27.422019,5.449639,15.464216,22.862534,26.360544,30.151433,43.242856,709.0,49.320169,15.980228,23.0,36.0,49.0,66.0,76.0,709.0,27.830852,5.916756,22.359629,23.143062,25.66073,31.161374,42.650542,1448.0,1654.130525,758.931805,181.0,1136.0,1550.0,2151.0,4453.0,1448.0,0.661133,1.500921,0.0,0.0,0.0,0.0,17.271156,1448.0,0.46592,1.138532,0.0,0.0,0.0,0.0,8.183306,1448.0,0.850405,1.705306,0.0,0.0,0.0,1.414477,12.647554,1448.0,0.139227,0.602781,0.0,0.0,0.0,0.0,7.439093,1448.
0,0.536875,1.684872,0.0,0.0,0.0,0.0,18.354567,1448.0,0.089768,0.515656,0.0,0.0,0.0,0.0,7.791196,1448.0,36.140026,17.513918,0.0,24.656571,34.013437,45.754849,147.55481,1448.0,7.294833,5.777367,0.0,2.95749,6.16302,10.206699,38.088642,1448.0,0.097297,0.820766,0.0,0.0,0.0,0.0,15.994882
ISG+ naive B cell,6302.0,1456.173596,434.200748,455.0,1166.0,1386.0,1656.0,4562.0,6302.0,190.78483,111.978396,1.0,118.0,167.0,233.0,1696.0,6302.0,15812.372104,9035.780642,2179.0,10500.0,13938.0,18665.0,98722.0,6302.0,4574.767851,1902.491222,863.0,3389.25,4217.0,5267.0,26402.0,6302.0,1970.159791,13.791266,1941.0,1961.0,1969.0,1980.0,2000.0,6302.0,79.079181,198.939045,0.0,0.0,0.0,0.0,1144.0,6302.0,0.024232,0.025128,0.001985,0.011256,0.017668,0.02865,0.451923,6302.0,1.0,1.849458e-09,1.0,1.0,1.0,1.0,1.0,6302.0,0.901045,0.202396,0.006657,0.927879,0.996716,0.999811,1.0,6302.0,1.0,1.792964e-07,0.999987,1.0,1.0,1.0,1.0,6302.0,1456.173596,434.200748,455.0,1166.0,1386.0,1656.0,4562.0,6302.0,7.244474,0.278526,6.122493,7.062192,7.234898,7.412764,8.425736,6302.0,4574.767851,1902.491222,863.0,3389.25,4217.0,5267.0,26402.0,6302.0,8.35835,0.367661,6.761573,8.128659,8.347116,8.569406,10.181233,6302.0,41.143483,3.37096,28.212631,39.023029,41.176562,43.283895,54.504073,6302.0,55.762345,4.344888,37.893197,53.0116,55.926818,58.653239,72.704591,6302.0,65.74032,4.212767,47.86116,63.096801,65.962105,68.633707,82.58483,6302.0,78.717234,3.962791,60.723801,76.415993,78.901711,81.149716,100.0,6302.0,190.78483,111.978396,1.0,118.0,167.0,233.0,1696.0,6302.0,5.11385,0.538776,0.693147,4.779123,5.123964,5.455321,7.436617,6302.0,4.144173,1.480466,0.0429,3.102301,4.017444,4.99445,9.927984,6232.0,681.422015,1105.285701,0.0,6.0,51.0,970.0,4342.0,1812.0,-348.444812,290.430931,-1144.0,-552.0,-279.0,-102.0,0.0,6202.0,29.155585,4.708598,15.464216,25.931017,31.100305,32.130207,43.242856,1812.0,46.642384,14.249459,23.0,35.0,46.0,62.0,76.0,1812.0,29.017288,6.443751,22.359629,25.648918,26.024996,31.161374,42.650542,6302.0,1944.161377,1029.384451,181.0,1190.0,1835.0,2295.0,4453.0,6302.0,0.339085,1.38493,0.0,0.0,0.0,0.0,22.002201,6302.0,0.222963,2.044172,0.0,0.0,0.0,0.0,147.438263,6302.0,0.365522,1.307474,0.0,0.0,0.0,0.0,18.744143,6302.0,0.099066,0.607987,0.0,0.0,0.0,0.0,18.130539,6302.0,0.663452,2.783921,0.0,0.
0,0.0,0.0,56.647785,6302.0,0.204721,1.278525,0.0,0.0,0.0,0.0,31.732599,6302.0,33.779057,19.718168,0.0,21.392282,32.189083,44.397959,204.081635,6302.0,9.860401,7.861276,0.0,3.74269,8.865249,14.356472,57.522125,6302.0,0.063414,0.656692,0.0,0.0,0.0,0.0,25.363153
Plasma cell,2053.0,3226.519727,899.615559,267.0,2634.0,3265.0,3869.0,4996.0,2053.0,642.658548,441.293018,0.0,322.0,536.0,860.0,2811.0,2053.0,77076.0151,41264.459133,1672.0,49472.0,72495.0,98124.0,518799.0,2053.0,22018.591817,8793.12812,531.0,16039.0,22111.0,28041.0,53458.0,2053.0,1967.735996,15.485183,1941.0,1954.0,1967.0,1977.0,2000.0,2053.0,178.483195,292.779159,0.0,0.0,0.0,204.0,1144.0,2053.0,0.108708,0.046921,0.014515,0.080814,0.100448,0.125464,0.539394,2053.0,0.999174,0.01861034,0.386164,1.0,1.0,1.0,1.0,2053.0,0.993542,0.058093,0.038858,1.0,1.0,1.0,1.0,2053.0,1.0,2.861394e-08,0.999999,1.0,1.0,1.0,1.0,2053.0,3226.519727,899.615559,267.0,2634.0,3265.0,3869.0,4996.0,2053.0,8.029192,0.348244,5.590987,7.876638,8.091321,8.26101,8.516593,2053.0,22018.591817,8793.12812,531.0,16039.0,22111.0,28041.0,53458.0,2053.0,9.889621,0.537445,6.276643,9.682841,10.003876,10.241459,10.88667,2053.0,54.680149,12.186387,24.572417,45.724493,54.007657,63.167847,93.966624,2053.0,63.484908,10.03356,35.827301,56.081892,63.228532,70.663031,95.332844,2053.0,70.931093,8.197423,46.710653,64.989905,70.867382,76.792727,96.772419,2053.0,79.75571,6.217768,60.963423,75.436333,79.608668,83.985994,100.0,2053.0,642.658548,441.293018,0.0,322.0,536.0,860.0,2811.0,2053.0,6.206587,0.829198,0.0,5.777652,6.285998,6.758095,7.941651,2053.0,3.043698,1.813992,0.0,1.744422,2.694242,3.974397,9.965636,2034.0,472.660767,854.230995,0.0,22.0,91.0,384.0,4342.0,920.0,-281.213043,300.260497,-1144.0,-516.0,-140.0,-7.0,0.0,2039.0,27.223215,4.937787,15.464216,24.383571,26.360544,28.905868,43.242856,920.0,51.553261,15.955712,23.0,36.0,49.0,62.0,76.0,920.0,28.335409,5.112451,22.359629,24.550265,26.194977,31.161374,42.650542,2053.0,1364.011203,664.718313,181.0,907.0,1288.0,1621.0,4453.0,2053.0,63.683712,145.277435,0.0,0.0,0.0,37.284649,1360.546875,2053.0,30.298044,110.081116,0.0,0.0,0.0,0.732172,1237.702515,2053.0,28.469927,78.488251,0.0,0.0,0.0,8.144227,1218.370117,2053.0,27.385244,124.560471,0.0,0.0,0.0,2.209554,1764.197998,
2053.0,314.610016,400.973602,0.0,2.800728,96.145363,552.68158,2695.386719,2053.0,26.353231,112.10096,0.0,0.0,1.576375,17.13529,2261.504639,2053.0,89.654015,309.876465,0.0,0.0,0.0,2.923488,3349.075195,2053.0,9.985684,77.388931,0.0,0.0,0.0,0.0,1057.31665,2053.0,0.823077,19.389757,0.0,0.0,0.0,0.0,792.036987
Transitional B cell,15358.0,1480.446543,386.317651,368.0,1218.0,1434.0,1689.0,4611.0,15358.0,175.191692,95.071839,1.0,110.0,156.0,219.0,1410.0,15358.0,16164.428702,8623.884858,1103.0,11238.25,14460.0,18814.75,144208.0,15358.0,4645.264162,1638.370802,556.0,3566.0,4378.0,5414.0,23717.0,15358.0,1970.003842,16.472521,1941.0,1954.0,1969.0,1984.0,2000.0,15358.0,154.868538,252.587868,0.0,0.0,0.0,216.0,1144.0,15358.0,0.01667,0.016067,0.001964,0.008451,0.012787,0.01981,0.606897,15358.0,1.0,1.288729e-09,1.0,1.0,1.0,1.0,1.0,15358.0,0.872616,0.179101,0.014795,0.803679,0.962645,0.998896,1.0,15358.0,1.0,1.70783e-08,0.999998,1.0,1.0,1.0,1.0,15358.0,1480.446543,386.317651,368.0,1218.0,1434.0,1689.0,4611.0,15358.0,7.268148,0.255742,5.910797,7.105786,7.26892,7.432484,8.436417,15358.0,4645.264162,1638.370802,556.0,3566.0,4378.0,5414.0,23717.0,15358.0,8.387312,0.335864,6.322565,8.17948,8.384576,8.596928,10.07399,15358.0,40.446559,3.311337,27.035426,38.257858,40.4363,42.645284,56.93407,15358.0,55.391854,4.290299,36.047234,52.624306,55.571807,58.334732,73.224044,15358.0,65.543774,4.159294,47.016781,62.852419,65.769865,68.400271,81.590909,15358.0,78.472094,3.802946,61.745959,76.072699,78.59685,80.914141,100.0,15358.0,175.191692,95.071839,1.0,110.0,156.0,219.0,1410.0,15358.0,5.036926,0.532777,0.693147,4.70953,5.056246,5.393628,7.252054,15358.0,3.754861,1.421569,0.041017,2.76025,3.586042,4.56975,9.993553,14800.0,717.087297,1225.307526,0.0,30.0,90.0,808.0,4342.0,7913.0,-318.767219,299.766933,-1144.0,-517.0,-238.0,-92.0,0.0,15256.0,27.004082,4.928816,15.464216,23.143062,25.987896,29.187624,43.242856,7913.0,44.110451,15.290602,23.0,35.0,44.0,58.0,76.0,7913.0,26.444978,4.048549,22.359629,23.143062,25.654874,26.194977,42.650542,15358.0,1834.744563,890.3229,181.0,1268.0,1587.0,2365.0,4453.0,15358.0,0.012486,0.183293,0.0,0.0,0.0,0.0,8.399832,15358.0,0.007228,0.13463,0.0,0.0,0.0,0.0,6.044731,15358.0,0.028075,0.297737,0.0,0.0,0.0,0.0,8.156607,15358.0,0.009423,0.173337,0.0,0.0,0.0,0.0,10.460251,15358
.0,0.063894,0.480375,0.0,0.0,0.0,0.0,21.195421,15358.0,0.022588,0.253789,0.0,0.0,0.0,0.0,7.613247,15358.0,39.760574,20.464687,0.0,25.769875,35.923191,49.347639,279.850739,15358.0,13.256676,7.835478,0.0,7.645625,12.073649,17.556969,69.484657,15358.0,0.003855,0.144945,0.0,0.0,0.0,0.0,12.682308


Predict isotypes using cutoff of 2

In [65]:
%%time
# Classify isotypes using the same cutoff (2) for every IgH gene.
ig_types2 = classify_ig_df(
    df,
    key='barcodes',
    ig_cutoffs=dict.fromkeys(
        ['IGHG1', 'IGHG2', 'IGHG3', 'IGHG4', 'IGHA1', 'IGHA2', 'IGHM', 'IGHD', 'IGHE'],
        2,
    ),
    num_of_processes=14,
)

Binarizing Ig detection based on cutoffs
Determining switched status
Predicting isotypes
141691 matches for gene IGHM;IGHD
24856 matches for gene IGHM
513 matches for gene IGHD
6836 matches for gene IGHG3
4876 matches for gene IGHG1
8542 matches for gene IGHA1
1054 matches for gene IGHG2
627 matches for gene IGHG4
127 matches for gene IGHE
354 matches for gene IGHA2
CPU times: user 7.91 s, sys: 8.32 s, total: 16.2 s
Wall time: 21.2 s


Predict isotypes using cutoff of 0.5

In [66]:
%%time
# Classify isotypes using the same cutoff (0.5) for every IgH gene.
ig_types05 = classify_ig_df(
    df,
    key='barcodes',
    ig_cutoffs=dict.fromkeys(
        ['IGHG1', 'IGHG2', 'IGHG3', 'IGHG4', 'IGHA1', 'IGHA2', 'IGHM', 'IGHD', 'IGHE'],
        0.5,
    ),
    num_of_processes=14,
)

Binarizing Ig detection based on cutoffs
Determining switched status
Predicting isotypes
146917 matches for gene IGHM;IGHD
21357 matches for gene IGHM
607 matches for gene IGHD
9053 matches for gene IGHG3
4349 matches for gene IGHG1
7040 matches for gene IGHA1
705 matches for gene IGHG2
382 matches for gene IGHG4
59 matches for gene IGHE
203 matches for gene IGHA2
CPU times: user 8.04 s, sys: 8.23 s, total: 16.3 s
Wall time: 21.4 s


### Check Results

In [69]:
# Switched-status tally for every cell classified at the 0.5 cutoff.
ig_types05['switched_status'].value_counts()

nonswitched     168881
switched         21791
undetermined       548
Name: switched_status, dtype: int64

In [70]:
# Switched-status tally for every cell classified at the cutoff of 2.
ig_types2['switched_status'].value_counts()

nonswitched     167060
switched         22416
undetermined      1744
Name: switched_status, dtype: int64

In [71]:
# Rows of df whose L3 label is listed in bmem_types.
dfbmem = df[df['AIFI_L3_new'].isin(bmem_types)].copy()

In [72]:
# Rows of df whose L3 label is listed in bother_types.
dfoth = df[df['AIFI_L3_new'].isin(bother_types)].copy()

In [73]:
# Rows of df whose L3 label is listed in bnaive_types.
dfnaive = df[df['AIFI_L3_new'].isin(bnaive_types)].copy()
dfnaive.shape

(130872, 73)

In [74]:
# Rows of df labelled 'Plasma cell'.
dfplasma = df[df['AIFI_L3_new'].isin(['Plasma cell'])].copy()
dfplasma.shape

(2053, 73)

In [75]:
# Cutoff-2 calls restricted to the barcodes present in dfbmem.
test2 = ig_types2[ig_types2['barcodes'].isin(dfbmem['barcodes'].values)].copy()
test2['switched_status'].value_counts()

nonswitched     36201
switched        20452
undetermined     1642
Name: switched_status, dtype: int64

In [76]:
# Cutoff-0.5 calls restricted to the barcodes present in dfbmem.
test05 = ig_types05[ig_types05['barcodes'].isin(dfbmem['barcodes'].values)].copy()
test05['switched_status'].value_counts()

nonswitched     37921
switched        19891
undetermined      483
Name: switched_status, dtype: int64

In [77]:
# Isotype tally within the dfbmem subset, cutoff 0.5.
test05['isotype'].value_counts()

IGHM;IGHD       22423
IGHM            15023
IGHG3            8176
IGHA1            6272
IGHG1            4148
IGHG2             674
undetermined      483
IGHD              475
IGHG4             365
IGHA2             200
IGHE               56
Name: isotype, dtype: int64

In [78]:
# Isotype tally within the dfbmem subset, cutoff 2.
test2['isotype'].value_counts()

IGHM;IGHD       18181
IGHM            17640
IGHA1            7586
IGHG3            6120
IGHG1            4663
undetermined     1642
IGHG2            1018
IGHG4             595
IGHD              380
IGHA2             347
IGHE              123
Name: isotype, dtype: int64

In [79]:
# Cutoff-2 calls restricted to the barcodes present in dfnaive.
test2naive = ig_types2[ig_types2['barcodes'].isin(dfnaive['barcodes'].values)].copy()
test2naive['switched_status'].value_counts()

nonswitched     130273
switched           498
undetermined       101
Name: switched_status, dtype: int64

In [80]:
# Same dfnaive subset at cutoff 2, expressed as a percentage of its rows.
test2naive = ig_types2[ig_types2['barcodes'].isin(dfnaive['barcodes'].values)].copy()
test2naive['switched_status'].value_counts() / len(test2naive) * 100

nonswitched     99.542301
switched         0.380524
undetermined     0.077175
Name: switched_status, dtype: float64

In [81]:
# Cutoff-0.5 calls restricted to the barcodes present in dfnaive.
test05naive = ig_types05[ig_types05['barcodes'].isin(dfnaive['barcodes'].values)].copy()
test05naive['switched_status'].value_counts()

nonswitched     130320
switched           487
undetermined        65
Name: switched_status, dtype: int64

In [82]:
# Same dfnaive subset at cutoff 0.5, expressed as a percentage of its rows.
test05naive = ig_types05[ig_types05['barcodes'].isin(dfnaive['barcodes'].values)].copy()
test05naive['switched_status'].value_counts() / len(test05naive) * 100

nonswitched     99.578214
switched         0.372119
undetermined     0.049667
Name: switched_status, dtype: float64

In [83]:
# Cutoff-2 calls restricted to the barcodes present in dfplasma.
test2plasma = ig_types2[ig_types2['barcodes'].isin(dfplasma['barcodes'].values)].copy()
test2plasma['switched_status'].value_counts()

switched        1466
nonswitched      586
undetermined       1
Name: switched_status, dtype: int64

In [84]:
# Same dfplasma subset at cutoff 2, expressed as a percentage of its rows.
test2plasma = ig_types2[ig_types2['barcodes'].isin(dfplasma['barcodes'].values)].copy()
test2plasma['switched_status'].value_counts() / len(test2plasma) * 100

switched        71.407696
nonswitched     28.543595
undetermined     0.048709
Name: switched_status, dtype: float64

In [85]:
# Cutoff-0.5 calls restricted to the barcodes present in dfplasma.
test05plasma = ig_types05[ig_types05['barcodes'].isin(dfplasma['barcodes'].values)].copy()
test05plasma['switched_status'].value_counts()

switched       1413
nonswitched     640
Name: switched_status, dtype: int64

In [86]:
# Same dfplasma subset at cutoff 0.5, expressed as a percentage of its rows.
test05plasma = ig_types05[ig_types05['barcodes'].isin(dfplasma['barcodes'].values)].copy()
test05plasma['switched_status'].value_counts() / len(test05plasma) * 100

switched       68.826108
nonswitched    31.173892
Name: switched_status, dtype: float64

In [89]:
# Cutoff-0.5 calls restricted to the barcodes present in dfoth, as a percentage of its rows.
test05oth = ig_types05[ig_types05['barcodes'].isin(dfoth['barcodes'].values)].copy()
test05oth['switched_status'].value_counts() / len(test05oth) * 100

nonswitched     98.521723
switched         1.429377
undetermined     0.048900
Name: switched_status, dtype: float64

In [91]:
# Cutoff-2 calls restricted to the barcodes present in dfoth, as a percentage of its rows.
test2oth = ig_types2[ig_types2['barcodes'].isin(dfoth['barcodes'].values)].copy()
test2oth['switched_status'].value_counts() / len(test2oth) * 100

nonswitched     98.445740
switched         1.477525
undetermined     0.076735
Name: switched_status, dtype: float64

In [90]:
# Isotype tally within the dfoth subset, cutoff 0.5.
test05oth['isotype'].value_counts()

IGHM;IGHD       124494
IGHM              6334
IGHG3              877
IGHA1              768
IGHG1              201
IGHD               132
undetermined        65
IGHG2               31
IGHG4               17
IGHA2                3
IGHE                 3
Name: isotype, dtype: int64

In [92]:
# Isotype tally within the dfoth subset, cutoff 2.
test2oth['isotype'].value_counts()

IGHM;IGHD       123510
IGHM              7216
IGHA1              956
IGHG3              716
IGHG1              213
IGHD               133
undetermined       102
IGHG2               36
IGHG4               32
IGHA2                7
IGHE                 4
Name: isotype, dtype: int64

In [93]:
# Cell counts per (switched_status, n_pos) pair in the dfbmem subset, cutoff 0.5.
test05.groupby(by=['switched_status', 'n_pos']).size()

switched_status  n_pos
nonswitched      1         1916
                 2         8490
                 3        11664
                 4         9228
                 5         4556
                 6         1730
                 7          309
                 8           28
switched         1         2923
                 2         7911
                 3         5426
                 4         2695
                 5          815
                 6          118
                 7            3
undetermined     0          483
dtype: int64

In [94]:
# Cell counts per (switched_status, n_pos) pair in the dfbmem subset, cutoff 2.
test2.groupby(by=['switched_status', 'n_pos']).size()

switched_status  n_pos
nonswitched      1         5681
                 2        13626
                 3        10210
                 4         4788
                 5         1476
                 6          373
                 7           45
                 8            2
switched         1         6548
                 2         9241
                 3         3410
                 4         1013
                 5          219
                 6           20
                 7            1
undetermined     0         1642
dtype: int64

### Save

Merge cell metadata and Ig count data to isotype results

In [67]:
# Snapshot of the frame that was passed to classify_ig_df.
# The path is built from `outdir`, like the other save cells, so all outputs move together if it changes.
df.to_csv(os.path.join(outdir, "normcount_obs_df_allb_forcheck.csv"))

In [95]:
# Columns returned by classify_ig_df; shown to see what the merge below will bring in.
ig_types2.columns

Index(['barcodes', 'IGHG1', 'IGHG2', 'IGHG3', 'IGHG4', 'IGHA1', 'IGHA2',
       'IGHE', 'IGHM', 'IGHD', 'IGHG1_pos', 'IGHG2_pos', 'IGHG3_pos',
       'IGHG4_pos', 'IGHA1_pos', 'IGHA2_pos', 'IGHE_pos', 'IGHM_pos',
       'IGHD_pos', 'n_pos', 'concat_pos', 'switched_status', 'isotype'],
      dtype='object')

In [96]:
# Left-join the cutoff-2 calls onto df.
# No `on=` is given, so pandas joins on every column name the two frames share.
df_ig = pd.merge(df, ig_types2, how='left')

In [97]:
# Cell-level table for the cutoff of 2, written without the index column.
df_ig.to_csv(path_or_buf=os.path.join(outdir, 'adata-all-b_isotypes_cutoff2.csv'), index=False)

In [98]:
# Left-join the cutoff-0.5 calls onto df.
# No `on=` is given, so pandas joins on every column name the two frames share.
df_ig05 = pd.merge(df, ig_types05, how='left')

In [99]:
# Cell-level table for the cutoff of 0.5, written without the index column.
df_ig05.to_csv(path_or_buf=os.path.join(outdir, 'adata-all-b_isotypes_cutoff0.5.csv'), index=False)

## Make Pseudobulk

Pseudobulk B cells by L3 celltype and isotype (use 0.5 cutoff for isotype assignment)

In [100]:
import decoupler as dc

In [101]:
# Attach the cutoff-0.5 isotype calls to the per-cell metadata, matching rows on the index of both frames.
# NOTE(review): assumes ig_types05 carries the same index as adata_allb.obs — confirm.
# Not idempotent: a second run merges the same columns again and pandas suffixes the overlaps with _x/_y.
adata_allb.obs = pd.merge(
    adata_allb.obs,
    ig_types05,
    how='left',
    left_index=True,
    right_index=True,
)

In [102]:
# Columns that the merge above added to adata_allb.obs.
ig_types05.columns

Index(['barcodes', 'IGHG1', 'IGHG2', 'IGHG3', 'IGHG4', 'IGHA1', 'IGHA2',
       'IGHE', 'IGHM', 'IGHD', 'IGHG1_pos', 'IGHG2_pos', 'IGHG3_pos',
       'IGHG4_pos', 'IGHA1_pos', 'IGHA2_pos', 'IGHE_pos', 'IGHM_pos',
       'IGHD_pos', 'n_pos', 'concat_pos', 'switched_status', 'isotype'],
      dtype='object')

In [103]:
# Echo the genes being removed, then drop all of their columns in one call.
# The original rebuilt the whole obs frame once per gene; a single drop gives the same result.
for gene in ig_genes:
    print(gene)
adata_allb.obs = adata_allb.obs.drop(columns=list(ig_genes))

IGHG1
IGHG2
IGHG3
IGHG4
IGHA1
IGHA2
IGHM
IGHD
IGHE


In [104]:
# Grouping key for pseudobulking: "<L3 celltype>_<isotype>" per cell.
adata_allb.obs['celltype_isotype'] = [
    f"{celltype}_{isotype}"
    for celltype, isotype in zip(
        adata_allb.obs['AIFI_L3_new'].values,
        adata_allb.obs['isotype'].values,
    )
]

In [105]:
# Cells per celltype/isotype combination.
adata_allb.obs['celltype_isotype'].value_counts()

Core naive B cell_IGHM;IGHD      104055
Transitional B cell_IGHM;IGHD     15030
Core memory B cell_IGHM;IGHD      12662
Core memory B cell_IGHM           11058
ISG+ naive B cell_IGHM;IGHD        5375
                                  ...  
Transitional B cell_IGHG3             1
Early memory B cell_IGHG4             1
CD27- effector B cell_IGHE            1
Plasma cell_IGHE                      1
Plasma cell_IGHA2                     1
Name: celltype_isotype, Length: 104, dtype: int64

In [106]:
# Positions of the Ig genes within adata_allb.var_names.
# NOTE(review): no later cell appears to read ig_index — candidate for removal.
ig_index = [pos for pos, gene in enumerate(adata_allb.var_names) if gene in ig_genes]

In [107]:
# use decoupler to make pseudobulk of all b cells by isotype and celltype. 
# isotype cutoff = 0.5, min cells = 20
# Values from the 'X_raw' layer are summed within each (sample.sampleKitGuid, celltype_isotype) pair.
# min_cells / min_counts are decoupler's filters on the sample-group pairs.
adata_pb = dc.get_pseudobulk(
    adata = adata_allb, 
    sample_col = "sample.sampleKitGuid", 
    groups_col = 'celltype_isotype', 
    layer = 'X_raw',
    min_cells=20, 
    min_counts=1000,
    mode='sum'
)

In [108]:
# Distribution of total counts per pseudobulk profile.
adata_pb.obs['psbulk_counts'].describe()

count    1.156000e+03
mean     6.723391e+05
std      1.135757e+06
min      5.055600e+04
25%      1.618782e+05
50%      2.736015e+05
75%      5.567242e+05
max      1.113675e+07
Name: psbulk_counts, dtype: float64

In [109]:
# Distribution of total counts per pseudobulk profile (this cell repeats the summary shown in the cell before it).
adata_pb.obs['psbulk_counts'].describe()

count    1.156000e+03
mean     6.723391e+05
std      1.135757e+06
min      5.055600e+04
25%      1.618782e+05
50%      2.736015e+05
75%      5.567242e+05
max      1.113675e+07
Name: psbulk_counts, dtype: float64

In [110]:
# obs columns still stored as object dtype; the casts further down address a subset of these.
[col for col, dtype in adata_pb.obs.dtypes.items() if dtype == 'object']

['batch_id',
 'hto_barcode',
 'hto_category',
 'pbmc_sample_id',
 'pool_id',
 'sample.sampleKitGuid',
 'cohort.cohortGuid',
 'subject.subjectGuid',
 'subject.biologicalSex',
 'subject.race',
 'subject.ethnicity',
 'subject.birthYear',
 'sample.visitName',
 'sample.drawDate',
 'sample.diseaseStatesRecordedAtVisit',
 'sample.daysSinceFirstVisit',
 'file.id',
 'subset_grp',
 'predicted_doublet',
 'AIFI_L3',
 'doublets_manual',
 'AIFI_L3_new',
 'Status_Xsec',
 'Status_Long',
 'anti_ccp3_finalCombined',
 'days_to_conversion',
 'BMI',
 'CMV_Status_Subj',
 'age_conv',
 'bmi_conv',
 'file.batchID',
 'status',
 'total_b_counts',
 'IGHM_pos',
 'IGHD_pos',
 'switched_status',
 'isotype',
 'celltype_isotype']

In [111]:
# Metadata columns present on the pseudobulk object.
adata_pb.obs.columns

Index(['batch_id', 'hto_barcode', 'hto_category', 'pbmc_sample_id', 'pool_id',
       'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid',
       'subject.biologicalSex', 'subject.race', 'subject.ethnicity',
       'subject.birthYear', 'sample.visitName', 'sample.drawDate',
       'sample.diseaseStatesRecordedAtVisit', 'sample.daysSinceFirstVisit',
       'file.id', 'subset_grp', 'predicted_doublet', 'AIFI_L3',
       'doublets_manual', 'AIFI_L3_new', 'Status_Xsec', 'Status_Long',
       'anti_ccp3_finalCombined', 'days_to_conversion', 'BMI',
       'CMV_Status_Subj', 'age_conv', 'bmi_conv', 'file.batchID', 'status',
       'total_b_counts', 'IGHM_pos', 'IGHD_pos', 'switched_status', 'isotype',
       'celltype_isotype', 'psbulk_n_cells', 'psbulk_counts'],
      dtype='object')

In [112]:
# Inspect the values of IGHM_pos before it is cast to bool.
adata_pb.obs['IGHM_pos']

KT00057_CD27+ effector B cell_IGHM               True
KT00077_CD27+ effector B cell_IGHM               True
KT00099_CD27+ effector B cell_IGHM               True
KT00120_CD27+ effector B cell_IGHM               True
KT00221_CD27+ effector B cell_IGHM               True
                                                ...  
KT02817_Type 2 polarized memory B cell_IGHG3    False
KT02845_Type 2 polarized memory B cell_IGHG3    False
KT00069_Type 2 polarized memory B cell_IGHM      True
KT00092_Type 2 polarized memory B cell_IGHM      True
KT00477_Type 2 polarized memory B cell_IGHM      True
Name: IGHM_pos, Length: 1156, dtype: object

In [113]:
# Keep a copy of the current X matrix in a 'counts' layer.
adata_pb.layers['counts'] = adata_pb.X.copy()

In [114]:
# age_conv is stored as object dtype on the pseudobulk obs; cast it to float.
adata_pb.obs['age_conv'] = adata_pb.obs['age_conv'].astype(np.float64)

In [115]:
# IGHM_pos is stored as object dtype on the pseudobulk obs; cast it to bool.
# NOTE(review): astype(bool) turns NaN and any non-empty string (including 'False') into True —
# confirm the column holds real booleans.
adata_pb.obs['IGHM_pos'] = adata_pb.obs['IGHM_pos'].astype(bool)

In [116]:
# IGHD_pos is stored as object dtype on the pseudobulk obs; cast it to bool.
# NOTE(review): astype(bool) turns NaN and any non-empty string (including 'False') into True —
# confirm the column holds real booleans.
adata_pb.obs['IGHD_pos'] = adata_pb.obs['IGHD_pos'].astype(bool)

In [117]:
# predicted_doublet is stored as object dtype on the pseudobulk obs; cast it to bool.
# NOTE(review): astype(bool) turns NaN and any non-empty string (including 'False') into True —
# confirm the column holds real booleans.
adata_pb.obs['predicted_doublet'] = adata_pb.obs['predicted_doublet'].astype(bool)

In [118]:
# sample.daysSinceFirstVisit is stored as object dtype on the pseudobulk obs; cast it to integer.
adata_pb.obs['sample.daysSinceFirstVisit'] = adata_pb.obs['sample.daysSinceFirstVisit'].astype(np.int64)

In [119]:
# total_b_counts is stored as object dtype on the pseudobulk obs; cast it to integer.
adata_pb.obs['total_b_counts'] = adata_pb.obs['total_b_counts'].astype(np.int64)

In [120]:
# anti_ccp3_finalCombined is stored as object dtype on the pseudobulk obs; cast it to float.
adata_pb.obs['anti_ccp3_finalCombined'] = adata_pb.obs['anti_ccp3_finalCombined'].astype(np.float64)

In [121]:
# days_to_conversion is stored as object dtype on the pseudobulk obs; cast it to float.
adata_pb.obs['days_to_conversion'] = adata_pb.obs['days_to_conversion'].astype(np.float64)

In [122]:
# BMI is stored as object dtype on the pseudobulk obs; cast it to float.
adata_pb.obs['BMI'] = adata_pb.obs['BMI'].astype(np.float64)

In [123]:
# bmi_conv is stored as object dtype on the pseudobulk obs; cast it to float.
adata_pb.obs['bmi_conv'] = adata_pb.obs['bmi_conv'].astype(np.float64)

In [124]:
# subject.birthYear is stored as object dtype on the pseudobulk obs; cast it to integer.
adata_pb.obs['subject.birthYear'] = adata_pb.obs['subject.birthYear'].astype(np.int64)

In [125]:
# Same subject.birthYear cast as the cell directly before this one; casting again leaves the dtype unchanged.
adata_pb.obs['subject.birthYear'] = adata_pb.obs['subject.birthYear'].astype(np.int64)

### Save

In [126]:
# Write the celltype-by-isotype pseudobulk object to an h5ad file under outdir.
out_ad_fp = os.path.join(outdir,'ALTRA_certPro_scRNA_141_samples_combined_adata_B_isotype_pseudobulk_20cell.h5ad')
adata_pb.write_h5ad(out_ad_fp)

## Make Test Pseudobulk

Pseudobulk B cells by L3 celltype only for comparison to Zi's analysis

In [127]:
# Cells per L3 celltype label.
adata_allb.obs['AIFI_L3_new'].value_counts()

Core naive B cell                 109212
Core memory B cell                 38569
Transitional B cell                15358
CD27+ effector B cell               6453
ISG+ naive B cell                   6302
CD27- effector B cell               6213
Type 2 polarized memory B cell      3264
Plasma cell                         2053
CD95 memory B cell                  1999
Early memory B cell                 1448
Activated memory B cell              349
Name: AIFI_L3_new, dtype: int64

In [128]:
# Positions of the Ig genes within adata_allb.var_names.
# NOTE(review): no later cell appears to read ig_index — candidate for removal.
ig_index = [pos for pos, gene in enumerate(adata_allb.var_names) if gene in ig_genes]

In [129]:
# use decoupler to make pseudobulk of all b cells by L3 celltype only (no isotype split).
# min cells = 20, min counts = 1000
adata_pb_L3 = dc.get_pseudobulk(
    adata = adata_allb, 
    sample_col = "sample.sampleKitGuid", 
    groups_col = 'AIFI_L3_new', 
    layer = 'X_raw',
    min_cells=20, 
    min_counts=1000,
    mode='sum'
)

In [130]:
# Distribution of total counts per L3 pseudobulk profile.
adata_pb_L3.obs['psbulk_counts'].describe()

count    8.130000e+02
mean     1.064851e+06
std      1.407353e+06
min      6.040000e+04
25%      2.073790e+05
50%      4.691180e+05
75%      1.422112e+06
max      1.151130e+07
Name: psbulk_counts, dtype: float64

In [131]:
# obs columns still stored as object dtype; the casts further down address a subset of these.
[col for col, dtype in adata_pb_L3.obs.dtypes.items() if dtype == 'object']

['batch_id',
 'hto_barcode',
 'hto_category',
 'pbmc_sample_id',
 'pool_id',
 'sample.sampleKitGuid',
 'cohort.cohortGuid',
 'subject.subjectGuid',
 'subject.biologicalSex',
 'subject.race',
 'subject.ethnicity',
 'subject.birthYear',
 'sample.visitName',
 'sample.drawDate',
 'sample.diseaseStatesRecordedAtVisit',
 'sample.daysSinceFirstVisit',
 'file.id',
 'subset_grp',
 'predicted_doublet',
 'AIFI_L3',
 'doublets_manual',
 'AIFI_L3_new',
 'Status_Xsec',
 'Status_Long',
 'anti_ccp3_finalCombined',
 'days_to_conversion',
 'BMI',
 'CMV_Status_Subj',
 'age_conv',
 'bmi_conv',
 'file.batchID',
 'status',
 'total_b_counts']

In [132]:
# Show the structure of the L3 pseudobulk object (dimensions, obs/var columns, layers).
adata_pb_L3

AnnData object with n_obs × n_vars = 813 × 26049
    obs: 'batch_id', 'hto_barcode', 'hto_category', 'pbmc_sample_id', 'pool_id', 'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid', 'subject.biologicalSex', 'subject.race', 'subject.ethnicity', 'subject.birthYear', 'sample.visitName', 'sample.drawDate', 'sample.diseaseStatesRecordedAtVisit', 'sample.daysSinceFirstVisit', 'file.id', 'subset_grp', 'predicted_doublet', 'AIFI_L3', 'doublets_manual', 'AIFI_L3_new', 'Status_Xsec', 'Status_Long', 'anti_ccp3_finalCombined', 'days_to_conversion', 'BMI', 'CMV_Status_Subj', 'age_conv', 'bmi_conv', 'file.batchID', 'status', 'total_b_counts', 'psbulk_n_cells', 'psbulk_counts'
    var: 'mean', 'std'
    layers: 'psbulk_props'

In [133]:
# Metadata columns present on the L3 pseudobulk object.
adata_pb_L3.obs.columns

Index(['batch_id', 'hto_barcode', 'hto_category', 'pbmc_sample_id', 'pool_id',
       'sample.sampleKitGuid', 'cohort.cohortGuid', 'subject.subjectGuid',
       'subject.biologicalSex', 'subject.race', 'subject.ethnicity',
       'subject.birthYear', 'sample.visitName', 'sample.drawDate',
       'sample.diseaseStatesRecordedAtVisit', 'sample.daysSinceFirstVisit',
       'file.id', 'subset_grp', 'predicted_doublet', 'AIFI_L3',
       'doublets_manual', 'AIFI_L3_new', 'Status_Xsec', 'Status_Long',
       'anti_ccp3_finalCombined', 'days_to_conversion', 'BMI',
       'CMV_Status_Subj', 'age_conv', 'bmi_conv', 'file.batchID', 'status',
       'total_b_counts', 'psbulk_n_cells', 'psbulk_counts'],
      dtype='object')

In [134]:
# Keep a copy of the current X matrix in a 'counts' layer.
adata_pb_L3.layers['counts'] = adata_pb_L3.X.copy()

In [135]:
# age_conv is stored as object dtype on the L3 pseudobulk obs; cast it to float.
adata_pb_L3.obs['age_conv'] = adata_pb_L3.obs['age_conv'].astype(np.float64)

In [136]:
# predicted_doublet is stored as object dtype on the L3 pseudobulk obs; cast it to bool.
# NOTE(review): astype(bool) turns NaN and any non-empty string (including 'False') into True —
# confirm the column holds real booleans.
adata_pb_L3.obs['predicted_doublet'] = adata_pb_L3.obs['predicted_doublet'].astype(bool)

In [137]:
# sample.daysSinceFirstVisit is stored as object dtype on the L3 pseudobulk obs; cast it to integer.
adata_pb_L3.obs['sample.daysSinceFirstVisit'] = adata_pb_L3.obs['sample.daysSinceFirstVisit'].astype(np.int64)

In [138]:
# total_b_counts is stored as object dtype on the L3 pseudobulk obs; cast it to integer.
adata_pb_L3.obs['total_b_counts'] = adata_pb_L3.obs['total_b_counts'].astype(np.int64)

In [139]:
# anti_ccp3_finalCombined is stored as object dtype on the L3 pseudobulk obs; cast it to float.
adata_pb_L3.obs['anti_ccp3_finalCombined'] = adata_pb_L3.obs['anti_ccp3_finalCombined'].astype(np.float64)

In [140]:
# days_to_conversion is stored as object dtype on the L3 pseudobulk obs; cast it to float.
adata_pb_L3.obs['days_to_conversion'] = adata_pb_L3.obs['days_to_conversion'].astype(np.float64)

In [141]:
# BMI is stored as object dtype on the L3 pseudobulk obs; cast it to float.
adata_pb_L3.obs['BMI'] = adata_pb_L3.obs['BMI'].astype(np.float64)

In [142]:
# bmi_conv is stored as object dtype on the L3 pseudobulk obs; cast it to float.
adata_pb_L3.obs['bmi_conv'] = adata_pb_L3.obs['bmi_conv'].astype(np.float64)

In [143]:
# subject.birthYear is stored as object dtype on the L3 pseudobulk obs; cast it to integer.
adata_pb_L3.obs['subject.birthYear'] = adata_pb_L3.obs['subject.birthYear'].astype(np.int64)

In [144]:
# Same subject.birthYear cast as the cell directly before this one; casting again leaves the dtype unchanged.
adata_pb_L3.obs['subject.birthYear'] = adata_pb_L3.obs['subject.birthYear'].astype(np.int64)

### Save

In [145]:
# Write the L3-only pseudobulk object to an h5ad file under outdir (reuses the out_ad_fp name).
out_ad_fp = os.path.join(outdir,'ALTRA_certPro_scRNA_141_samples_combined_adata_B_L3_pseudobulk_20cell.h5ad')
adata_pb_L3.write_h5ad(out_ad_fp)

## Upload Results

In [155]:
# Map each available study space name to its id.
{space['name']: space['id'] for space in hp.get_study_spaces()}

{'In vitro T cell perturbations caused by VRd therapy': '40df6403-29f0-4b45-ab7d-f46d420c422e',
 'TEA-seq: Simultaneous trimodal single-cell measurement of transcripts, epitopes, and chromatin accessibility': '350ce228-b130-4524-914d-7dcb7befe6a5',
 'Custom Dash App Plot Development': 'd72fb3ce-3a6d-41b5-9bb1-5fd3159312da',
 'Pre-RA Longitudinal Analysis': '652f8bc9-e0bd-45f6-82a5-b9ff524c0c70',
 'HISE NOVA Dev': '119f0b9e-6717-4892-a163-a0a986db8a37',
 'preRA cross-sectional + earlyRA + Longitudinal analysis': '223de760-9624-45bd-aefe-ca24c75b1800',
 'IH&A Analysis': 'de025812-5e73-4b3c-9c3b-6d0eac412f2a',
 'IH&A Data Apps (for testing)': 'c1e0e3d8-9914-43b4-a932-1776c6197908'}

In [157]:
# Id of the 'preRA cross-sectional + earlyRA + Longitudinal analysis' study space listed above.
ssid = '223de760-9624-45bd-aefe-ca24c75b1800'

In [158]:
# Input file ids that are recorded with the uploads below.
fid_in

['609f7543-d4d5-41e9-a3d2-8e50c3e7c61d',
 '89859cec-8d36-47fd-a165-f0f34ae90284']

In [160]:
# Cell level isotype calls, cutoff 0.5
# The path is built from `outdir` and the file name used by the save cell, so the two cannot drift apart.
# NOTE(review): the title is more generic than the one used for the cutoff-2 table — confirm it is intended.
hp.upload_files(
    files = [os.path.join(outdir, 'adata-all-b_isotypes_cutoff0.5.csv')],
    study_space_id = ssid,
    title = 'scRNA B Cell Isotype Analysis Output',
    input_file_ids = fid_in,
    destination = 'scRNA_BCellIsotype/v1'
)

Cannot determine the current notebook.
1) /home/jupyter/ra-longitudinal/scRNA/Aim3_switched-followup/01_py_AssignIsotype.ipynb
2) /home/jupyter/ra-longitudinal/scRNA/Aim3_switched-followup/02_R_SwitchedFrequencyAnalysis.ipynb
3) /home/jupyter/ra-longitudinal/scRNA/Aim3_switched-followup/03_R_IGHExpressionAnalysis_byIsotype.ipynb
Please select (1-3) 


 1


you are trying to upload file_ids... ['./_output/certpro/adata-all-b_isotypes_cutoff0.5.csv']. Do you truly want to proceed?


(y/n) y


{'trace_id': '9022459a-13ee-4126-9830-ff85f04e7d66',
 'files': ['./_output/certpro/adata-all-b_isotypes_cutoff0.5.csv']}

In [161]:
# Cell level isotype calls, cutoff 2
# The path is built from `outdir` and the file name used by the save cell, so the two cannot drift apart.
hp.upload_files(
    files = [os.path.join(outdir, 'adata-all-b_isotypes_cutoff2.csv')],
    study_space_id = ssid,
    title = 'scRNA B Cell Isotype Counts, cell level table, cutoff 2',
    input_file_ids = fid_in,
    destination = 'scRNA_BCellIsotype/v1'
)

you are trying to upload file_ids... ['./_output/certpro/adata-all-b_isotypes_cutoff2.csv']. Do you truly want to proceed?


(y/n) y


{'trace_id': '0afb172c-3645-4bdf-b5de-ec9c2eba02aa',
 'files': ['./_output/certpro/adata-all-b_isotypes_cutoff2.csv']}

In [163]:
# All Cell Isotype Thresholds
# The path is built from `outdir`, and the input id reuses `fid_alldata` instead of re-typing the same id.
hp.upload_files(
    files = [os.path.join(outdir, 'check_igh_normcounts_alltypes.csv')],
    study_space_id = ssid,
    title = 'scRNA All Celltype IgH Isotype Threshold Check',
    input_file_ids = [fid_alldata],
    destination = 'scRNA_BCellIsotype/v1'
)

you are trying to upload file_ids... ['./_output/certpro/check_igh_normcounts_alltypes.csv']. Do you truly want to proceed?


(y/n) y


{'trace_id': '98269fd8-fee7-4efc-a4c0-ad9e7fbb4785',
 'files': ['./_output/certpro/check_igh_normcounts_alltypes.csv']}

In [164]:
# Pseudobulk by Isotype B cell object
# The path is built from `outdir` and the file name used by the save cell, so the two cannot drift apart.
hp.upload_files(
    files = [os.path.join(outdir, 'ALTRA_certPro_scRNA_141_samples_combined_adata_B_isotype_pseudobulk_20cell.h5ad')],
    study_space_id = ssid,
    title = 'scRNA B Cell Isotype Pseudobulk H5ad',
    input_file_ids = fid_in,
    destination = 'scRNA_BCellIsotype/v1'
)

you are trying to upload file_ids... ['./_output/certpro/ALTRA_certPro_scRNA_141_samples_combined_adata_B_isotype_pseudobulk_20cell.h5ad']. Do you truly want to proceed?


(y/n) y


{'trace_id': 'e149353f-abef-42b7-93db-55d9c73b3c09',
 'files': ['./_output/certpro/ALTRA_certPro_scRNA_141_samples_combined_adata_B_isotype_pseudobulk_20cell.h5ad']}

## Session Info

In [165]:
import session_info

In [166]:
# Display the session_info report for this notebook run.
session_info.show()