# Testing/Benchmarking Celltypist Models 
### List of Models (made in Making New Models.ipynb)
1. Remove the feature selection from CellTypist (so it only trains the model once)
2. Train the model with L1 regularization instead of L2
3. Train the model only once with only Cytopus genes
4. At the feature selection step, make sure the Cytopus genes are included in the list of top genes

In [1]:
import scanpy as sc
import pandas as pd
import anndata as ad
from anndata import AnnData
import numpy as np
from scipy.sparse import spmatrix
from datetime import datetime
import itertools
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt

# NOTE(review): the CellTypist imports are disabled, but cells below still call
# `models.download_models`, `models.Model.load` and `ct.annotate`; re-enable
# these two lines before running those cells.
#import celltypist as ct
#from celltypist import models

# `annotate` is called directly by the Glasner cells below
# (presumably a local, modified copy of CellTypist's annotate - TODO confirm).
from  annotate import annotate


In [None]:
# Fetch the stock CellTypist model used as the reference.
models.download_models(model='Immune_All_Low.pkl')

# Load the four custom CT_45 models (variants 1-4 from the list at the top
# of the notebook).
model_1 = models.Model.load(
    'New Models/CT_45 Models/ct_model_1.pkl'
)
model_2 = models.Model.load(
    'New Models/CT_45 Models/ct_model_2.pkl'
)
model_3 = models.Model.load(
    'New Models/CT_45 Models/ct_model_3.pkl'
)
model_4 = models.Model.load(
    'New Models/CT_45 Models/ct_model_4.pkl'
)

In [None]:
# Fetch the stock CellTypist COVID-19 PBMC model.
models.download_models(
    model='Healthy_COVID19_PBMC.pkl',
)

## Get celltype predictions from each model

### Using CT_45 Models

In [None]:
# Import test data - subset of the CellTypist (CT_45) data.
# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the latter directly.
test = ad.read_h5ad('../../Data/Celltypist_test.h5ad')

In [None]:
# Model 0 on the CT_45 test set, with majority voting; the last line displays
# the predicted labels.
predictions_ct = ct.annotate(
    test,
    model='New Models/CT_45 Models/ct_model_0.pkl',
    majority_voting=True,
)
predictions_ct.predicted_labels

In [None]:
# Convert the model 0 result to an AnnData object and save it to disk so the
# F1 cells can use its `.obs` columns.
pred_adatact = predictions_ct.to_adata()
pred_adatact.write_h5ad('../../pred_modelct.h5ad')

Model 1

In [None]:
# Model 1 (no feature selection) on the CT_45 test set, with majority voting.
predictions_1 = ct.annotate(
    test,
    model='New Models/CT_45 Models/ct_model_1.pkl',
    majority_voting=True,
)
predictions_1.predicted_labels

In [None]:
# Convert the model 1 result to AnnData and save it to disk.
pred_adata1 = predictions_1.to_adata()
pred_adata1.write_h5ad('../../pred_model1.h5ad')

Model 2

In [None]:
# Model 2 (L1 regularization) on the CT_45 test set, with majority voting.
predictions_2 = ct.annotate(
    test,
    model='New Models/CT_45 Models/ct_model_2.pkl',
    majority_voting=True,
)
predictions_2.predicted_labels

In [None]:
# Convert the model 2 result to an AnnData object.
pred_adata2 = predictions_2.to_adata()

In [None]:
# The write is disabled; load the saved model 2 predictions from disk instead
# (this rebinds `pred_adata2`).
# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the latter directly.
#pred_adata2.write_h5ad('../../pred_model2.h5ad')
pred_adata2 = ad.read_h5ad('../../pred_model2.h5ad')

Model 3

In [None]:
# Model 3 (Cytopus genes only) on the CT_45 test set, with majority voting.
predictions_3 = ct.annotate(
    test,
    model='New Models/CT_45 Models/ct_model_3.pkl',
    majority_voting=True,
)
predictions_3.predicted_labels

In [None]:
# Convert the model 3 result to an AnnData object.
pred_adata3 = predictions_3.to_adata()

In [None]:
# The write is disabled; load the saved model 3 predictions from disk instead
# (this rebinds `pred_adata3`).
# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the latter directly.
#pred_adata3.write_h5ad('../../pred_model3.h5ad')
pred_adata3 = ad.read_h5ad('../../pred_model3.h5ad')

Model 4

In [None]:
# Model 4 (feature selection with Cytopus genes forced in) on the CT_45 test
# set, with majority voting.
predictions_4 = ct.annotate(
    test,
    model='New Models/CT_45 Models/ct_model_4.pkl',
    majority_voting=True,
)
predictions_4.predicted_labels

In [None]:
# Convert the model 4 result to an AnnData object.
pred_adata4 = predictions_4.to_adata()

In [None]:
# The write is disabled; load the saved model 4 predictions from disk instead
# (this rebinds `pred_adata4`).
# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the latter directly.
#pred_adata4.write_h5ad('../../pred_model4.h5ad')
pred_adata4 = ad.read_h5ad('../../pred_model4.h5ad')

### Using CT_98 Models

In [None]:
# Import test data - subset of the CT_98 data.
# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the latter directly.
test_98 = ad.read_h5ad('../../Data/CT_98_Test.h5ad')

Model 0

In [None]:
# Model 0 on the CT_98 test set: annotate with majority voting, convert to
# AnnData, save.
predictions_98_0 = ct.annotate(
    test_98,
    model='New Models/CT_98 Models/98_model_0.pkl',
    majority_voting=True,
)
#predictions_98_0.predicted_labels
pred_adata98_0 = predictions_98_0.to_adata()
pred_adata98_0.write_h5ad(
    '../../predictions/pred_98_model0.h5ad'
)

Model 2

In [None]:
# Model 2 (L1 regularization) on the CT_98 test set: annotate with majority
# voting, convert to AnnData, save.
predictions_98_2 = ct.annotate(
    test_98,
    model='New Models/CT_98 Models/98_model_2.pkl',
    majority_voting=True,
)
#predictions_98_2.predicted_labels
pred_adata98_2 = predictions_98_2.to_adata()
pred_adata98_2.write_h5ad(
    '../../predictions/pred_98_model2.h5ad'
)

Model 3

In [None]:
# Model 3 (Cytopus genes only) on the CT_98 test set: annotate with majority
# voting, convert to AnnData, save.
predictions_98_3 = ct.annotate(
    test_98,
    model='New Models/CT_98 Models/98_model_3.pkl',
    majority_voting=True,
)
#predictions_98_3.predicted_labels
pred_adata98_3 = predictions_98_3.to_adata()
pred_adata98_3.write_h5ad(
    '../../predictions/pred_98_model3.h5ad'
)

Model 4

In [None]:
# Model 4 (feature selection with Cytopus genes forced in) on the CT_98 test
# set: annotate with majority voting, convert to AnnData, save.
predictions_98_4 = ct.annotate(
    test_98,
    model='New Models/CT_98 Models/98_model_4.pkl',
    majority_voting=True,
)
#predictions_98_4.predicted_labels
pred_adata98_4 = predictions_98_4.to_adata()
pred_adata98_4.write_h5ad(
    '../../predictions/pred_98_model4.h5ad'
)

### Using COV_PBMC Models

In [None]:
# Import test data - subset of the COV_PBMC data.
# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the latter directly.
test_COV = ad.read_h5ad('../../Data/test_COV.h5ad')
#test_COV_cp = ad.read_h5ad('../../Data/test_COV_cp.h5ad')

Model 0

In [None]:
# Model 0 on the COV_PBMC test set: annotate with majority voting, convert to
# AnnData, save.
predictions_COV_0 = ct.annotate(
    test_COV,
    model='New Models/COV_PBMC Models/COV_model_0.pkl',
    majority_voting=True,
)
#predictions_COV_0.predicted_labels
pred_adataCOV_0 = predictions_COV_0.to_adata()
pred_adataCOV_0.write_h5ad(
    '../../predictions/pred_COV_model0.h5ad'
)

Model 2

In [None]:
# Model 2 (L1 regularization) on the COV_PBMC test set: annotate with majority
# voting, convert to AnnData, save.
predictions_COV_2 = ct.annotate(
    test_COV,
    model='New Models/COV_PBMC Models/COV_model_2.pkl',
    majority_voting=True,
)
#predictions_COV_2.predicted_labels
pred_adataCOV_2 = predictions_COV_2.to_adata()
pred_adataCOV_2.write_h5ad(
    '../../predictions/pred_COV_model2.h5ad'
)

Model 3

In [None]:
# Model 3 (Cytopus genes only) on the COV_PBMC test set: annotate with
# majority voting, convert to AnnData, save.
predictions_COV_3 = ct.annotate(
    test_COV,
    model='New Models/COV_PBMC Models/COV_model_3.pkl',
    majority_voting=True,
)
#predictions_COV_3.predicted_labels
pred_adataCOV_3 = predictions_COV_3.to_adata()
pred_adataCOV_3.write_h5ad(
    '../../predictions/pred_COV_model3.h5ad'
)

In [None]:
#making sure the f1 score is the same if we use dataset with all genes vs just cytopus genes 
#predictions_COV_cp_3 = ct.annotate(test_COV_cp, model = 'New Models/COV_PBMC Models/COV_model_3.pkl', majority_voting = True)
#predictions_COV_3.predicted_labels
#pred_adataCOV_cp_3 = predictions_COV_cp_3.to_adata()
#pred_adataCOV_cp_3.write_h5ad('../../predictions/pred_COV_cp_model3.h5ad')

Model 4

In [None]:
# Model 4 (feature selection with Cytopus genes forced in) on the COV_PBMC
# test set: annotate with majority voting, convert to AnnData, save.
predictions_COV_4 = ct.annotate(
    test_COV,
    model='New Models/COV_PBMC Models/COV_model_4.pkl',
    majority_voting=True,
)
#predictions_COV_4.predicted_labels
pred_adataCOV_4 = predictions_COV_4.to_adata()
pred_adataCOV_4.write_h5ad(
    '../../predictions/pred_COV_model4.h5ad'
)

### Using Glasner Models

In [3]:
# Import the Glasner test data (path is on the lilac cluster).
# `ad.read` is a deprecated alias of `ad.read_h5ad`; call the latter directly.
test_g = ad.read_h5ad('/data/peer/adamsj5/cell_typing/test_glas.h5ad')  # lilac location

Model 0

In [22]:
# Model 0 on the Glasner test set, using the directly imported `annotate`:
# annotate with majority voting, convert to AnnData, save.
predictions_g_0 = annotate(
    test_g,
    model='New Models/Glasner Models/g_model_0.pkl',
    majority_voting=True,
)
pred_adatag_0 = predictions_g_0.to_adata()
pred_adatag_0.write_h5ad(
    '/data/peer/adamsj5/cell_typing/pred_g_model0.h5ad'
)

👀 Invalid expression matrix in `.X`, expect log1p normalized expression to 10000 counts per cell; will try the `.raw` attribute
🛑 Invalid expression matrix in both `.X` and `.raw.X`, expect log1p normalized expression to 10000 counts per cell
🔬 Input data has 24898 cells and 25944 genes
🔗 Matching reference genes in the model


ah


🧬 3661 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Detected a neighborhood graph in the input object, will run over-clustering on the basis of it
⛓️ Over-clustering input data with resolution set to 15
🗳️ Majority voting the predictions
✅ Majority voting done!


In [42]:
# Check whether models 0 and 2 assign identical per-cell labels on the
# Glasner test set.
# NOTE: `predictions_g_2` comes from the Model 2 cell further down, so that
# cell must be run first.
predictions_g_0.predicted_labels["predicted_labels"].equals(predictions_g_2.predicted_labels["predicted_labels"])

False

In [None]:
# ln(1 + 10000), about 9.21.
# NOTE(review): presumably a reference value for the "log1p normalized
# expression to 10000 counts per cell" warning above - confirm.
np.log1p(10000)

Model 2

In [5]:
# Model 2 (L1 regularization) on the Glasner test set: annotate with majority
# voting, convert to AnnData, save.
predictions_g_2 = annotate(
    test_g,
    model='New Models/Glasner Models/g_model_2.pkl',
    majority_voting=True,
)
pred_adatag_2 = predictions_g_2.to_adata()
pred_adatag_2.write_h5ad(
    '/data/peer/adamsj5/cell_typing/pred_g_model2.h5ad'
)

👀 Invalid expression matrix in `.X`, expect log1p normalized expression to 10000 counts per cell; will try the `.raw` attribute
🛑 Invalid expression matrix in both `.X` and `.raw.X`, expect log1p normalized expression to 10000 counts per cell
🔬 Input data has 24898 cells and 25944 genes
🔗 Matching reference genes in the model


ah


🧬 4128 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Detected a neighborhood graph in the input object, will run over-clustering on the basis of it
⛓️ Over-clustering input data with resolution set to 15
🗳️ Majority voting the predictions
✅ Majority voting done!


Model 3

In [6]:
# Model 3 (Cytopus genes only) on the Glasner test set: annotate with majority
# voting, convert to AnnData, save.
predictions_g_3 = annotate(
    test_g,
    model='New Models/Glasner Models/g_model_3.pkl',
    majority_voting=True,
)
pred_adatag_3 = predictions_g_3.to_adata()
# Filename fix: this was 'pred_COV_model3.h5ad' (copy-paste from the COV_PBMC
# section), which mislabels Glasner predictions. It now follows the
# pred_g_model<N> naming used for Glasner models 0 and 2.
pred_adatag_3.write_h5ad('/data/peer/adamsj5/cell_typing/pred_g_model3.h5ad')

👀 Invalid expression matrix in `.X`, expect log1p normalized expression to 10000 counts per cell; will try the `.raw` attribute
🛑 Invalid expression matrix in both `.X` and `.raw.X`, expect log1p normalized expression to 10000 counts per cell
🔬 Input data has 24898 cells and 25944 genes
🔗 Matching reference genes in the model


ah


🧬 304 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Detected a neighborhood graph in the input object, will run over-clustering on the basis of it
⛓️ Over-clustering input data with resolution set to 15
🗳️ Majority voting the predictions
✅ Majority voting done!


Model 4

In [8]:
# Model 4 (feature selection with Cytopus genes forced in) on the Glasner test
# set: annotate with majority voting, convert to AnnData, save.
predictions_g_4 = annotate(
    test_g,
    model='New Models/Glasner Models/g_model_4.pkl',
    majority_voting=True,
)
pred_adatag_4 = predictions_g_4.to_adata()
# Filename fix: this was 'pred_COV_model4.h5ad' (copy-paste from the COV_PBMC
# section), which mislabels Glasner predictions. It now follows the
# pred_g_model<N> naming used for Glasner models 0 and 2.
pred_adatag_4.write_h5ad('/data/peer/adamsj5/cell_typing/pred_g_model4.h5ad')

👀 Invalid expression matrix in `.X`, expect log1p normalized expression to 10000 counts per cell; will try the `.raw` attribute
🛑 Invalid expression matrix in both `.X` and `.raw.X`, expect log1p normalized expression to 10000 counts per cell
🔬 Input data has 24898 cells and 25944 genes
🔗 Matching reference genes in the model


ah


🧬 3780 features used for prediction
⚖️ Scaling input data
🖋️ Predicting labels
✅ Prediction done!
👀 Detected a neighborhood graph in the input object, will run over-clustering on the basis of it
⛓️ Over-clustering input data with resolution set to 15
🗳️ Majority voting the predictions
✅ Majority voting done!


## Benchmarking
### F1 scores
Abdelaal et al. used median F1 scores as their primary statistic

#### Train & Test on CT_45

In [None]:
# Display the per-cell predicted labels stored in the model 0 AnnData object.
pred_adatact.obs["predicted_labels"]

In [None]:
# Original CellTypist (model 0), CT_45 - median per-class F1, noted score: 0.892
np.median(
    f1_score(
        pred_adatact.obs["Manually_curated_celltype"],
        pred_adatact.obs["predicted_labels"],
        average=None,
    )
)

In [None]:
#model 1 - cant get it to run, gets stuck on Scaling for too long 
#np.median(f1_score(pred_adata1.obs["Manually_curated_celltype"], pred_adata1.obs["predicted_labels"], average=None))

In [None]:
# Model 2 (L1 regularization), CT_45 - median per-class F1, noted score: 0.74
np.median(
    f1_score(
        pred_adata2.obs["Manually_curated_celltype"],
        pred_adata2.obs["predicted_labels"],
        average=None,
    )
)

In [None]:
# Model 3 (Cytopus genes only), CT_45 - median per-class F1, noted score: 0.79
np.median(
    f1_score(
        pred_adata3.obs["Manually_curated_celltype"],
        pred_adata3.obs["predicted_labels"],
        average=None,
    )
)

In [None]:
# Model 4 (Cytopus genes forced into feature selection), CT_45 - median
# per-class F1, noted score: 0.887
np.median(
    f1_score(
        pred_adata4.obs["Manually_curated_celltype"],
        pred_adata4.obs["predicted_labels"],
        average=None,
    )
)

#### Train & Test on CT_98

In [None]:
# Original CellTypist (model 0), CT_98 - median per-class F1, noted score: 0.844
np.median(
    f1_score(
        pred_adata98_0.obs["Harmonised_detailed_type"],
        pred_adata98_0.obs["predicted_labels"],
        average=None,
    )
)

In [None]:
# Model 2 (L1 regularization), CT_98 - median per-class F1, noted score: 0.508
np.median(
    f1_score(
        pred_adata98_2.obs["Harmonised_detailed_type"],
        pred_adata98_2.obs["predicted_labels"],
        average=None,
    )
)

In [None]:
# Model 3 (Cytopus genes only), CT_98 - median per-class F1,
# noted score: 0.810 (unconfirmed)
np.median(
    f1_score(
        pred_adata98_3.obs["Harmonised_detailed_type"],
        pred_adata98_3.obs["predicted_labels"],
        average=None,
    )
)

In [None]:
# Model 4 (Cytopus genes forced into feature selection), CT_98 - median
# per-class F1, noted score: 0.810 (unconfirmed)
np.median(
    f1_score(
        pred_adata98_4.obs["Harmonised_detailed_type"],
        pred_adata98_4.obs["predicted_labels"],
        average=None,
    )
)

#### Train & Test on COV_PBMC

In [None]:
# Original CellTypist (model 0), COV_PBMC - median per-class F1, noted score: 0.609
np.median(
    f1_score(
        pred_adataCOV_0.obs["full_clustering"],
        pred_adataCOV_0.obs["predicted_labels"],
        average=None,
    )
)

In [None]:
# Model 2 (L1 regularization), COV_PBMC - median per-class F1 (no score noted)
np.median(
    f1_score(
        pred_adataCOV_2.obs["full_clustering"],
        pred_adataCOV_2.obs["predicted_labels"],
        average=None,
    )
)

In [None]:
# Model 3 (Cytopus genes only), COV_PBMC - median per-class F1, noted score: 0.507
np.median(
    f1_score(
        pred_adataCOV_3.obs["full_clustering"],
        pred_adataCOV_3.obs["predicted_labels"],
        average=None,
    )
)

In [None]:
# Model 3 on the Cytopus-genes-only dataset, to check the score matches the
# all-genes run - noted score: 0.507
# NOTE(review): `pred_adataCOV_cp_3` is only created by the commented-out cell
# in the COV_PBMC Model 3 section; re-enable that cell (and the `test_COV_cp`
# load) before running this one, otherwise it raises NameError.
np.median(f1_score(pred_adataCOV_cp_3.obs["full_clustering"], pred_adataCOV_cp_3.obs["predicted_labels"], average=None))

In [None]:
# Model 4 (Cytopus genes forced into feature selection), COV_PBMC - median
# per-class F1 (no score noted)
np.median(
    f1_score(
        pred_adataCOV_4.obs["full_clustering"],
        pred_adataCOV_4.obs["predicted_labels"],
        average=None,
    )
)

#### Train & Test on Glasner

In [44]:
# Original CellTypist (model 0), Glasner - median per-class F1
# (output of the recorded run: ~0.254)
np.median(
    f1_score(
        pred_adatag_0.obs["finer_cell_types"],
        predictions_g_0.predicted_labels["predicted_labels"],
        average=None,
    )
)

0.2539842792240571

In [18]:
# Display the per-cell metadata for the model 3 Glasner predictions, including
# the predicted_labels / majority_voting / conf_score columns.
pred_adatag_3.obs

Unnamed: 0,histology,Procedure_Type,n_genes_by_counts,total_counts,total_counts_mt,pct_counts_mt,total_counts_ribo,pct_counts_ribo,Phenograph_cluster,sample_number,...,development_stage,suspension_type,assay,cell_lineage,finer_cell_types,orig_cell_types,predicted_labels,majority_voting,conf_score,over_clustering
236175618587446,LUAD,,2965,12123.0,203.0,1.674503,1932.0,15.936649,C19,25,...,unknown,cell,10xV2,T/NK,T/NK,T/NK,Myeloid,Myeloid,1.000000e+00,89
126834221116188,LUAD,Resection,1380,6730.0,226.0,3.358098,3517.0,52.258545,C6,8,...,unknown,cell,10xV2,B cell,B cell,B cell,B cell,B cell,1.000000e+00,59
133898354183475,LUAD,Resection,5268,34037.0,4274.0,12.556924,3030.0,8.902078,C24,14,...,unknown,cell,10xV3,Epithelial,Epithelial,Epithelial,Epithelial,Epithelial,1.000000e+00,80
200563776941364,LUAD,,1034,2579.0,23.0,0.891819,716.0,27.762699,C1,20,...,unknown,cell,10xV2,T/NK,T/NK,T/NK,T/NK,T/NK,1.000000e+00,228
155842595900140,LUAD,Resection,1485,6816.0,652.0,9.565727,3162.0,46.390846,C6,15,...,unknown,cell,10xV3,B cell,B cell,B cell,B cell,B cell,1.000000e+00,57
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125677948062430,LUAD,Resection,1820,7921.0,190.0,2.398687,3035.0,38.315868,C5,3,...,unknown,cell,10xV2,T/NK,T/NK,T/NK,T/NK,T/NK,1.000000e+00,49
169151741418203,LUAD,Resection,1281,3813.0,90.0,2.360346,1194.0,31.313927,C3,5,...,unknown,cell,10xV2,T/NK,T/NK,T/NK,T/NK,T/NK,1.000000e+00,98
192721152298270,LUAD,Biopsy,1363,4044.0,286.0,7.072206,1078.0,26.656776,C15,18,...,unknown,cell,10xV3,T/NK,T/NK,T/NK,T/NK,T/NK,1.000000e+00,2
130675161807715,LUAD,Resection,4363,28569.0,938.0,3.283279,7121.0,24.925619,C11,11,...,unknown,cell,10xV2,Fibroblast,alveolar_lung_fibroblast,Fibroblast,alveolar_lung_fibroblast,alveolar_lung_fibroblast,1.000000e+00,67


In [45]:
# Model 2 (L1 regularization), Glasner - median per-class F1
# (output of the recorded run: ~0.396)
np.median(
    f1_score(
        pred_adatag_2.obs["finer_cell_types"],
        predictions_g_2.predicted_labels["predicted_labels"],
        average=None,
    )
)

0.39576944022471267

In [46]:
# Model 3 (Cytopus genes only), Glasner - median per-class F1
# (output of the recorded run: ~0.597)
np.median(
    f1_score(
        pred_adatag_3.obs["finer_cell_types"],
        predictions_g_3.predicted_labels["predicted_labels"],
        average=None,
    )
)

0.5966898954703833

In [47]:
# Model 4 (Cytopus genes forced into feature selection), Glasner - median
# per-class F1 (output of the recorded run: ~0.215)
np.median(
    f1_score(
        pred_adatag_4.obs["finer_cell_types"],
        predictions_g_4.predicted_labels["predicted_labels"],
        average=None,
    )
)

0.21479497529141045

In [49]:
# Check that the ground-truth labels in the prediction AnnData are identical
# to those in the test set.
pred_adatag_2.obs["finer_cell_types"].equals(test_g.obs["finer_cell_types"])

True

In [None]:
# Print the installed celltypist package metadata (version, location).
!pip show celltypist