# Getting started with Starling (ST)


In [21]:
# %pip install biostarling
# %pip install lightning_lite

import anndata as ad
import pandas as pd
import torch
from starling import starling, utility
from lightning_lite import seed_everything
import pytorch_lightning as pl
import numpy as np
from torch.utils.data import DataLoader


## Setting seed for everything


In [None]:
## fix the global random seed (10) up front so that repeated runs are comparable
seed_everything(10, workers=True)

INFO:lightning_lite.utilities.seed:Global seed set to 10


10

## Loading annData objects


In [42]:
## load the training cells and use arcsinh(exprs / 5) as the expression matrix in .X
anndata_train = ad.read_h5ad("train_adata.h5ad")
anndata_train.X = np.arcsinh(anndata_train.layers['exprs'] / 5.0)

## encode the `cell_labels` annotation as integer category codes
anndata_train.obs["cell_cat_labels"] = anndata_train.obs["cell_labels"].astype('category').cat.codes.values
## code -> label lookup; used later to turn predicted cluster indices back into label names
labels_map = anndata_train.obs.set_index('cell_cat_labels')['cell_labels'].to_dict()

labels = anndata_train.obs["cell_cat_labels"]

## "User" initialisation with the integer codes as labels
## (presumably this seeds the clusters from the given labels instead of running GMM/KM/PG -- confirm in utility.py)
adata = utility.init_clustering("User", anndata_train, labels=labels)


- The input anndata object should contain a cell-by-protein matrix of segmented single-cell expression profiles in the `.X` position. Optionally, cell size information can also be provided as a column of the `.obs` DataFrame. In this case `model_cell_size` should be set to `True` and the column specified in the `cell_size_col_name` argument.
- Users might want to arcsinh-transform the protein expressions stored in the \*.h5ad file (for example, `sample_input.h5ad`), as done in the cell above.
- The `utility.py` provides an easy setup of GMM, KM (Kmeans) or PG (PhenoGraph).
- Default settings are applied to each method.
- k can be omitted when PG is used.


## Setting initializations


The example below uses default parameter settings based on benchmarking results (more details in the manuscript).


In [43]:
## build the Starling model on the initialised AnnData, keeping every other setting at its default
st = starling.ST(adata)

  torch.tensor(self.adata.obs[self.cell_size_col_name])


The parameters are listed below:

- adata: annDATA object of the sample
- dist_option (default: 'T'): T for Student-T (df=2) and N for Normal (Gaussian)
- singlet_prop (default: 0.6): the proportion of anticipated segmentation error free cells
- model_cell_size (default: 'Y'): Y for incorporating cell size in the model and N otherwise
- cell_size_col_name (default: 'area'): area is the column name in anndata.obs dataframe
- model_zplane_overlap (default: 'Y'): Y for modeling z-plane overlap when cell size is modelled and N otherwise
  Note: if the user sets model_cell_size = 'N', then model_zplane_overlap is ignored
- model_regularizer (default: 1): Regularizer term imposed on the synthetic doublet loss (BCE)
- learning_rate (default: 1e-3): The learning rate of ADAM optimizer for STARLING

Equivalent to the above example:
```python
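st = starling.ST(adata, dist_option='T', singlet_prop=0.6, model_cell_size='Y', cell_size_col_name='area', model_zplane_overlap='Y', model_regularizer=1, learning_rate=1e-3)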
```


## Setting training log


Once training starts, a new directory 'log' will be created.

In [44]:
## log training results via TensorBoard; the logger is pointed at the "log" directory
log_tb = pl.loggers.TensorBoardLogger(save_dir="log")

One can view the training information via TensorBoard. Please refer to the PyTorch Lightning documentation (https://lightning.ai/docs/pytorch/stable/api_references.html#loggers) for other possible loggers.


## Setting early stopping criterion


In [45]:
## set early stopping criterion: watch the logged "train_loss" and treat lower as better (mode="min");
## all other EarlyStopping options are left at the library defaults
cb_early_stopping = pl.callbacks.EarlyStopping(monitor="train_loss", mode="min", verbose=False)

Training loss is monitored.


## Training Starling


In [46]:
## train ST, passing in the early-stopping callback and the TensorBoard logger created in the cells above
st.train_and_fit(
    callbacks=[cb_early_stopping],
    logger=[log_tb],
)

INFO:lightning_lite.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:lightning_lite.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:lightning_lite.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name | Type | Params | Mode
-------------------------------------
-------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

## Appending STARLING results to the annData object


In [47]:
## retrieve starling results (an AnnData object; its fields are listed below)
result = st.result()

## The following information can be retrieved from the annData object:

- st.adata.varm['init_exp_centroids'] -- initial expression cluster centroids (P x C matrix)
- st.adata.varm['st_exp_centroids'] -- ST expression cluster centroids (P x C matrix)
- st.adata.uns['init_cell_size_centroids'] -- initial cell size centroids if STARLING models cell size
- st.adata.uns['st_cell_size_centroids'] -- ST cell size centroids if ST models cell size
- st.adata.obsm['assignment_prob_matrix'] -- cell assignment probability (N x C matrix)
- st.adata.obsm['gamma_assignment_prob_matrix'] -- gamma probability of two cells (N x C x C matrix)
- st.adata.obs['doublet'] -- doublet indicator
- st.adata.obs['doublet_prob'] -- doublet probabilities
- st.adata.obs['init_label'] -- initial assignments
- st.adata.obs['st_label'] -- ST assignments
- st.adata.obs['max_assign_prob'] -- ST max probabilities of assignments

_N: # of cells; C: # of clusters; P: # of proteins_


## Saving the model


In [48]:
## the whole trained `st` object (not just a state dict) is serialised to model.pt
## NOTE(review): loading it back presumably needs the starling package importable and, on recent
## PyTorch versions, torch.load(..., weights_only=False) -- confirm; only load files you trust
torch.save(st, "model.pt")

model.pt will be saved in the same directory as this notebook.


## Showing STARLING results


In [49]:
display(result)

AnnData object with n_obs × n_vars = 253433 × 40
    obs: 'image', 'sample_id', 'ObjectNumber', 'Pos_X', 'Pos_Y', 'area', 'major_axis_length', 'minor_axis_length', 'eccentricity', 'width_px', 'height_px', 'acquisition_id', 'SlideId', 'Study', 'Box.Description', 'Position', 'SampleId', 'Indication', 'BatchId', 'SubBatchId', 'ROI', 'ROIonSlide', 'includeImage', 'flag_no_cells', 'flag_no_ROI', 'flag_total_area', 'flag_percent_covered', 'small_cell', 'celltypes', 'flag_tumor', 'PD1_pos', 'Ki67_pos', 'cleavedPARP_pos', 'GrzB_pos', 'tumor_patches', 'distToCells', 'CD20_patches', 'Batch', 'cell_labels', 'classifier', 'cell_cat_labels', 'init_label', 'st_label', 'doublet_prob', 'doublet', 'max_assign_prob'
    var: 'channel', 'use_channel', 'marker'
    uns: 'init_cell_size_centroids', 'init_cell_size_variances', 'st_cell_size_centroids'
    obsm: 'assignment_prob_matrix', 'gamma_assignment_prob_matrix'
    varm: 'init_exp_centroids', 'init_exp_variances', 'st_exp_centroids'
    layers: 'exprs

One could easily perform further analyses such as co-occurrence analysis, enrichment analysis, etc.


In [50]:
result.obs

Unnamed: 0,image,sample_id,ObjectNumber,Pos_X,Pos_Y,area,major_axis_length,minor_axis_length,eccentricity,width_px,...,CD20_patches,Batch,cell_labels,classifier,cell_cat_labels,init_label,st_label,doublet_prob,doublet,max_assign_prob
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_1,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,1,300.846154,0.692308,13,6.094800,2.780135,0.889904,600,...,,Batch20191023,MacCD163,v1,6,6,6,0.024576,0,0.975423
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_3,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,3,26.982143,0.928571,56,21.520654,3.368407,0.987675,600,...,,Batch20191023,Mural,v1,7,7,9,0.262323,0,0.737665
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_5,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,5,309.083333,0.750000,12,5.294329,2.862220,0.841267,600,...,,Batch20191023,DC,v1,4,4,3,0.547556,1,0.452317
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_7,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,7,431.916667,0.750000,12,5.294329,2.862220,0.841267,600,...,,Batch20191023,Tumor,v1,11,11,3,0.997515,1,0.002484
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_8,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01...,8,116.931034,1.206897,29,9.216670,4.112503,0.894932,600,...,,Batch20191023,Tumor,v1,11,11,9,0.982066,1,0.017789
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IMMUcan_Batch20220908_S-220729-00002_002.tiff_2713,IMMUcan_Batch20220908_S-220729-00002_002.tiff,IMMUcan_Batch20220908_S-220729-00002_002,2713,596.548387,596.709677,31,6.857501,5.700162,0.555928,600,...,,Batch20220908,Mural,v3,7,7,9,0.075758,0,0.923669
IMMUcan_Batch20220908_S-220729-00002_002.tiff_2715,IMMUcan_Batch20220908_S-220729-00002_002.tiff,IMMUcan_Batch20220908_S-220729-00002_002,2715,180.300000,597.400000,20,6.484816,3.840203,0.805803,600,...,,Batch20220908,Mural,v3,7,7,7,0.014739,0,0.985261
IMMUcan_Batch20220908_S-220729-00002_002.tiff_2721,IMMUcan_Batch20220908_S-220729-00002_002.tiff,IMMUcan_Batch20220908_S-220729-00002_002,2721,48.370370,598.111111,27,10.732613,3.134663,0.956397,600,...,,Batch20220908,CD8,v3,3,3,2,0.955928,1,0.037961
IMMUcan_Batch20220908_S-220729-00002_002.tiff_2722,IMMUcan_Batch20220908_S-220729-00002_002.tiff,IMMUcan_Batch20220908_S-220729-00002_002,2722,207.969697,598.060606,33,12.864691,3.228974,0.967988,600,...,,Batch20220908,Mural,v3,7,7,7,0.012922,0,0.987078


For each cell, Starling provides a doublet probability and the cluster assignment the cell would receive if it were a singlet.


## Showing initial expression centroids:


In [51]:
## initial expression centroids as a (P x C) table: rows indexed by result.var_names, one column per cluster
pd.DataFrame(result.varm["init_exp_centroids"], index=result.var_names)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.04233,0.05544,0.045429,0.046147,0.058866,0.048206,0.053753,0.039186,0.043877,0.340778,0.047716,0.042953,0.058844,0.048891
1,0.510187,0.557392,0.512964,0.525483,0.528893,0.484494,0.471054,0.457013,0.517266,0.5573,0.5702,0.524787,0.565808,0.547751
2,0.086614,0.086694,0.125974,0.105891,0.138406,0.13496,0.111773,0.24604,0.115593,0.098263,0.145649,0.051658,0.107666,0.11967
3,0.153765,0.183137,0.234594,0.269289,0.306868,0.194293,0.369733,0.099399,0.308056,0.150043,0.283433,0.119749,0.260154,0.21102
4,0.11589,0.193104,0.127355,0.141434,0.108857,0.073516,0.082383,0.058896,0.148396,0.093526,0.145153,0.086915,0.215936,0.417757
5,0.623071,0.665065,0.532681,0.561389,0.720928,0.63585,0.678736,0.202119,0.50227,0.343132,0.580249,0.306554,0.651017,0.393635
6,0.228295,0.390863,0.23632,0.264384,0.177816,0.119897,0.139272,0.106115,0.191471,0.164392,0.339419,0.13239,0.294915,0.351005
7,0.182313,0.203169,0.193568,0.208944,0.233916,0.184893,0.201099,0.129752,0.187617,0.86579,0.205089,0.278693,0.202749,0.166756
8,0.465929,0.489237,0.154292,0.168922,0.153395,0.101692,0.105535,0.078221,0.30917,0.133894,0.150798,0.129597,0.37748,0.293534
9,0.175452,0.216623,0.21317,0.217946,0.149752,0.119206,0.450712,0.074606,0.221459,0.140539,0.232865,0.066675,0.260143,0.185867


There are 14 centroids (columns 0–13), one per class of the user-provided `cell_labels` that were passed to `utility.init_clustering("User", ...)` earlier.


## Showing Starling expression centroids:


In [52]:
## Starling expression centroids as a (P x C) table: rows indexed by result.var_names, one column per cluster
pd.DataFrame(result.varm["st_exp_centroids"], index=result.var_names)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.033313,0.062858,0.030869,0.042249,0.034201,0.033854,0.038436,0.028671,0.031393,0.063674,0.049872,0.025501,0.079325,0.03548
1,0.473998,0.695459,0.489029,0.533541,0.441941,0.422818,0.430709,0.404287,0.389905,0.580307,0.606788,0.410119,0.79363,0.641596
2,0.045429,0.095318,0.051425,0.085316,0.011567,0.002372,0.050554,0.171382,0.026072,0.12243,0.032405,0.020544,0.022494,0.05716
3,0.092179,0.465458,0.13261,0.421557,0.07,0.029684,0.3066,0.0312,0.050573,0.080478,0.130023,0.04479,0.301653,0.063504
4,0.102963,0.309066,0.046091,0.113315,0.059454,0.028158,0.044092,0.025218,0.024438,0.091397,0.127092,0.046725,0.255513,0.077255
5,0.625823,0.737519,0.491742,0.700881,0.324188,0.110551,0.633401,0.12732,0.287963,0.268456,0.343097,0.085095,0.392052,0.156132
6,0.334502,0.426569,0.203024,0.196835,0.103193,0.052839,0.102079,0.064547,0.06139,0.176711,0.19433,0.070616,0.312744,0.136463
7,0.098183,0.156252,0.077397,0.174051,0.354846,0.508907,0.088655,0.032982,0.316111,0.391211,0.179309,0.160617,0.158292,0.045259
8,0.391924,0.29414,0.07194,0.136815,0.096121,0.046072,0.063869,0.045791,0.044891,0.129727,0.185355,0.08415,0.29056,0.139972
9,0.071937,0.323524,0.083224,0.40518,0.03609,0.016168,0.327825,0.023537,0.02404,0.064337,0.059889,0.023854,0.092375,0.035058


From here one could easily annotate cluster centroids to cell type.


## Showing Assignment Distributions:


In [53]:
## assignment distributions (N x C matrix): one row per cell (indexed by cell id), one column per cluster
pd.DataFrame(result.obsm["assignment_prob_matrix"], index=result.obs.index)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_1,4.819697e-15,3.036467e-27,9.863106e-07,2.114836e-09,3.622485e-15,5.382546e-25,9.754232e-01,5.847439e-13,3.402193e-15,1.427050e-08,2.284337e-20,1.026573e-17,4.754550e-34,1.144729e-20
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_3,1.544102e-11,2.363828e-23,1.140087e-05,4.381006e-12,6.274536e-07,1.452786e-21,6.414261e-09,1.060144e-08,1.938790e-12,7.376651e-01,1.513696e-07,3.644414e-10,2.424354e-23,2.563570e-08
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_5,4.182603e-11,2.774790e-10,9.355787e-09,4.523175e-01,7.572065e-19,2.442444e-35,1.267383e-04,1.437322e-24,3.379553e-26,4.221562e-12,8.003574e-13,7.222308e-27,2.796475e-18,9.474572e-24
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_7,1.715274e-07,2.747980e-12,7.187680e-07,2.484256e-03,8.177698e-12,1.034417e-36,3.187305e-09,9.143121e-27,3.939431e-27,1.363441e-08,5.150077e-08,8.068487e-24,1.648570e-13,4.468970e-18
IMMUcan_batch20191023_10032145-THOR-VAR-TIS-01-IMC-01_002.tiff_8,5.715810e-11,1.158090e-17,1.166230e-04,1.223937e-06,1.012615e-05,2.363293e-21,1.183904e-05,5.911510e-13,5.513628e-14,1.778923e-02,4.996485e-06,6.883174e-11,8.367365e-20,1.756223e-09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IMMUcan_Batch20220908_S-220729-00002_002.tiff_2713,4.392831e-11,5.392072e-21,4.138788e-04,1.755312e-09,5.770615e-13,4.005845e-21,1.591941e-04,2.228397e-09,1.167276e-11,9.236690e-01,1.102255e-15,3.174787e-14,3.698339e-22,5.206761e-18
IMMUcan_Batch20220908_S-220729-00002_002.tiff_2715,3.637906e-27,5.877919e-45,5.040891e-16,1.059234e-30,2.103454e-25,1.153997e-14,1.911103e-16,9.852608e-01,8.202788e-13,1.280022e-15,6.375861e-33,1.418033e-16,2.220770e-49,1.514680e-25
IMMUcan_Batch20220908_S-220729-00002_002.tiff_2721,8.411613e-07,1.236565e-14,3.796112e-02,6.039330e-03,7.991745e-19,1.665622e-35,7.009746e-05,7.537864e-22,2.284319e-25,5.967188e-07,1.554045e-17,5.306543e-24,7.783148e-25,9.488874e-23
IMMUcan_Batch20220908_S-220729-00002_002.tiff_2722,1.116901e-24,1.691231e-40,1.039714e-12,1.472362e-26,1.477596e-20,2.270337e-12,7.350720e-16,9.870783e-01,4.022036e-10,2.802520e-12,1.714626e-28,9.606640e-14,6.789703e-44,2.961781e-20


Currently, we assign a cell label based on the maximum probability among all possible clusters. However, a cell can be mislabeled this way because the maximum and second-highest probabilities can be very close.

## Assign labels to clusters


## Testing

In [54]:
## load the held-out cells and apply the same arcsinh(exprs / 5) transform that was used for the training data
anndata_test = ad.read_h5ad("test_adata.h5ad")
anndata_test.X = np.arcsinh(anndata_test.layers['exprs'] / 5.0)


In [55]:
def test(trained_model, test_adata, threshold: float = 0.5):
        """Test the trained model on the test data.

        :param threshold: minimum threshold for singlet probability
        """
        S = test_adata.obs['area']
        model_pred_loader = DataLoader(
            utility.ConcatDataset([test_adata.X, S]), batch_size=1000, shuffle=False
        )

        singlet_prob, singlet_assig_prob, gamma_assig_prob = utility.predict(
            model_pred_loader,
            trained_model.model_params,
            trained_model.dist_option,
            trained_model.model_cell_size,
            trained_model.model_zplane_overlap,
            threshold,
        )

        test_adata.obs["st_label"] = np.array(
            singlet_assig_prob.max(1).indices
        )  ##p(z=c|d=1)
        test_adata.obs["doublet_prob"] = 1 - np.array(singlet_prob)
        test_adata.obs["doublet"] = 0
        test_adata.obs.loc[test_adata.obs["doublet_prob"] > 0.5, "doublet"] = 1
        test_adata.obs["max_assign_prob"] = np.array(singlet_assig_prob.max(1).values)

        test_adata.obsm["assignment_prob_matrix"] = np.array(singlet_assig_prob)
        test_adata.obsm["gamma_assignment_prob_matrix"] = np.array(gamma_assig_prob)
        c = trained_model.model_params["log_mu"].detach().exp().cpu().numpy()

        test_adata.varm[
            "st_exp_centroids"
        ] = c.T  # pd.DataFrame(c, columns=test_adata.var_names)

        if test_st.model_cell_size:
            test_adata.uns["st_cell_size_centroids"] = (
                trained_model.model_params["log_psi"]
                .reshape(-1, 1)
                .detach()
                .exp()
                .cpu()
                .numpy()
                .T
            )


        return test_adata

In [56]:
## apply the trained model `st` to the test cells; test() writes into `anndata_test` and returns that same object
test_result = test(st, anndata_test)

  return tuple(d[i] for d in self.datasets)


In [60]:
## per-cell assignment probabilities for the test set: one row per cell (indexed by cell id), one column per cluster
test_df = pd.DataFrame(test_result.obsm["assignment_prob_matrix"], index=test_result.obs.index)

In [61]:
test_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
IMMUcan_Batch20191023_S-190805-00002_006.tiff_1,7.180056e-15,6.544796e-22,2.035737e-05,9.803418e-10,2.025146e-13,3.319983e-21,9.521235e-01,2.962869e-06,3.547025e-09,1.346867e-04,1.507474e-16,1.759956e-16,3.379480e-30,2.011551e-13
IMMUcan_Batch20191023_S-190805-00002_006.tiff_2,1.191193e-24,1.312913e-37,4.580353e-15,2.440680e-25,1.297680e-13,3.099601e-10,2.834933e-15,6.069158e-05,9.570885e-01,5.272801e-12,3.180828e-24,1.609645e-09,2.220061e-40,2.920722e-20
IMMUcan_Batch20191023_S-190805-00002_006.tiff_3,2.645107e-14,2.079836e-28,2.797927e-05,1.866736e-14,6.773539e-07,6.706542e-16,1.385744e-06,1.736687e-04,1.689401e-03,3.251793e-03,3.841040e-14,5.219581e-08,2.708305e-28,4.426624e-10
IMMUcan_Batch20191023_S-190805-00002_006.tiff_4,3.943732e-17,5.155740e-30,1.501028e-07,7.565844e-18,1.732028e-08,7.696077e-12,4.121034e-06,1.086233e-01,1.633628e-01,1.402582e-04,3.613567e-17,3.430283e-06,7.302974e-33,2.016674e-12
IMMUcan_Batch20191023_S-190805-00002_006.tiff_5,6.978014e-18,2.260007e-33,5.119142e-09,7.696814e-20,2.314663e-09,3.480688e-11,7.300070e-10,1.105116e-02,1.025875e-01,6.362978e-05,8.770455e-18,1.449613e-05,5.654488e-36,1.556924e-13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IMMUcan_Batch20220908_S-220715-00002_002.tiff_2396,4.922848e-24,1.035299e-39,1.637690e-13,1.882400e-24,2.661686e-10,2.794593e-01,1.310213e-11,6.157591e-01,3.130679e-03,9.317615e-10,4.862552e-25,2.442343e-03,7.032383e-42,1.162219e-18
IMMUcan_Batch20220908_S-220715-00002_002.tiff_2397,6.623137e-20,1.454933e-32,1.661276e-11,2.475183e-20,1.732221e-08,3.704033e-06,1.301370e-13,3.069907e-04,2.097375e-08,1.467161e-04,1.425512e-17,3.456271e-01,1.123489e-33,1.544545e-08
IMMUcan_Batch20220908_S-220715-00002_002.tiff_2398,2.647938e-14,2.113130e-27,2.796058e-08,1.132004e-12,3.128148e-02,1.451700e-14,1.086177e-06,3.705450e-08,8.963383e-11,2.438202e-01,1.741412e-10,2.238421e-02,3.018139e-26,1.846833e-06
IMMUcan_Batch20220908_S-220715-00002_002.tiff_2399,3.071770e-11,9.034238e-18,1.472902e-08,6.644621e-04,1.386480e-09,1.186599e-29,4.106137e-05,3.205050e-22,1.436171e-21,1.929346e-03,1.736752e-09,3.763962e-15,6.739318e-23,4.474809e-08


In [63]:
## most probable cluster index for every test cell
test_df['predicted_label'] = np.argmax(test_result.obsm["assignment_prob_matrix"], axis=1)
## translate cluster indices into label names via the code -> label map built from the training data
## NOTE(review): assumes cluster index c still corresponds to training label code c after fitting -- confirm
test_df['predicted_label'] = test_df['predicted_label'].map(labels_map)
## reference annotation taken from the test data's own `cell_labels` column
test_df['true_label'] = test_result.obs['cell_labels']
test_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,predicted_label,true_label
IMMUcan_Batch20191023_S-190805-00002_006.tiff_1,7.180056e-15,6.544796e-22,2.035737e-05,9.803418e-10,2.025146e-13,3.319983e-21,9.521235e-01,2.962869e-06,3.547025e-09,1.346867e-04,1.507474e-16,1.759956e-16,3.379480e-30,2.011551e-13,MacCD163,DC
IMMUcan_Batch20191023_S-190805-00002_006.tiff_2,1.191193e-24,1.312913e-37,4.580353e-15,2.440680e-25,1.297680e-13,3.099601e-10,2.834933e-15,6.069158e-05,9.570885e-01,5.272801e-12,3.180828e-24,1.609645e-09,2.220061e-40,2.920722e-20,NK,Tumor
IMMUcan_Batch20191023_S-190805-00002_006.tiff_3,2.645107e-14,2.079836e-28,2.797927e-05,1.866736e-14,6.773539e-07,6.706542e-16,1.385744e-06,1.736687e-04,1.689401e-03,3.251793e-03,3.841040e-14,5.219581e-08,2.708305e-28,4.426624e-10,Neutrophil,Tumor
IMMUcan_Batch20191023_S-190805-00002_006.tiff_4,3.943732e-17,5.155740e-30,1.501028e-07,7.565844e-18,1.732028e-08,7.696077e-12,4.121034e-06,1.086233e-01,1.633628e-01,1.402582e-04,3.613567e-17,3.430283e-06,7.302974e-33,2.016674e-12,NK,Tumor
IMMUcan_Batch20191023_S-190805-00002_006.tiff_5,6.978014e-18,2.260007e-33,5.119142e-09,7.696814e-20,2.314663e-09,3.480688e-11,7.300070e-10,1.105116e-02,1.025875e-01,6.362978e-05,8.770455e-18,1.449613e-05,5.654488e-36,1.556924e-13,NK,Tumor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IMMUcan_Batch20220908_S-220715-00002_002.tiff_2396,4.922848e-24,1.035299e-39,1.637690e-13,1.882400e-24,2.661686e-10,2.794593e-01,1.310213e-11,6.157591e-01,3.130679e-03,9.317615e-10,4.862552e-25,2.442343e-03,7.032383e-42,1.162219e-18,Mural,Tumor
IMMUcan_Batch20220908_S-220715-00002_002.tiff_2397,6.623137e-20,1.454933e-32,1.661276e-11,2.475183e-20,1.732221e-08,3.704033e-06,1.301370e-13,3.069907e-04,2.097375e-08,1.467161e-04,1.425512e-17,3.456271e-01,1.123489e-33,1.544545e-08,Tumor,Tumor
IMMUcan_Batch20220908_S-220715-00002_002.tiff_2398,2.647938e-14,2.113130e-27,2.796058e-08,1.132004e-12,3.128148e-02,1.451700e-14,1.086177e-06,3.705450e-08,8.963383e-11,2.438202e-01,1.741412e-10,2.238421e-02,3.018139e-26,1.846833e-06,Neutrophil,Tumor
IMMUcan_Batch20220908_S-220715-00002_002.tiff_2399,3.071770e-11,9.034238e-18,1.472902e-08,6.644621e-04,1.386480e-09,1.186599e-29,4.106137e-05,3.205050e-22,1.436171e-21,1.929346e-03,1.736752e-09,3.763962e-15,6.739318e-23,4.474809e-08,Neutrophil,Tumor


In [64]:
from sklearn.metrics import confusion_matrix, classification_report

## confusion matrix with the true labels passed first (rows) and the predicted labels second (columns)
print(confusion_matrix(test_df['true_label'], test_df['predicted_label']))
## text report of per-class metrics for the same true/predicted pair
print(classification_report(test_df['true_label'], test_df['predicted_label']))

[[1137   18  289  225  186    4  435  209   60  174   10   16    2    2]
 [2833  133  308   47    1    0    8    2    4    2    1    0    2    0]
 [1320  262 2299 1156   47    5  396  306   89   65  113    5   15   61]
 [1395  967 2864 1157   68    9   72  113   47   18  121   17   15   27]
 [ 422   88   55  515   26    0  730   46   48   60   44    0    2   12]
 [  13    7   50  210   76   18  632  251  136  184   25   17    2    6]
 [   0  100   10 3210   23    0 3030   46    8   33   18    1    3    0]
 [   3    2   64  223   85    6  439 4659  190 2899  146  145   15  101]
 [  25   13  117  121   16    0   48   54   15   58   23    5    0    3]
 [   3   67    7  684   41   53   75   55  105 1376  132   20   13    2]
 [ 503  287  698  323    2    1   20   30    1    5   21    0    9    3]
 [  11  158  225  877 7457 6582 1001  664 5101 1106 6588 5239 1048 5399]
 [ 158   31  100  207    4    0  120   49    2  162    9    0    1    0]
 [ 135   69   65  780   26    3  160  275    6 2142

In [65]:
## write the per-cell probabilities together with the predicted and true labels to a tab-separated file
test_df.to_csv('test_result.tsv', sep='\t')