# pyUCell - some important parameters

This document describes some important parameters of the pyUCell algorithm, and how they can be adapted depending on your dataset.

In [None]:
## Load example dataset

In [1]:
import scanpy as sc
import matplotlib.pyplot as plt
import pyucell as uc

In [3]:
# Load scanpy's `pbmc3k` example dataset; `adata` is the object scored in the cells below.
adata = sc.datasets.pbmc3k()

## 1. Positive and negative genes in signatures

Define two simple signatures to test

In [5]:
# Gene signatures keyed by signature name; each value is the list of gene
# symbols that belong to that signature.
signatures = dict(
    Tcell=["CD3D", "CD3E", "CD2"],
    Bcell=["MS4A1", "CD79A", "CD79B"],
)

Now we can score these gene signatures using pyUCell:

In [9]:
# Score every signature in `signatures` for the cells in `adata`.
# NOTE(review): chunk_size=500 presumably sets how many cells are processed per batch — confirm against the pyUCell docs.
uc.compute_ucell_scores(adata, signatures=signatures, chunk_size=500)

The scores are stored as columns of `adata.obs`, one `<signature>_UCell` column per signature:

In [10]:
# Display the per-cell table; the `*_UCell` columns hold the signature scores.
adata.obs

Unnamed: 0_level_0,Tcell_UCell,Bcell_UCell,CD4T_UCell,CD8T_UCell
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AAACATACAACCAC-1,0.599688,0.000000,0.000000,0.770771
AAACATTGAGCTAC-1,0.000000,0.856030,0.000000,0.000000
AAACATTGATCAGC-1,0.902982,0.000000,0.477971,0.000000
AAACCGTGCTTCCG-1,0.191366,0.000000,0.191366,0.000000
AAACCGTGTATGCG-1,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...
TTTCGAACTCTCAT-1,0.000000,0.000000,0.000000,0.000000
TTTCTACTGAGGCA-1,0.000000,0.626391,0.000000,0.000000
TTTCTACTTCCTCG-1,0.000000,0.802403,0.000000,0.000000
TTTGCATGAGAGGC-1,0.000000,0.650645,0.000000,0.000000


## 2. Handling missing genes

In [11]:
# "Tcell2" is "Tcell" plus a placeholder gene name ("notagene") that is not
# expected to match any gene in the dataset, so the two scores can be compared
# to see the effect of the `missing_genes` setting.
signatures = {
    "Tcell": ["CD3D", "CD3E", "CD2"],
    "Tcell2": ["CD3D", "CD3E", "CD2", "notagene"],
}
# Call through the `uc` alias: the import cell uses `import pyucell as uc`, so
# the bare name `pyucell` is never bound and would raise NameError on a fresh
# kernel.
# missing_genes="impute": the missing gene still counts towards the signature,
# so Tcell2_UCell comes out lower than Tcell_UCell in the output below
# (presumably the gene is treated as not expressed — confirm in the pyUCell docs).
uc.compute_ucell_scores(adata, signatures=signatures, missing_genes="impute")
adata.obs

Unnamed: 0_level_0,Tcell_UCell,Bcell_UCell,CD4T_UCell,CD8T_UCell,Tcell2_UCell
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AAACATACAACCAC-1,0.599688,0.000000,0.000000,0.770771,0.449917
AAACATTGAGCTAC-1,0.000000,0.856030,0.000000,0.000000,0.000000
AAACATTGATCAGC-1,0.902982,0.000000,0.477971,0.000000,0.677462
AAACCGTGCTTCCG-1,0.191366,0.000000,0.191366,0.000000,0.143573
AAACCGTGTATGCG-1,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...
TTTCGAACTCTCAT-1,0.000000,0.000000,0.000000,0.000000,0.000000
TTTCTACTGAGGCA-1,0.000000,0.626391,0.000000,0.000000,0.000000
TTTCTACTTCCTCG-1,0.000000,0.802403,0.000000,0.000000,0.000000
TTTGCATGAGAGGC-1,0.000000,0.650645,0.000000,0.000000,0.000000


In [12]:
# Same pair of signatures as in the "impute" example: "Tcell2" is "Tcell" plus
# a placeholder gene name ("notagene") that is not expected to match any gene
# in the dataset.
signatures = {
    "Tcell": ["CD3D", "CD3E", "CD2"],
    "Tcell2": ["CD3D", "CD3E", "CD2", "notagene"],
}
# Call through the `uc` alias: the import cell uses `import pyucell as uc`, so
# the bare name `pyucell` is never bound and would raise NameError on a fresh
# kernel.
# missing_genes="skip": the missing gene is left out of the signature, so
# Tcell2_UCell matches Tcell_UCell in the output below.
uc.compute_ucell_scores(adata, signatures=signatures, missing_genes="skip")
adata.obs

Unnamed: 0_level_0,Tcell_UCell,Bcell_UCell,CD4T_UCell,CD8T_UCell,Tcell2_UCell
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AAACATACAACCAC-1,0.599688,0.000000,0.000000,0.770771,0.599688
AAACATTGAGCTAC-1,0.000000,0.856030,0.000000,0.000000,0.000000
AAACATTGATCAGC-1,0.902982,0.000000,0.477971,0.000000,0.902982
AAACCGTGCTTCCG-1,0.191366,0.000000,0.191366,0.000000,0.191366
AAACCGTGTATGCG-1,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...
TTTCGAACTCTCAT-1,0.000000,0.000000,0.000000,0.000000,0.000000
TTTCTACTGAGGCA-1,0.000000,0.626391,0.000000,0.000000,0.000000
TTTCTACTTCCTCG-1,0.000000,0.802403,0.000000,0.000000,0.000000
TTTGCATGAGAGGC-1,0.000000,0.650645,0.000000,0.000000,0.000000
