# Processing the QC file we created from the raw single-cell data into clusters

## Imports

In [None]:
import numpy as np
import scanpy as sc
import os
import pandas as pd
import bbknn

## Single Cell settings

In [None]:
sc.settings.verbosity = 4
sc.settings.set_figure_params(80) 
sc.settings.file_format_figures = 'png'
sc.settings.savefigs = False
use_first_n_samples = 0
full_sparse = False

## Defining a function for processing the data & determining the parameters

In [None]:
def processing_batch(adata, neighbors=15, key='batch', reso=0.4, random_state=0):
    """Scale, embed, batch-correct and cluster ``adata``, plotting along the way.

    The scanpy/bbknn calls below are used for their side effects on ``adata``;
    nothing is returned.

    Parameters
    ----------
    adata
        AnnData object to process.
    neighbors
        ``n_neighbors`` passed to ``sc.pp.neighbors``.
    key
        Column of ``adata.obs`` used as the BBKNN ``batch_key``.
    reso
        Leiden resolution. Higher resolution yields more leiden clusters.
    random_state
        Seed forwarded to PCA, neighbors, UMAP and Leiden so a run can be
        reproduced or varied. The default (0) matches scanpy's own default.
    """
    sc.pp.scale(adata, max_value=10)
    sc.tl.pca(adata, svd_solver='arpack', random_state=random_state)
    sc.logging.print_memory_usage()
    # NOTE(review): bbknn below presumably replaces the neighbor graph built here,
    # which would make `neighbors` irrelevant for the final clustering - confirm.
    sc.pp.neighbors(adata, n_neighbors=neighbors, random_state=random_state)
    sc.pl.pca_variance_ratio(adata, log=True)
    bbknn.bbknn(adata, batch_key=key, neighbors_within_batch=2, metric='euclidean', n_pcs=30, trim=None)
    sc.tl.umap(adata, random_state=random_state)
    sc.tl.leiden(adata, resolution=reso, random_state=random_state)
    sc.pl.umap(adata, color='leiden')

## Reading the existing h5ad files I created previously in the QC pipeline

In [None]:
# adata after QC
adata = sc.read(r'/Code/data/single_cell/animals/turtle/turtle_3/''turtle_3_after_qc_new.h5ad')

# Display the object that was just loaded instead of reading the same file from disk a second time
adata

# If the anndata object is processed and contains the raw data in adata.raw run -  adata = adata.raw.to_adata()

In [None]:
adata.var

## Processing the data 

In [None]:
# Show the AnnData summary before processing (the original `adat` was a typo that raised NameError)
adata

In [None]:
adata.obs

In [None]:
adata.var

### Cleaning, normalizing, reducing dimensionality, clustering cells and creating visualizations

In [None]:
# Normalize the counts, then log1p-transform them
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
# Flag highly variable genes with the mean/dispersion thresholds below and plot the selection
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
sc.pl.highly_variable_genes(adata)
# Keep the current (normalized, log-transformed, not yet scaled) state in adata.raw;
# find_gene() further down reads its gene names from adata.raw
adata.raw = adata
# adata = adata[:, adata.var.highly_variable] # For filtering non Highly variable genes. We usually keep all the genes. 

In [None]:
# Regress out the two per-cell covariates listed below before scaling
sc.pp.regress_out(adata, ['total_counts','pct_counts_MT'])

# Got it from: https://github.com/theislab/single-cell-tutorial/issues/35
# NOTE(review): processing_batch(), called in the next cell, runs scale, PCA, neighbors and UMAP
# again - presumably its results replace the ones computed here; confirm these four calls are still needed.
sc.pp.scale(adata, max_value=10)
sc.pp.pca(adata, n_comps=50, use_highly_variable=True, svd_solver='arpack')
sc.pp.neighbors(adata)
sc.tl.umap(adata) 

In [None]:
processing_batch(adata, key='treatment')    # Starting with a low resolution of leiden clustering

### Ranking genes ( = top 20)

In [None]:
# print(adata.var_names)

sc.tl.rank_genes_groups(adata, groupby='treatment', method='wilcoxon')

In [None]:
sc.pl.rank_genes_groups(adata, n_genes=50, sharey=True)
pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(20)

# sc.pl.umap(adata[adata.obs['treatment'] == 'control'], color = ['PLAC8', 'CD83', 'CD74', 'CD79B', 'CD38', 'IL10', 'CD7'], size=50)

In [None]:
adata.obs['treatment'].value_counts()

In [None]:
adata.obs

In [None]:
adata.var

### UMAP's

#### T cells

In [None]:
# Colors the gene of interest

# T cells. 'CCR7' is a marker for T cells and also B cells. Markers from Roy's Extended_Data_Figures.
# Markers for T cells https://www.beckman.com/resources/cell-types/blood-cells/leukocytes/lymphocytes/t-cells
sc.pl.umap(adata, color=['CD3E', 'CD28', 'STMN1', 'GATA3', 'LEF1',
                         'RORA', 'GPR171', 'IL10', 'CD7', 'CCR7',
                         'S100A4', 'CXCR4', 'CD226', 'CD3D', 'CD4',
                         'NKG7', 'CD247', 'TCF7', 'GPR183_1', 'GPR183_2',
                         'ALOX5AP','RPL37','RPL38'], size=30)
sc.pl.umap(adata, color='leiden', size=30)

In [None]:
sc.pl.umap(adata, color=['CD3D','CD3E','CD37','GATA3','CXCR4','GPR183_2'], size=30)

In [None]:
sc.pl.umap(adata, color=['BANK1','BACH2','CD74','PXK'], size=30)

#### RBC cells

In [None]:
# RBC markers from Roy's data and from panglaodb website

sc.pl.umap(adata, color=['ALAS2','STMN1','HBA','HBM','SLC4A1','ANK1'], size=20) 

celltype = {'0':'0','1':'T_cells','2':'2','3':'3','4':'T_cells','5':'5','6':'6'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=20)

#### B cells

In [None]:
# B cells markers from Roy's data and from https://panglaodb.se/markers.html?cell_type=%27B%20cells%27

sc.pl.umap(adata, color=['CD79B', 'CD38', 'IRF8', 'STMN1','IGJ',
                         'TCF4', 'CCR7', 'PXK', 'BANK1', 'BACH2','CD74'], size=30) # B_cells. 'JCHAIN'

celltype = {'0':'0','1':'T_cells','2':'2','3':'3','4':'T_cells','5':'RBC','6':'6'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=20)

#### Platelets

In [None]:
# Platelets markers from Roy's data and from https://panglaodb.se/markers.html?cell_type=%27Platelets%27#google_vignette

sc.pl.umap(adata, color=['TUBB1','PLEK','B2M','GP1BA_3','F13A1',
                         'SERPINE1_1','SERPINE1_2','GNAS','TAGLN2',
                         'OAZ1_1','OAZ1_2','CD226','PECAM1'], size=30)

celltype = {'0':'0','1':'T_cells','2':'B_cells','3':'3','4':'T_cells','5':'RBC','6':'6'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=20)

#### Monocytes

##### Not filtered monocytes markers

In [None]:
# Monocytes markers from Roy's data and from https://panglaodb.se/markers.html?cell_type=%27Monocytes%27

# Each marker is listed once: 'CSF1R', 'MAFB' and 'S100A4' used to appear twice,
# which only produced duplicate UMAP panels.
monocyte_markers = ['SPI1','RGS2','CSF1R','S100A4','C1QA','LYZ_2',
                    'LYZ_3','LGALS3_1','MAFB','CD7','TET2',
                    'CD40','TLR4','S100A10','S100A11','CSTA_1','CSTA_4',
                    'CCR2_1','CCR2_2','CCR2_5','TNFRSF14_1','TNFRSF14_2',
                    'MRC1_1','CCL8','TXN','TYROBP','ACP5',
                    'GBP1_1','IRF7','PLSCR1','S100A9','RGS1',
                    'S100A6','PPBP_1']
sc.pl.umap(adata, color=monocyte_markers, size=30)

# Map leiden cluster ids to the cell-type labels assigned so far; unlabeled clusters keep their id
celltype = {'0':'Platelets','1':'T_cells','2':'B_cells','3':'3','4':'T_cells','5':'RBC','6':'6'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=20)

##### Filtered monocytes markers

In [None]:
# Monocytes markers I decided to show because they are more informative

sc.pl.umap(adata, color=['SPI1','RGS1','RGS2','CSF1R','C1QA','LYZ_2',
                         'LYZ_3','S100A4','S100A6','S100A9','S100A10',
                         'S100A11','CSTA_1','CSTA_4','TXN','PLSCR1',
                         'PPBP_1','PPBP_2','PPBP_3','PPBP_4','RPS8_1'], size=20)

celltype = {'0':'Platelets','1':'T_cells','2':'B_cells','3':'Monocytes','4':'T_cells','5':'RBC','6':'Monocytes'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=20)

#### TLR genes in adata

In [None]:
sc.pl.umap(adata, color=['TLR5','TLR3','TLR2_1','TLR2_2','TLR1_1','TLR1_2','TLR4','TLR8','TLR7','TLR21'], size=50)

celltype = {'0':'Platelets','1':'T_cells','2':'B_cells','3':'Monocytes','4':'T_cells','5':'RBC','6':'Monocytes'}
adata.obs['cell_type'] = adata.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata, color='cell_type', size=20)

### TLR genes in adata by treatment

In [None]:
TLR_genes = ['TLR5','TLR3','TLR2_1','TLR2_2','TLR1_1','TLR1_2','TLR4','TLR8','TLR7','TLR21']

In [None]:
# For each treatment: a UMAP restricted to that treatment's cells, followed by
# the dotplot of the TLR genes grouped by treatment (computed on all cells).
for treatment_name in ('control', 'lps', 'pic'):
    treatment_cells = adata[adata.obs['treatment'] == treatment_name]
    sc.pl.umap(treatment_cells, color=TLR_genes, size=50)
    sc.pl.dotplot(adata, TLR_genes, groupby='treatment', standard_scale='var')

### Checking marker genes

In [None]:
# NOTE: find_gene() is defined further down, in the "Gene search tools" section of this notebook;
# run that cell first, otherwise this call raises a NameError in a fresh kernel.
sc.pl.umap(adata, color=find_gene('cxc'), size=40)

In [None]:
sc.pl.umap(adata, color=find_gene('TLR'), size=40) #dc - 'ZBTB46','LAMP3','IL6R','CD83'

In [None]:
sc.pl.umap(adata, color=find_gene('RPS'), size=40)

In [None]:
genes_from_umap = list(find_gene('RPS'))
print(genes_from_umap)

#### Checking genes of interest by treatment

##### control

In [None]:
sc.pl.umap(adata[adata.obs['treatment'] == 'control'], color='TLR3', size=30)
sc.pl.dotplot(adata, 'TLR3', groupby='treatment', standard_scale='var')

# sc.pl.umap(adata[adata.obs['treatment'] == 'lps'], color='CD3D', size=30)
# sc.pl.dotplot(adata, 'CD3D', groupby='treatment', standard_scale='var')

sc.pl.umap(adata[adata.obs['treatment'] == 'pic'], color='TLR3', size=50)
sc.pl.dotplot(adata, 'TLR3', groupby='treatment', standard_scale='var')

##### lf

In [None]:
sc.pl.umap(adata[adata.obs['treatment'] == 'lf'], color='S100A9', size=30)
sc.pl.dotplot(adata, 'S100A9', groupby='treatment', standard_scale='var')

##### lps

In [None]:
sc.pl.umap(adata[adata.obs['treatment'] == 'lps'], color='ACE', size=30)
sc.pl.dotplot(adata, 'ACE', groupby='treatment', standard_scale='var')

##### pic

In [None]:
pic_genes = ['RSAD2', 'ISG15', 'IFIH1']

IFN_genes = ['ENSCPBG00000019034', 'ENSCPBG00000019028', 'ENSCPBG00000019024', 
             'IFNK_1','GVINP1_7', 'IFIT5', 'GVINP1_1', 'GVINP1_2', 'GVINP1_3',
             'GVINP1_4', 'GVINP1_5', 'GVINP1_6', 'ENSCPBG00000000839'] 

sc.pl.umap(adata[adata.obs['treatment'] == 'pic'], color=pic_genes, size=30)
sc.pl.dotplot(adata, pic_genes, groupby='treatment', standard_scale='var')

#### adata observations

In [None]:
immune_cells = ['RBC', 'T_cells', 'B_cells', 'Monocytes', 'Platelets']

# Take an independent copy rather than a view of adata: adata_filtered.obs['cell_type'] is
# assigned to further down, and the subclustering subsets in this notebook are copied the same way.
adata_filtered = adata[adata.obs['cell_type'].isin(immune_cells)].copy()

adata_filtered.obs

# adata.write(r'/Code/data/single_cell/animals/turtle/turtle_3/''turtle_3_cell_types.h5ad')

# adata.obs

### ISG_genes

In [None]:
adata_filtered.obs['cell_type'].value_counts()

In [None]:
df_isg_genes = pd.read_csv('/Code/data/single_cell/animals/turtle/isg_genes_turtle.csv', index_col=0)
df_isg_genes = df_isg_genes.sort_index()

df_isg_genes

In [None]:
isg_genes_list = df_isg_genes.index.tolist()
isg_genes_list

#### UMAP's of the ISG genes

In [None]:
celltype = {'0':'Platelets','1':'T_cells','2':'B_cells','3':'Monocytes','4':'T_cells','5':'RBC','6':'Monocytes'}
adata_filtered.obs['cell_type'] = adata_filtered.obs.leiden.map(celltype).astype('category')
sc.pl.umap(adata_filtered, color=isg_genes_list, size=50)

#### Dotplot of the ISG genes expressed in turtle

In [None]:
sc.pl.dotplot(adata_filtered, isg_genes_list, groupby='treatment', standard_scale='var')

### Subclustering

#### T cells

In [None]:
# Subclustering the T_cells cluster i found. there are two options to do the same function:

subset_adata_t = adata[adata.obs.cell_type == 'T_cells'].copy()
subset_adata_t

# t_cluster = adata[adata.obs.leiden.isin(['1','4'])]

# bbknn.bbknn(t_cluster, batch_key='treatment', neighbors_within_batch=2, metric='euclidean', n_pcs=30, trim=None)
# sc.tl.umap(t_cluster)
# sc.tl.leiden(t_cluster, resolution=0.4)
# sc.pl.umap(t_cluster, color =['leiden'])

In [None]:
bbknn.bbknn(subset_adata_t, batch_key='treatment', neighbors_within_batch=2, metric='euclidean', n_pcs=30, trim=None)
sc.tl.umap(subset_adata_t)
sc.tl.leiden(subset_adata_t, resolution=0.4)
sc.pl.umap(subset_adata_t, color=['leiden','CD3E','CD3D','CD7','TRBV25-1_1','LDHB'], size=30)

In [None]:
sc.tl.rank_genes_groups(subset_adata_t, 'leiden', method='wilcoxon') # Can also be 'cell_type' once annotated
sc.pl.rank_genes_groups(subset_adata_t, n_genes=25, sharey=False)
pd.set_option('display.max_columns', None)
pd.DataFrame(subset_adata_t.uns['rank_genes_groups']['names']).head(100).to_csv(r'/Code/data/single_cell/animals/turtle/turtle_3/''subset_adata_rank_genes_t_cells_top100.csv', index=False)

In [None]:
sc.pl.umap(subset_adata_t[subset_adata_t.obs['treatment'] == 'control'], color='CD3E', size=30)
sc.pl.dotplot(subset_adata_t, 'CD3E', groupby='treatment', standard_scale='var')

sc.pl.umap(subset_adata_t[subset_adata_t.obs['treatment'] == 'lps'], color='CD3E', size=30)
sc.pl.dotplot(subset_adata_t, 'CD3E', groupby='treatment', standard_scale='var')

In [None]:
sc.tl.dendrogram(subset_adata_t, groupby='leiden')
sc.pl.dendrogram(subset_adata_t, groupby='leiden')

In [None]:
subset_adata_t.obs['leiden'].value_counts()

In [None]:
sc.tl.leiden(subset_adata_t, resolution=0.4)

# Top 100 ranked gene names of cluster '0' from the rank_genes_groups result
clusgene = list(pd.DataFrame(subset_adata_t.uns['rank_genes_groups']['names']).head(100)['0'])

# Dotplot of the first 50 of them, skipping names that start with 'RP'
# (presumably to drop ribosomal protein genes)
non_rp_genes = [gene_name for gene_name in clusgene[:50] if not gene_name.startswith('RP')]
sc.pl.dotplot(subset_adata_t, non_rp_genes, groupby='leiden', standard_scale=None)

# Dotplot of the T cell markers per leiden cluster
sc.pl.dotplot(subset_adata_t, ['CD3D', 'CD3E', 'CD28', 'GATA3', 'LEF1',
                             'RORA', 'GPR171', 'IL10', 'CD7', 'CCR7',
                             'S100A4', 'CXCR4', 'CD226', 'STMN1'],
              groupby='leiden', standard_scale='var')
sc.pl.umap(subset_adata_t, color='leiden')    # For visualization only

In [None]:
# sc.pl.rank_genes_groups_dotplot(subset_adata, n_genes=5, standard_scale='var')
sc.pl.umap(subset_adata_t, color=find_gene('CD4') + find_gene('CD8') + find_gene('NKG7')
           + find_gene('CD226') + find_gene('IL2RA') + find_gene('CD247') 
           +find_gene('TCF7'), size=100)
sc.pl.umap(subset_adata_t, color='treatment') 
sc.pl.umap(subset_adata_t, color='leiden')    # For visualization only

In [None]:
sc.pl.umap(subset_adata_t, color=['leiden','LDHB','RPS15_2','CRIP1','FTL_1',
                                  'LGALS1','EFHD2','RPS9','RGS1','EIF5','NUB1'], size=30)

In [None]:
# sub_t_cells = {'0':'0','1':'1','2':'2','3':'3','4':'4'}
# sub_t_cells.obs['cell_type'] = sub_t_cells.obs.leiden.map(celltype).astype('category')
# sc.pl.umap(sub_t_cells, color='cell_type', size=20)

In [None]:
# sc.pp.normalize_total(subset_adata)
# sc.pp.log1p(subset_adata)
# sc.pp.highly_variable_genes(subset_adata)
# sc.pp.scale(subset_adata)
# sc.pp.pca(subset_adata)
# sc.pp.neighbors(subset_adata)
# sc.tl.louvain(subset_adata)  # or sc.tl.leiden, sc.tl.louvain, etc.

In [None]:
# adata = adata[adata.obs.index.isin(cluster_t)]

# cluster_t = adata[adata.obs.leiden.isin(['1', '4'])].obs.index

# cluster_t

In [None]:
# bbknn.bbknn(subset_adata, batch_key='treatment', neighbors_within_batch=2, metric='euclidean', n_pcs=30, trim=None)
# sc.tl.umap(subset_adata)
# sc.tl.leiden(subset_adata, resolution=0.4)
# sc.pl.umap(subset_adata, color=['leiden','CD3E','CD7'])
# sc.tl.rank_genes_groups(subset_adata, 'leiden', method='wilcoxon')  # Can also be 'cell_type' once annotated
# sc.pl.rank_genes_groups(subset_adata, n_genes=25, sharey=False)
# pd.set_option("display.max_columns", None)
# # pd.DataFrame(subset_adata.uns['rank_genes_groups']['names']).head(20)

In [None]:
sc.pl.rank_genes_groups_dotplot(subset_adata_t, n_genes=6, standard_scale='var')

#### B cells

In [None]:
# Subclustering the B_cells cluster I found. There are two options to do the same function:

subset_adata_b = adata[adata.obs.cell_type == 'B_cells'].copy()
subset_adata_b

# b_cluster = adata[adata.obs.leiden.isin(['1','4'])]

# bbknn.bbknn(b_cluster, batch_key='treatment', neighbors_within_batch=2, metric='euclidean', n_pcs=30, trim=None)
# sc.tl.umap(b_cluster)
# sc.tl.leiden(b_cluster, resolution=0.4)
# sc.pl.umap(b_cluster, color =['leiden'])

In [None]:
bbknn.bbknn(subset_adata_b, batch_key='treatment', neighbors_within_batch=2, metric='euclidean', n_pcs=30, trim=None)
sc.tl.umap(subset_adata_b)
sc.tl.leiden(subset_adata_b, resolution=0.4)
sc.pl.umap(subset_adata_b, color=['leiden','CD79B','IRF8','CCR7','BANK1','CD74','KLF2'], size=30)

In [None]:
sc.tl.rank_genes_groups(subset_adata_b, 'leiden', method='wilcoxon') # Can also be 'cell_type' once annotated
sc.pl.rank_genes_groups(subset_adata_b, n_genes=25, sharey=False)
pd.set_option('display.max_columns', None)
pd.DataFrame(subset_adata_b.uns['rank_genes_groups']['names']).head(100).to_csv(r'/Code/data/single_cell/animals/turtle/turtle_3/''subset_adata_rank_genes_b_cells_top100.csv', index=False)

In [None]:
sc.pl.umap(subset_adata_b[subset_adata_b.obs['treatment'] == 'control'], color='KLF2', size=30)
sc.pl.dotplot(subset_adata_b, 'CD79B', groupby='treatment', standard_scale='var')

sc.pl.umap(subset_adata_b[subset_adata_b.obs['treatment'] == 'lps'], color='KLF2', size=30)
sc.pl.dotplot(subset_adata_b, 'CD79B', groupby='treatment', standard_scale='var')

In [None]:
sc.tl.dendrogram(subset_adata_b, groupby='leiden')
sc.pl.dendrogram(subset_adata_b, groupby='leiden')

In [None]:
subset_adata_b.obs['leiden'].value_counts()

In [None]:
sc.tl.leiden(subset_adata_b, resolution=0.4)

# Top 100 ranked gene names of cluster '2' from the rank_genes_groups result
clusgene = list(pd.DataFrame(subset_adata_b.uns['rank_genes_groups']['names']).head(100)['2'])

# Dotplot of the first 50 of them, skipping names that start with 'RP'
# (presumably to drop ribosomal protein genes)
non_rp_genes = [gene_name for gene_name in clusgene[:50] if not gene_name.startswith('RP')]
sc.pl.dotplot(subset_adata_b, non_rp_genes, groupby='leiden', standard_scale=None)

# Dotplot of the B cell markers per leiden cluster
sc.pl.dotplot(subset_adata_b, ['CD79B', 'CD38', 'IRF8', 'STMN1','IGJ',
                               'TCF4', 'CCR7', 'PXK', 'BANK1', 'BACH2',
                               'CD74'], groupby='leiden', standard_scale='var')
sc.pl.umap(subset_adata_b, color='leiden')    # For visualization only

In [None]:
# sc.pl.rank_genes_groups_dotplot(subset_adata_b, n_genes=5, standard_scale='var')
sc.pl.umap(subset_adata_b, color=find_gene('CD79B') + find_gene('IRF8') + find_gene('CCR7')
           + find_gene('BANK1') + find_gene('CD74') + find_gene('CD40') + find_gene('CD44')
           + find_gene('TCF7') + find_gene('KLF2') + find_gene('COX'), size=50)

sc.pl.umap(subset_adata_b, color='treatment') 
sc.pl.umap(subset_adata_b, color='leiden')    # For visualization only

In [None]:
# Checking potential marker genes from pangaloDB

sc.pl.umap(subset_adata_b, color=['leiden','PXK','KLF2','MT-COX1'], size=30)

In [None]:
sc.pl.rank_genes_groups_dotplot(subset_adata_b, n_genes=6, standard_scale='var')

#### RBC

In [None]:
# Subclustering the RBC cluster I found

subset_adata_rbc = adata[adata.obs.cell_type == 'RBC'].copy()
subset_adata_rbc

In [None]:
bbknn.bbknn(subset_adata_rbc, batch_key='treatment', neighbors_within_batch=2, metric='euclidean', n_pcs=30, trim=None)
sc.tl.umap(subset_adata_rbc)
sc.tl.leiden(subset_adata_rbc, resolution=0.4)
sc.pl.umap(subset_adata_rbc, color=['leiden','ALAS2','STMN1','HBA','HBM','SLC4A1'], size=30)

In [None]:
sc.tl.rank_genes_groups(subset_adata_rbc, 'leiden', method='wilcoxon') # Can also be 'cell_type' once annotated
sc.pl.rank_genes_groups(subset_adata_rbc, n_genes=25, sharey=False)
pd.set_option('display.max_columns', None)
pd.DataFrame(subset_adata_rbc.uns['rank_genes_groups']['names']).head(100).to_csv(r'/Code/data/single_cell/animals/turtle/turtle_3/''subset_adata_rank_genes_rbc_top100.csv', index=False)

In [None]:
sc.pl.umap(subset_adata_rbc[subset_adata_rbc.obs['treatment'] == 'control'], color='HBA', size=30)
sc.pl.dotplot(subset_adata_rbc, 'HBA', groupby='treatment', standard_scale='var')

sc.pl.umap(subset_adata_rbc[subset_adata_rbc.obs['treatment'] == 'lps'], color='HBA', size=30)
sc.pl.dotplot(subset_adata_rbc, 'HBA', groupby='treatment', standard_scale='var')

In [None]:
sc.tl.dendrogram(subset_adata_rbc, groupby='leiden')
sc.pl.dendrogram(subset_adata_rbc, groupby='leiden')

In [None]:
subset_adata_rbc.obs['leiden'].value_counts()

In [None]:
sc.tl.leiden(subset_adata_rbc, resolution=0.4)

# Top 100 ranked gene names of cluster '0' from the rank_genes_groups result
clusgene = list(pd.DataFrame(subset_adata_rbc.uns['rank_genes_groups']['names']).head(100)['0'])

# Dotplot of the first 50 of them, skipping names that start with 'RP'
# (presumably to drop ribosomal protein genes)
non_rp_genes = [gene_name for gene_name in clusgene[:50] if not gene_name.startswith('RP')]
sc.pl.dotplot(subset_adata_rbc, non_rp_genes, groupby='leiden', standard_scale=None)

# Dotplot of the RBC markers per leiden cluster
sc.pl.dotplot(subset_adata_rbc, ['ALAS2','STMN1','HBA','HBM','SLC4A1'], groupby='leiden', standard_scale='var')
sc.pl.umap(subset_adata_rbc, color='leiden', size=50)    # For visualization only

In [None]:
sc.pl.umap(subset_adata_rbc, color=find_gene('RPS') + find_gene('PPA1') + find_gene('ANK1') + find_gene('HB'))

sc.pl.umap(subset_adata_rbc, color='treatment')
sc.pl.umap(subset_adata_rbc, color='leiden')

#### Monocytes

#### Platelets

## Gene search tools
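This function returns the list of gene names in `adata.raw` that start with a given prefix (the prefix is upper-cased before matching), for example `find_gene('cd3')` to look up marker genes. Run this cell before any earlier cell that calls `find_gene`.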

In [None]:
def find_gene(gene):
    """Return the gene names that start with ``gene``.

    Names are taken from ``adata.raw.var`` of the notebook-level ``adata``.
    When no raw snapshot has been stored (``adata.raw`` is ``None``),
    ``adata.var`` is used instead of failing with an AttributeError.

    Parameters
    ----------
    gene : str
        Prefix to search for. It is upper-cased before matching, so only
        upper-case gene names can match (e.g. ``'cd3'`` matches ``'CD3E'``).

    Returns
    -------
    list
        Matching gene names in index order; empty when nothing matches.
    """
    source = adata.raw if adata.raw is not None else adata
    gene_names = source.var.index
    return list(gene_names[gene_names.str.startswith(gene.upper())])

find_gene('cd3')

In [None]:
# adata.var[adata.var_names == 'CD209']
adata.var[adata.var['gene_ids'] == 'ENSCPBG00000006789']

## Markers lists

### Markers from Roy's data

###### monocytes = ['MRC1','CCL8-1','C1QA','TXN','MAFB','TYROBP','LYZ','SPI1','LGALS3','RGS2','CSTA-1','S100A4','S100A10','CSF1R']

###### neutrophils = ['S100A10','CSF1','IL1R2','CFP','PLAC8']

###### mafb_monocytes = ['MAFB','DUSP1','JUNB','TNF','KLF4','NFKBIA','NFKBIZ','CFP']

###### t_cells = ['CD3D','CD247','TCF7','GPR183','CD3E','CD28','STMN1','GATA3','LEF1','RORA','GPR171','IL10','CD7','CCR7','S100A4','CXCR4','CD226','CD4','NKG7','CD247','ALOX5AP','RPL37','RPL38']

###### b_cells = ['IGHM','CD79A','PLAC8','CD74-1','CD83','CD74','CD79B','CD7','CCR7','JCHAIN','HLA-DRA-1']  

###### rbc = ['HBA','HBM','ALAS2','STMN1','SLC4A1','ANK1']

###### rbc_pre = ['HBA','HBM','ALAS2','TOP2A','STMN1']

###### dc = ['XCR1','CD200','IL18','CST3','CSTA-1','SPIB','IRF8','HLA-DRA-1'] #ALL dcS

###### platelets = ['PLEK','CD36','SERPINE1','PECAM1','B2M','TUBB1','GP1BA_3','F13A1','GNAS','TAGLN2','OAZ1','CD226'] 

###### pdc = ['IRF8','SPIB','HLA-DRA-1','CD3D','IGHM','CST3'] No XCR1, IL18 and CD200 like in other DC cluster

###### Interferon like = ['ENSCPBG00000019034','ENSCPBG00000019028','ENSCPBG00000019024',
###### 'ENSCPBG00000019021','ENSCPBG00000010021','ENSCPBG00000026504','ENSCPBG00000010704',
###### 'ENSCPBG00000002440','ENSCPBG00000010703','ENSCPBG00000009296','ENSCPBG00000009266',
###### 'ENSCPBG00000011467','ENSCPBG00000000839']

- https://www.sc-best-practices.org/cellular_structure/annotation.html

### Interferon like markers I found

###### 'ENSCPBG00000019034' - Interferon Beta like 

###### 'ENSCPBG00000019028' - Interferon Beta like

###### 'ENSCPBG00000019024' - Interferon Beta like

###### 'ENSCPBG00000019021' - Interferon Kappa like/Epsilon/Beta

###### 'ENSCPBG00000010021' - Interferon induced very large GTPase 1 like

###### 'ENSCPBG00000026504' - Interferon induced protein with tetratricopeptide repeats 5 like

###### 'ENSCPBG00000010704' - Interferon induced very large GTPase 1 like

###### 'ENSCPBG00000002440' - Interferon induced very large GTPase 1 like

###### 'ENSCPBG00000010703' - Interferon induced very large GTPase 1 like

###### 'ENSCPBG00000009296' - Interferon induced very large GTPase 1 like

###### 'ENSCPBG00000009266' - Interferon induced very large GTPase 1 like

###### 'ENSCPBG00000011467' - Interferon induced very large GTPase 1 like

###### 'ENSCPBG00000000839' - Interferon alpha inducible protein 27