# Figures associated to immune processing

**You need to run notebook 2_ first!**

In [None]:
from cellassign import assign_cats
import gzip
import itertools as itl
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import seaborn as sns
import triku as tk
import itertools

In [None]:
from functions import adata_plot_KOvsWT, stat_annot_gene
from functions import dict_WT_KO_colors

In [None]:
from datetime import date

# Date stamp of this run, formatted as YYYY_MM_DD; used to prefix exported result files.
today = date.today().strftime("%Y_%m_%d")

In [None]:
# Palette for UMAP gene expression.
# Sample 80 colours from matplotlib's 'magma', swap the first one for a light grey,
# and build the final colormap from the first 65 samples only.
base_cmap = plt.get_cmap('magma')
magma_colors = [base_cmap(frac) for frac in np.linspace(0, 1, 80)]
magma_colors[0] = (0.88, 0.88, 0.88, 1)
magma = mpl.colors.LinearSegmentedColormap.from_list("", magma_colors[:65])

In [None]:
# Figure resolution for every plot in the notebook, and no column truncation
# when pandas displays wide tables.
mpl.rcParams.update({'figure.dpi': 200})

pd.options.display.max_columns = None

## Functions and variables

In [None]:
# Restore variables previously saved with IPython's %store magic
# (presumably by notebook 2_, which must be run first — TODO confirm).
# NOTE(review): `dict_WT_KO_colors` is also imported from `functions` in an earlier cell;
# this restore rebinds that name — confirm which source is the intended one.
# `seed` is passed as `random_state` to the PCA and neighbors calls below.
%store -r dict_WT_KO_colors
%store -r seed

## Load adata

In [None]:
# Load the dataset saved as 'adatas/adata_all_harmony.h5' and keep only the cells whose
# 'merged_cell_type' is one of the immune groups.
adata_all_harmony = sc.read_h5ad('adatas/adata_all_harmony.h5')
immune_cell_types = ['APC', 'Lymphoid', 'Neutrophil', 'Mast cell']
is_immune = adata_all_harmony.obs['merged_cell_type'].isin(immune_cell_types)
# .copy() turns the subset into a real AnnData object. Without it `adata_imm` is a view of
# `adata_all_harmony`, and the in-place steps that follow (filter_genes, pca, ...) force an
# implicit copy with an ImplicitModificationWarning.
adata_imm = adata_all_harmony[is_immune].copy()

In [None]:
# Filter the genes of adata_imm with scanpy's `min_counts=1` threshold
# (genes below that count total are removed from the subset).
sc.pp.filter_genes(adata_imm, min_counts=1)

In [None]:
def _pca_harmony_neighbors(adata):
    """Run PCA, Harmony integration over 'batch', and the neighbor graph on 'X_pca_harmony'."""
    sc.pp.pca(adata, random_state=seed, n_comps=50)
    sce.pp.harmony_integrate(adata, key='batch', max_iter_harmony=50, plot_convergence=True)
    # Neighborhood size: half the square root of the number of cells.
    n_neighbors = int(len(adata) ** 0.5 * 0.5)
    sc.pp.neighbors(adata, random_state=seed, n_neighbors=n_neighbors, metric='cosine', use_rep='X_pca_harmony')


# First pass: build the embedding and graph, then run triku on the result.
_pca_harmony_neighbors(adata_imm)
tk.tl.triku(adata_imm)

# Second pass: repeat the same three steps after triku
# (presumably so PCA uses triku's feature selection — TODO confirm).
_pca_harmony_neighbors(adata_imm)

### Subpopulation annotation

For the annotation we used the characterization made by Joost et al. (2020). Some populations do not appear (Krt75+ to Krt18+), and we are not sure about their functional relationship.

In [None]:
# Marker genes per immune subtype. Passed to assign_cats (as `dict_cats`) to annotate cells;
# the key order is also reused as the subtype order for the KO-vs-WT plot and the dotplot,
# and the dotplot shows the first three genes of every list.
# NOTE(review): 'Chil3' appears twice in the 'Neutrophil' list — confirm whether the duplicate
# is intentional, since it may affect how assign_cats scores that subtype.
dict_immune = {    
                  'T cell': ['Tnfrsf18', 'Cxcr6', 'Ccr2', 'Ifngr1', 'Cd52', 'Rora', 'Il7r', 'Aebp2', 'Icos', 'Dgat1', 'Shisa5', 'Grap', 'Cd27', 'Pdcd1', 'Ikzf3', 'Il18r1', 
                             'Gata3', 'Spn', 'Tnfrsf4'],
                  'B cell': ['Vpreb3', 'Cd79a', 'Cd79b', 'Chchd10', 'Ighm',  'Stmn1', 'Igll1', 'Dut', 'Top2a', 'Rasgrp2', 'Myb', 'Ssrp1', 'Gimap6', 'Selenoh', 'Apobec3', 'Smc2',
                             'Cks1b', 'Lgals9', 'Ebf1', 'Csrp2', 'Adgrg1', 'Pou2af1', 'Tcf3', 'Hist1h1a', 'Parp1', 'Lig1', 'Dntt', 'Sox4'],
                  'Plasma cell': ['Iglc3', 'Ms4a1', 'Iglc2', 'Ighm', 'Igkc', 'Cd79a', 'Ly6d', 'Cd79b', 'Tnfrsf13c', 'Iglc1', 'Sdc1', 'Cd74', 'Txndc16', 'Ighd'],
                  'NK cell': ['Nkg7', 'Ctsw', 'Cd7', 'Rab4a', 'Ctla2a', 'Rgs2', 'Cd3d', 'Cd3g', 'Fermt2', 'Tcrg-C1', 'Klrk1', 'Trf', 'Itk', 'Ubald2', 'Il2rb', 'Map1b', 'Klrb1b', 'Cd2', 
                              'Trbc2', 'Abcb1b', 'Rasl11b', 'Parp8', 'Tiam1'],
                  'Langerhans cell': ['Mfge8', 'Cd207', 'Cldn1', 'Pgf', 'Cd74', 'H2-Aa', 'H2-Eb1',  'Nr4a3', 'Il1r2', 'Pkib', 'Grk3', 'Ktn1', 'Pxdc1', 'Dnah2', 'Timd4', 
                                      'Epcam', 'Ccr7', 'St14', 'Mreg', 'Cd40'],
#                   'Langerhans $LSR^+$': ['Cd207', 'H2-DMa', 'Ltc4s', 'H2-DMb1', 'Epcam', 'Napsa', 'Hspa1a', 'Lsr', 'Ptpn6', 'Scimp', 'Ptgs1', 'Cyp4f16', 'Tcea3', 'Cd72', 'Cdh1', 'Spint1',
#                                          'Dok3', 'Tmem45b', 'Alox5', 'Itgax', 'Fads1', 'Gna15', 'Scap'],
                  'Dendritic cell': ['Cd209a', 'Cd209d', 'Slc39a14', 'Plbd1', 'Aif1', 'Cd74', 'Clec4e', 'Tgfbi', 'Pdpn', 'Cxcl3', 'Vat1', 'Eps8', 'Tppp3', 
                                     'Procr', 'Tarm1', 'Clec4a3', 'Ccl5'],
                  'M1': ['Fscn1', 'Cd200', 'Il4i1', 'Pkib', 'Wnk1', 'Cd74', 'Cst3', 'Ccr7', 'Bcl2a1a', 'Cacnb3', 'Ccl22', 'Gm13546', 'Serpinb9b', 'Relb', 'Cd40', 'Arl5a'],
                  'M2': ['Pf4', 'C1qb', 'Igf1', 'Grn', 'Dab2', 'C1qc', 'Apoe', 'C1qa', 'Ms4a7', 'Timp2', 'Cd36', 'Mrc1', 'Fxyd2', 'F13a1', 'Rgl1', 'Ednrb', 'Hgsnat', 'Tgfbi', 'Rnase4', 
                                 'Sash1', 'Folr2', 'Gas6', 'Stab1', 'Ccl8', 'Fcgrt', 'Ms4a6c', 'P2rx7'],
                  'Monocyte': ['Nos2', 'Saa3', 'Arg1', 'Wfdc17', 'Csf1r', 'F10', 'Ly6c2', 'Gsr', 'Anxa5', 'Ptgs2', 'Cxcl3', 'Pid1', 'Serpinb2', 'Csf3', 'Vcan', 'Inhba', 'Ptges', 
                               'F13a1', 'Met', ],
                  'Neutrophil': ['Camp', 'Ltf', 'Chil3', 'S100a8', 'S100a9', 'Tinagl1', 'Stfa1', 'Ngp', 'Chil3', 'Cd177', 'Adpgk', 'Syne1', 'Cebpe', 'Hsd11b1', 'Mgst1', 
                                 'Tst', 'Ly6c2', 'Pilrb2', 'Fcnb', 'Rab44', 'Nfe2', 'Cpne3', 'Ldhc', 'Hmgn2', 'Adgrg3'],
                  'Act. neutrophil': ['Acod1', 'Il1f9', 'Csta3', 'Rhov', 'Stfa2l1', 'Ccl3', 'Hcar2', 'Slpi', 'Ccrl2', 'Il1rn', 'Cxcl3', 'G0s2', 
                                      'Stfa2', 'Ptgs2', 'Asprv1', 'Fpr1'],
                  'Mast cell': ['Cma1', 'Tpsb2', 'Mcpt4', 'Fdx1', 'Hs3st1', 'Cd55', 'Tpsab1', 'Hdc', 'Tph1', 'Cpa3', 'Fgfr1', 'Gata2', 'Slco2b1', 'Vwa5a', 'Il1rl1', 
                                'Cyp11a1', 'Adora3', 'Kit', 'Ap1s3', 'Hexb', 'Creb3l1', 'Mrgprb1', 'Rab27b', 'Penk', 'Hs6st2', 'Adamts9', 'Akap12', 'Slc45a3', 'Mrgprb2', 'Gnai1'], 
                  'Basophil': ['Mcpt8', 'Cyp11a1', 'Cd200r3', 'Ifitm1', 'Ero1l', 'Ccl6', 'Ccl3', 'Lilrb4a', 'Gata2', 'Ccl4', 'Hdc', 'Osm', 'Lat2', 'Tec', 'Rab44', 'Csf2rb2', 
                               'Os9', 'Ier3', 'Aqp9', 'Cd200r4', 'Emilin2', 'Csf1', 'Slpi', 'Hmox1', 'Mboat1', 'Hs3st1', 'Adgrg3', ]
                  }

In [None]:
# Hex colour for each immune subtype label (same set of keys as dict_immune, listed alphabetically).
# Its values are used to fill adata_imm.uns['subtype_colors'].
# NOTE(review): the "fbs" in the name does not match the immune content of this notebook;
# the name is kept because a later cell references it.
dict_fbs_colors = {'Act. neutrophil': '#D3BC5F',
                     'B cell': '#229EB2',
                     'Basophil': '#A05A2C',
                     'Dendritic cell': '#FF5599',
                     'Langerhans cell': '#D40055',
                     'M1': '#8E0C6F',
                     'M2': '#4D0765',
                     'Mast cell': '#FF9955',
                     'Monocyte': '#AE54B8',
                     'NK cell': '#702A17',
                     'Neutrophil': '#A5901A',
                     'Plasma cell': '#20668D',
                     'T cell': '#77ECF1',}

In [None]:
# Leiden clustering at resolution 4, followed by marker-based annotation with assign_cats;
# the labels are written under the key 'subtype' (key_added).
sc.tl.leiden(adata_imm, resolution=4)
assign_cats(adata_imm, dict_cats=dict_immune, min_score=0.4, quantile_gene_sel=0.7, key_added='subtype')

# scanpy matches uns['subtype_colors'] to the categories of obs['subtype'] by position.
# Build the list from the categories actually present instead of from the dict's insertion
# order, so a subtype with no assigned cells cannot shift every following colour.
# Labels without an entry in dict_fbs_colors fall back to a neutral grey.
subtype_categories = adata_imm.obs['subtype'].astype('category').cat.categories
adata_imm.uns['subtype_colors'] = [dict_fbs_colors.get(label, '#BBBBBB') for label in subtype_categories]

## UMAP of immune subpopulations and batch

In [None]:
# Compute the UMAP embedding (min_dist=1) and show it coloured by batch and by annotated subtype.
sc.tl.umap(adata_imm, min_dist=1)
overview_panels = ['batch', 'subtype']
sc.pl.umap(adata_imm, color=overview_panels, ncols=2, frameon=False, cmap=magma)

In [None]:
# Same overview with Klrb1c expression added as a third panel.
# The embedding is not recomputed here: the previous cell already ran sc.tl.umap with the same
# parameters, so a second call only repeats the same computation.
sc.pl.umap(adata_imm, color=['batch', 'subtype', 'Klrb1c'], ncols=2, frameon=False, cmap=magma)

In [None]:
# One pair of UMAP panels per condition: the KO batches first, then the WT batches.
for condition_batches in (['KO1', 'KO2'], ['WT1', 'WT2']):
    in_condition = adata_imm.obs['batch'].isin(condition_batches)
    sc.pl.umap(adata_imm[in_condition], color=['batch', 'subtype'], ncols=2, frameon=False, cmap=magma, size=35)

## Proportion of cell types within WT and KO

In [None]:
# Project helper from `functions`; presumably plots subtype proportions in WT vs KO, as the
# section title says — verify against its definition. Subtypes are passed in dict_immune key order.
adata_plot_KOvsWT(adata_imm, list(dict_immune.keys()), col_cell_type='subtype')

## Table with markers and p-values

In [None]:
# Rank genes for every annotated subtype (scanpy defaults) and pull the gene names and
# adjusted p-values into two DataFrames indexed by subtype column.
sc.tl.rank_genes_groups(adata_imm, groupby='subtype')
ranking = adata_imm.uns['rank_genes_groups']
names, pvals = (pd.DataFrame(ranking[field]) for field in ('names', 'pvals_adj'))

In [None]:
# For every subtype, build a two-column table (gene, adjusted p-value) restricted to the
# curated markers in dict_immune, sorted by p-value and cut to at most 30 rows.
list_dfs = []
for subtype, genes in dict_immune.items():
    # A subtype with no ranking column (e.g. no cells were assigned to it) cannot be tabulated.
    if subtype not in names.columns:
        continue
    gene_col, pval_col = f'{subtype} genes', f'p-value {subtype}'
    df = pd.DataFrame({gene_col: names[subtype], pval_col: pvals[subtype]}, dtype=object)
    df_short = (df[df[gene_col].isin(genes)]
                .sort_values(by=pval_col, ascending=True)
                .iloc[:30]
                .reset_index(drop=True))
    # Format only after the numeric sort, and on the table that is actually kept: the
    # original formatted `df` after `df_short` had been derived, so the exported p-values
    # were never formatted.
    df_short[pval_col] = df_short[pval_col].apply(lambda x: '%.3E' % x)
    list_dfs.append(df_short)

In [None]:
# Place the per-subtype marker tables side by side and export them as a dated CSV.
df_all_celltypes = pd.concat(list_dfs, axis=1)
# to_csv does not create missing directories; make sure the output folder exists.
os.makedirs('results', exist_ok=True)
df_all_celltypes.to_csv(f'results/{today}-markers_4_pops.csv', index=False)
df_all_celltypes

## Dotplot with markers of immune subtypes

In [None]:
# First three markers of every subtype, flattened in dict_immune order.
# A plain comprehension stays flat even if a marker list has fewer than three genes, whereas
# np.array(...).ravel() only yields gene names when every slice has the same length.
top_markers = [gene for marker_genes in dict_immune.values() for gene in marker_genes[:3]]
sc.pl.dotplot(adata_imm, groupby='subtype', var_names=top_markers, dendrogram=False, show=False,
              categories_order=list(dict_immune.keys()))

## UMAP highlighting the activated neutrophil population

In [None]:
# Batch and subtype panels next to the expression of Acod1, Il1f9 and Csta3,
# the first three markers of the 'Act. neutrophil' list in dict_immune.
sc.pl.umap(adata_imm, color=['batch', 'subtype', 'Acod1', 'Il1f9', 'Csta3'], ncols=3, frameon=False, cmap=magma)

## UMAP highlighting the expression of specific markers

In [None]:
# Genes of interest for this section; reused by the following cells.
list_genes = ['Pdcd1', 'Cd274']

# Reference panels (batch, subtype) first, then the expression of each gene of interest.
shared_umap_style = dict(ncols=3, frameon=False, cmap=magma)
sc.pl.umap(adata_imm, color=['batch', 'subtype'], **shared_umap_style)
sc.pl.umap(adata_imm, color=list_genes, **shared_umap_style)

In [None]:
# Same four panels (batch, subtype, genes of interest) drawn separately for the WT batches
# and then for the KO batches.
panels = ['batch', 'subtype'] + list_genes
for condition_batches in (['WT1', 'WT2'], ['KO1', 'KO2']):
    in_condition = adata_imm.obs['batch'].isin(condition_batches)
    sc.pl.umap(adata_imm[in_condition], color=panels, ncols=4, frameon=False, cmap=magma, s=45)

In [None]:
# Call the project helper stat_annot_gene once per gene of interest, requesting a violin plot
# with statistics added (type_plot='violin', add_stats=True).
# NOTE(review): the helper is defined in `functions`; what it compares is not visible here.
for gene in list_genes: 
    stat_annot_gene(gene, adata_imm, dict_immune, type_plot='violin', add_stats=True)

In [None]:
# Save the annotated immune AnnData object (subtype labels, colours, embeddings, rankings) to disk.
adata_imm.write_h5ad('adatas/adata_immune.h5')