# Figures associated with keratinocyte processing

**You need to run the notebook 2_ first!!!!**

In [None]:
from cellassign import assign_cats
import gzip
import itertools as itl
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import seaborn as sns
import triku as tk
import itertools
import matplotlib.pylab as pylab

In [None]:
from functions import adata_plot_KOvsWT, stat_annot_gene
from functions import dict_WT_KO_colors, plot_WT_KO_genes

In [None]:
from datetime import date

# Date stamp (YYYY_MM_DD) of the current run; used as a prefix for exported result files.
today = date.today().strftime("%Y_%m_%d")

In [None]:
# Palettes for UMAP gene expression

# Sample 80 evenly spaced colours from matplotlib's 'magma' colormap.
magma_rgba = list(map(plt.get_cmap('magma'), np.linspace(0, 1, 80)))
# The lowest colour becomes light grey, so the bottom of the scale is drawn in grey.
magma_rgba[0] = (0.88, 0.88, 0.88, 1)
# Keep only the first 65 of the 80 samples (the top end of magma is trimmed off).
magma = mpl.colors.LinearSegmentedColormap.from_list("", magma_rgba[:65])

In [None]:
# Notebook-wide matplotlib defaults: high-resolution figures, large fonts, no grid.
params = {'figure.dpi': 200, 'legend.fontsize': 'large'}
params.update(dict.fromkeys(
    ['axes.labelsize', 'axes.titlesize', 'xtick.labelsize', 'ytick.labelsize'],
    'x-large',
))
params['axes.grid'] = False
pylab.rcParams.update(params)

# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

## Functions and variables

In [None]:
# Restore variables saved with IPython's %store in an earlier notebook session.
# NOTE: this rebinds `dict_WT_KO_colors`, which was also imported from `functions` above.
%store -r dict_WT_KO_colors
# `seed` is passed as `random_state` to the PCA / neighbours calls below.
%store -r seed

## Load adata

In [None]:
# Load the AnnData object saved as 'adata_all_harmony.h5' and keep only the cells
# whose merged cell type is 'KRT'.
adata_all_harmony = sc.read_h5ad('adatas/adata_all_harmony.h5')
is_krt = adata_all_harmony.obs['merged_cell_type_name'] == 'KRT'
# Boolean indexing returns a *view* of the parent AnnData. The subset is modified in
# place right after (gene filtering, PCA, new obs columns), so take an explicit copy
# instead of relying on anndata's implicit view-to-copy conversion and its warnings.
adata_krt = adata_all_harmony[is_krt].copy()

In [None]:
# Drop genes without any counts in the keratinocyte subset
# (scanpy keeps genes with at least `min_counts` counts); modifies adata_krt in place.
sc.pp.filter_genes(adata_krt, min_counts=1)

In [None]:
def _pca_harmony_neighbors(adata):
    """Run PCA, Harmony integration over 'batch', and a cosine kNN graph on the Harmony PCs.

    The number of neighbours is half the square root of the number of cells.
    """
    sc.pp.pca(adata, random_state=seed, n_comps=50)
    sce.pp.harmony_integrate(adata, key='batch', max_iter_harmony=50, plot_convergence=True)
    sc.pp.neighbors(
        adata,
        random_state=seed,
        n_neighbors=int(len(adata) ** 0.5 * 0.5),
        metric='cosine',
        use_rep='X_pca_harmony',
    )


# First pass: build the neighbour graph before running triku.
_pca_harmony_neighbors(adata_krt)
tk.tl.triku(adata_krt, use_raw=False)

# Second pass after triku.
# NOTE(review): presumably so that the PCA uses the features triku selected — confirm.
_pca_harmony_neighbors(adata_krt)

### Subpopulation annotation

To annotate the subpopulations we used the characterization made by Joost et al. 2020. Some populations (Krt75+ to Krt18+) do not appear in that characterization, and we are not sure about their functional relationship.

In [None]:
# Marker genes for each keratinocyte subtype.
# Keys are labels of the form '<number>: Krt <marker>$^+$ | <abbreviation>' (entries 18-20
# have no abbreviation); values are lists of gene symbols. The dict is passed to
# `assign_cats` as `dict_cats`, and its keys are later split on ': ' to derive the
# 'subtype_number' and 'subtype_name' columns, so the key format and order matter.
# NOTE(review): some lists repeat a gene ('Krt6a' in 3, 'Col16a1' in 4, 'Fgfr1' in 5);
# confirm whether `assign_cats` de-duplicates or whether repeats change the score.
dict_krts = {   
                '0: Krt Krt27$^+$ | IRS1' : ['Krt25', 'Krt27', 'Krt71', 'Tchh', 'Prss53', 'Ttyh2', 'Crnn', 'Sprr1a', 
                                                   'Cdsn', 'Sytl2', 'Arg1', 'Padi1', 'Nrxn2', 'Tmem229b', 'Capn8'],
                
                '1: Krt Nkd2$^+$ | IRS2-6' : ['Krt73', 'Msx2', 'Nkd2', 'Ctsc', 'Crym', 'Fbp1', 'Dusp2', 'Taf13', 'Gata3', 'Dlx2', 
                                                    'Msx1', 'Prr9', 'Slc39a8', 'Wdr47', 'Csgalnact1', 'Neo1', 'Wnt5a'],
    
                '2: Krt Krt35$^+$ | CX': ['Krt35', 'Krt36', 'Rexo2', 'Mt4', 'Gm49425', 'Msx1', 'S100a3', 'Krtap7-1', 'Krtap8-1', 
                                           'Ly6g6d', 'Selenbp1', 'Krt81', 'Krtap13-1', 'Rab3ip', 'Krtap22-2'],
                
                '3: Krt Shisa2$^+$ | SB OL': ['Barx2', 'Krt6a', 'Sprr1a', 'Rnase12', 'Krt14', 'Gja1', 'Krt6a', 'Pdzrn3', 'Pthlh', 'Shisa2', 'Clasrp', 'Pfn2', 'Mgat4a'],
    
                '4: Krt Il11ra1$^+$ | B OL': ['Il11ra1', 'Col16a1', 'Tagln', 'Slc7a8', 'Cxcl14', 'Bgn', 'Vdr', 'Ucp2', 'Gclm', 'Col16a1', 'Smtn', 'Tbx1', 
                                                    'Myl9', 'Lamb1', 'Gclc', 'Slc1a3', 'Col4a2', 'Fhod3', 'Cxcl12', 'Cntfr'],
    
                '5: Krt Id3$^+$ | GL': ['Id3', 'Mt2', 'Fos', 'Mt1', 'Ier2', 'Krt17', 'Slc3a2', 'Fgfr1', 'Slpi', 
                                              'Tnc', 'Junb', 'Tnfrsf12a', 'Fgfr1', 'Thbs1', 'Ndrg1', 'Klf10', 'Nedd9',],
    
                '6: Krt Anln$^+$ | IFE C': ['Anln', 'Prc1', 'Cdk1', 'Cenpf', 'Dnph1', 'Cdca8', 'Birc5', 'Cdca3', 
                                          'Spc24', 'Nusap1', 'Cenpa', 'Ccnb1', 'Ccnb2', 'Racgap1', 'Cdc20'],
    
                '7: Krt Krt5$^+$ | IFE B (1)': ['Krt5', 'Igfbp2', 'Ly6a', 'Sparc', 'Cdh13', 'Il1r2', 'Efemp1', 'Smoc2', 'Ccdc3', 
                                          'Igfbp6', 'Antxr1', 'Ppic', 'Cotl1', 'Cav1', 'Thy1'], 
    
                '8: Krt Ifi202b$^+$ | IFE B (2)': ['Krt5', 'S100a10', 'Eif5', 'Serpinb2', 'Ifi202b', 'Cxcl16', 'Fosl1', 'Sqstm1', 
                                             'Tiparp', 'Tinagl1', 'Krt16', 'Ptgs2', 'Epgn', 'Adam8', 'Cnksr1', 'Ccdc71l'], 
    
    
                '9: Krt Krt10$^+$ | SB1': ['Krt1', 'Krt10', 'Mt4', 'Lgals3', 'Acsl1', 'Chit1', 'Endou', 'Krtdap', 
                                                       'Them5', 'Stfa1', 'Calm5', 'Gm15432', 'Sdr16c5', 'Tmem45a', 'Agpat3', 'Ablim1'], 
    

                '10: Krt Krt78$^+$ | SB2': ['Krt78', 'Dkkl1', 'Gm94', 'Skint5', 'Klk5', 'Klk8', 'Oas1f', 'Prxl2b', 'Tmprss4', 'Sbsn', 
                                                 'Ly6g6c', 'Cnfn', 'Agpat4', 'Hal', 'Pdzk1ip1', 'Rbp2', 'Crabp2', 'Nrtn', 'Mboat2', 'Susd2', 'Gjb5'], 

                
                '11: Krt Lor$^+$ | GR': ['Lor', 'Nccrp1', 'Trex2', 'Lce1a1', 'Lce1b', 'Flg', 'Lce1f', 'Hrnr', 'Crct1', 'Lce1g', 
                                         'Lce1e', 'Kprp', 'Il18', 'Lce1m', 'Lce1i', 'Lce1c', 'Lce1h', 'Lce1j', 'Flg2'],  # Granular
        
                '12: Krt Defb6$^+$ | uHF SB': ['Krt79', 'Krt17', 'Cst6', 'Ly6g6c', 'Defb6', 'Defb1', 'Klk7', 'Dpysl3', 
                                                       'Gata6', 'Alox12e', 'Nebl', 'Teddm3', 'Rbp1', 'Susd2', 'Pdzk1ip1', 'Ano9'],
                
                '13: Krt Sprr1b$^+$ | uHF ???': ['Krt16', 'Sprr1b', 'Sprr1a', 'Asprv1', 'Ehf', 'Sbsn', 'Krt80', 'Cnfn', 'Sprr2h', 'Gm9774', 'Ppif', 'Lpin2', 'Il1f9'],
    
                '14: Krt Cidea$^+$ | SG': ['Mgst1', 'Cidea', 'Ldhb', 'Acsbg1', 'Pparg', 'Bex1', 'Krt79', 'Psph', 'Far2', 'Cyp2g1', 'Elovl3',
                                          'Aadac', 'Glrx5', 'Sdc1', 'Eci2', 'Cbr1', 'Scp2', 'Aco1', 'Alad', 'Soat1', 'Cat', 'Lpl'],
    
                '15: Krt Cd74$^+$ | IMM': ['Cd74', 'H2-Eb1', 'H2-Aa', 'Vim', 'Tyrobp', 'Mfge8', 'Alox5ap', 'Cd52', 'Ltc4s', 'Gngt2', 'S100a4', 'Emp3', 'Fcer1g', 'Rgs1', 'Fxyd5', 
                                           'Napsa', 'Ptpn18', 'Hpgds', 'Ctss', 'Csf1r', 'Cd207',],
    
                '16: Krt Krt75$^+$ | CHN (1)???': ['Krt17', 'Tm4sf1', 'Gjb2', 'Wnt11', 'Slc39a6', 'Krt75', 'Fzd5', 'Clu', 'Trim2', 'Sulf2', 
                                           'Lap3', 'Gm44460', 'Sult2b1', 'Slc39a8', 'Gm12446', 'Cyth1', 'Cdr2'],
                '17: Krt Gpx2$^+$ | CHN (2)???': ['Gpx2', 'Ly6g6c', 'Krt6a', 'Cpm', 'Cryab', 'Fads3', 'Pinlyp', 'Pgap2', 'Cd82', 
                                          'Klk7', 'Palld', 'Acot1', 'Ss18l2', 'Phactr1', 'Kcne1', 'Gabrp', 'Klk10', 'Ace2'],
                '18: Krt Fxyd1$^+$': ['Sparc', 'Vim', 'Lgals1', 'Emp3', 'Crip1', 'S100a4', 'Col6a1', 'Fxyd1', 
                                           'Fstl1', 'Col3a1', 'Matn2', 'Col6a3', 'Zeb2', 'Col5a1', 'Dcn', 'Pcolce'],
                '19: Krt Myh11$^+$': ['Tagln', 'Col4a2', 'Col4a1', 'Acta2', 'Myh11', 'Igfbp2', 'Rbpms', 'Myl9', 
                                           'Gm48529', 'Ero1l', 'Anxa6', 'Ptprz1'],
                '20: Krt Krt18$^+$': ['Krt18', 'Krt9', 'Cldn3', 'Cystm1', 'Wfdc18', 'Ceacam1', 'Wfdc2', 'Mapre2', 
                                          'Sec14l1', 'Hk1', 'Kcnn4', 'Tesc', 'Rbm47', 'Tpd52l1', 'Ppp1r1b'],
                  }

In [None]:
# Ten base colours. With 21 subtypes the palette wraps around, so subtypes whose
# positions differ by a multiple of ten share a colour (e.g. 0, 10 and 20).
_krt_palette = [
    '#67dcf1', '#df7935', '#20668d', '#a05a2c', '#ff5599',
    '#d40055', '#8e0c6f', '#d3bc5f', '#4d0765', '#229eb2',
]

# Subtype labels, in the order in which the colours are assigned.
_krt_subtype_labels = [
    '0: Krt Krt27$^+$ | IRS1',
    '1: Krt Nkd2$^+$ | IRS2-6',
    '2: Krt Krt35$^+$ | CX',
    '3: Krt Shisa2$^+$ | SB OL',
    '4: Krt Il11ra1$^+$ | B OL',
    '5: Krt Id3$^+$ | GL',
    '6: Krt Anln$^+$ | IFE C',
    '7: Krt Krt5$^+$ | IFE B (1)',
    '8: Krt Ifi202b$^+$ | IFE B (2)',
    '9: Krt Krt10$^+$ | SB1',
    '10: Krt Krt78$^+$ | SB2',
    '11: Krt Lor$^+$ | GR',
    '12: Krt Defb6$^+$ | uHF SB',
    '13: Krt Sprr1b$^+$ | uHF ???',
    '14: Krt Cidea$^+$ | SG',
    '15: Krt Cd74$^+$ | IMM',
    '16: Krt Krt75$^+$ | CHN (1)???',
    '17: Krt Gpx2$^+$ | CHN (2)???',
    '18: Krt Fxyd1$^+$',
    '19: Krt Myh11$^+$',
    '20: Krt Krt18$^+$',
]

# Map every subtype label to its hex colour.
dict_krt_colors = {
    label: _krt_palette[position % len(_krt_palette)]
    for position, label in enumerate(_krt_subtype_labels)
}

In [None]:
# Leiden clustering at resolution 4, then annotate with the marker dictionary.
# The annotation is written to adata_krt.obs['subtype'] (key_added).
# NOTE(review): assign_cats presumably labels each Leiden cluster with the best-scoring
# marker set — confirm against the cellassign helper.
sc.tl.leiden(adata_krt, resolution=4)
assign_cats(
    adata_krt,
    dict_cats=dict_krts,
    key_added='subtype',
    min_score=0.4,
    quantile_gene_sel=0.7,
)

In [None]:
# Leiden clustering and the assign_cats annotation are run in the previous cell with
# exactly the same arguments, so they are not repeated here. This cell only derives
# helper columns and colours from adata_krt.obs['subtype'].

# 'subtype_name': the label without its leading number ('<n>: <name>' -> '<name>').
# `reorder_categories` returns a new Series (its `inplace` argument was removed in
# pandas 2.0), so the result is assigned back. It raises ValueError if the observed
# categories are not exactly the ones listed in dict_krts.
adata_krt.obs['subtype_name'] = [i.split(': ')[1] for i in adata_krt.obs['subtype']]
adata_krt.obs['subtype_name'] = (
    adata_krt.obs['subtype_name']
    .astype('category')
    .cat.reorder_categories([i.split(': ')[1] for i in dict_krts.keys()])
)

# 'subtype_number': only the leading number, kept as a string category in dict_krts order.
adata_krt.obs['subtype_number'] = [i.split(': ')[0] for i in adata_krt.obs['subtype']]
adata_krt.obs['subtype_number'] = (
    adata_krt.obs['subtype_number']
    .astype('category')
    .cat.reorder_categories([i.split(': ')[0] for i in dict_krts.keys()])
)

# The same colour list (in dict_krt_colors order) is used for the three columns.
# NOTE(review): 'subtype' itself is not reordered here, so this assumes its categories
# already follow the dict_krts order — confirm, otherwise colours and labels misalign.
adata_krt.uns['subtype_colors'] = list(dict_krt_colors.values())
adata_krt.uns['subtype_name_colors'] = list(dict_krt_colors.values())
adata_krt.uns['subtype_number_colors'] = list(dict_krt_colors.values())

## Table with markers and p-values

In [None]:
# Differential expression of every subtype; results are stored in adata_krt.uns['rank_genes_groups'].
sc.tl.rank_genes_groups(adata_krt, groupby='subtype')
rank_result = adata_krt.uns['rank_genes_groups']
# One column per subtype: ranked gene names and their adjusted p-values.
names = pd.DataFrame(rank_result['names'])
pvals = pd.DataFrame(rank_result['pvals_adj'])

In [None]:
# For every subtype, plot the UMAP expression of its marker genes that exist in the data.
# NOTE(review): sc.tl.umap is only run on adata_krt further down, so these plots presumably
# use an embedding inherited from the loaded AnnData — confirm.
for subtype_label, marker_genes in dict_krts.items():
    print(subtype_label)
    markers_present = [gene for gene in marker_genes if gene in adata_krt.var_names]
    sc.pl.umap(
        adata_krt,
        color=markers_present,
        alpha=0.5,
        ncols=5,
        cmap=magma,
        use_raw=False,
    )

In [None]:
# Build, for every subtype, a two-column table with all ranked genes and their adjusted
# p-values (formatted in scientific notation). The tables are concatenated side by side
# in the next cell, where only the top rows are kept.
# The previous version also computed a marker-only `df_short` here that was never used;
# that dead code is removed (the marker table is built separately further down).
list_dfs = []
for subtype in dict_krts:
    gene_col = f'{subtype} genes'
    pval_col = f'p-value {subtype}'
    # dtype=object so the p-value column can hold the formatted strings.
    df = pd.DataFrame({gene_col: names[subtype], pval_col: pvals[subtype]}, dtype=object)
    df[pval_col] = df[pval_col].apply(lambda x: '%.3E' % x)
    list_dfs.append(df)

In [None]:
# Side-by-side table with the 50 top-ranked genes of every subtype.
df_all_celltypes = pd.concat(list_dfs, axis=1).iloc[:50]
# Make sure the output folder exists, so the export cannot fail on a fresh checkout.
os.makedirs('results', exist_ok=True)
# Use the notebook-wide `today` stamp (as the other export does) and a single '.csv'
# extension; the file name previously ended in '.csv.csv'.
df_all_celltypes.to_csv(f'results/{today}-KRT_DEGs.csv', index=False)
df_all_celltypes

## UMAP of keratinocyte subpopulations and batch

In [None]:
# Compute the UMAP embedding of the keratinocyte subset, then colour it by batch and subtype.
sc.tl.umap(adata_krt, min_dist=1.1)
sc.pl.umap(
    adata_krt,
    color=['batch', 'subtype'],
    ncols=2,
    frameon=False,
    cmap=magma,
)

In [None]:
# Same UMAP, restricted first to the KO batches and then to the WT batches.
for batches in (['KO1', 'KO2'], ['WT1', 'WT2']):
    in_batches = adata_krt.obs['batch'].isin(batches)
    sc.pl.umap(
        adata_krt[in_batches],
        color=['batch', 'subtype'],
        ncols=2,
        frameon=False,
        cmap=magma,
        size=35,
    )

In [None]:
# Gene-level WT vs KO panels drawn by the project helper `plot_WT_KO_genes`.
# First set: batch labels on, KO-vs-WT panel off.
plot_WT_KO_genes(
    adata_krt,
    genes=['Tslp', 'Sprr1a', 'Sprr1b', 'Krt6a', 'Krt16'],
    n_cols=6,
    plot_labels_batch=True,
    plot_KO_vs_WT=False,
)
# Second set: batch labels off, KO-vs-WT panel on.
plot_WT_KO_genes(
    adata_krt,
    genes=['Krt17', 'Il1f9', 'Krt2', 'Krt15', 'Krt24'],
    n_cols=6,
    plot_labels_batch=False,
    plot_KO_vs_WT=True,
)

## Proportion of cell types within WT and KO

In [None]:
# Project helper from `functions`; receives the subtype labels in dict_krts order and
# reads the cell type from the 'subtype' column.
# NOTE(review): presumably plots the per-subtype proportions in WT vs KO — confirm in functions.py.
adata_plot_KOvsWT(adata_krt, list(dict_krts.keys()), col_cell_type='subtype')

## Table with markers and p-values

In [None]:
# Rank genes per subtype (same call as in the earlier "Table with markers" section) and
# collect the ranked gene names and adjusted p-values, one column per subtype.
sc.tl.rank_genes_groups(adata_krt, groupby='subtype')
names, pvals = (
    pd.DataFrame(adata_krt.uns['rank_genes_groups'][field])
    for field in ('names', 'pvals_adj')
)

In [None]:
# Build, for every subtype, a table with its dictionary markers sorted by adjusted
# p-value. At most 30 rows are kept; the marker lists are shorter than that, so in
# practice every marker found among the ranked genes is included.
list_dfs = []
for subtype, genes in dict_krts.items():
    gene_col = f'{subtype} genes'
    pval_col = f'p-value {subtype}'
    # dtype=object so the p-value column can later hold formatted strings.
    df = pd.DataFrame({gene_col: names[subtype], pval_col: pvals[subtype]}, dtype=object)
    df_short = (
        df[df[gene_col].isin(genes)]
        .sort_values(by=pval_col, ascending=True)
        .iloc[:30]
        .reset_index(drop=True)
    )
    # Format the p-values of the table that is actually exported. The formatting used to
    # be applied to `df` after `df_short` had been derived, so it never reached the output.
    # It must come after sorting: the formatted values are strings and would sort lexically.
    df_short[pval_col] = df_short[pval_col].apply(lambda x: '%.3E' % x)
    list_dfs.append(df_short)

In [None]:
# Put the per-subtype marker tables side by side and export them with today's date stamp.
df_all_celltypes = pd.concat(list_dfs, axis=1)
markers_csv_path = f'results/{today}-markers_4_pops.csv'
df_all_celltypes.to_csv(markers_csv_path, index=False)
df_all_celltypes

## Dotplot with markers of keratinocyte subtypes

In [None]:
# First three rows of every gene column (every second column of the marker table),
# flattened subtype by subtype.
top_markers = df_all_celltypes.iloc[:3, ::2].values.transpose().ravel()
sc.pl.dotplot(
    adata_krt,
    var_names=top_markers,
    groupby='subtype',
    categories_order=list(dict_krts.keys()),
    dendrogram=False,
    show=False,
)

## UMAP highlighting the expression of specific markers

In [None]:
# Genes highlighted in the following UMAPs and violin plots.
list_genes = ['Sprr1a', 'Sprr1b', 'Krt6a', 'Krt16', 'Krt8', 'Krt18', 'Il33', 'Il1f9']

# Shared plotting options for both UMAP grids.
umap_style = dict(ncols=3, frameon=False, cmap=magma)
sc.pl.umap(adata_krt, color=['batch', 'subtype'], **umap_style)
sc.pl.umap(adata_krt, color=list_genes, **umap_style)

In [None]:
# Expression of the highlighted genes, first in the WT batches and then in the KO batches.
for batch_pair in (['WT1', 'WT2'], ['KO1', 'KO2']):
    in_pair = adata_krt.obs['batch'].isin(batch_pair)
    sc.pl.umap(
        adata_krt[in_pair],
        color=['batch'] + list_genes,
        ncols=3,
        frameon=False,
        cmap=magma,
    )

In [None]:
# One violin plot with statistics per highlighted gene, drawn by the project helper
# `stat_annot_gene` (imported from `functions`).
# NOTE(review): the exact statistical test is defined inside the helper — check functions.py.
for gene in list_genes: 
    stat_annot_gene(gene, adata_krt, dict_krts, type_plot='violin', add_stats=True)

In [None]:
# Save the annotated keratinocyte AnnData (subtype columns, colours, embeddings) to disk.
adata_krt.write_h5ad('adatas/adata_keratinocyte.h5')