# Figures for YanLing

In [None]:
from cellassign import assign_cats
import gzip
import itertools as itl
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import seaborn as sns
import triku as tk
import itertools

In [None]:
from datetime import date

# Date stamp (YYYY_MM_DD) embedded in the names of the saved figures and tables.
today = date.today().strftime("%Y_%m_%d")

In [None]:
# Colormap for UMAP gene-expression plots: 80 samples of matplotlib's 'magma',
# with the lowest sample swapped for light grey, truncated to the first 65
# samples and turned back into a continuous colormap.
magma_samples = list(map(plt.get_cmap('magma'), np.linspace(0, 1, 80)))
magma_samples[0] = (0.88, 0.88, 0.88, 1)
magma = mpl.colors.LinearSegmentedColormap.from_list("", magma_samples[:65])

In [None]:
# Notebook display settings: higher-resolution inline figures, and no
# truncation of DataFrame columns when a table is displayed.
mpl.rc('figure', dpi=200)

pd.options.display.max_columns = None

In [None]:
# Load the AnnData object every later cell works on, then compute a UMAP
# embedding for it with min_dist=0.4.
# NOTE(review): presumably the Harmony-integrated dataset, judging by the file
# name — confirm.
adata_all_harmony = sc.read_h5ad('adatas/adata_all_harmony.h5')
sc.tl.umap(adata_all_harmony, min_dist=0.4)

## Figure of main populations in 4 panels

In [None]:
# Major populations: every 'cell_type_whole' label that contains one of these
# names is collapsed into that single population.
major_merged_types = [
    'Fibroblast',
    'Keratinocyte',
    'Neutrophil',
    'Perivascular cell',
    'T cell',
]

In [None]:
# Collapse the fine-grained 'cell_type_whole' annotation into major populations.
# Rules are applied in order and later rules overwrite earlier ones (labels
# containing 'T cell' are first set to 'T cell' and then folded into 'Lymphoid').
# Matching is a plain substring test against the ORIGINAL labels, never against
# the partially merged ones.
merged_cell_types = adata_all_harmony.obs['cell_type_whole'].astype(str)

# (merged label, substrings that trigger it)
merge_rules = [(major_type, (major_type,)) for major_type in major_merged_types]
merge_rules += [
    ('Lymphoid', ('B cell', 'T cell', 'NK cell', 'Plasma cell')),
    ('Fibroblast', ('Fibroblast', 'Skeletal', 'Melanocyte')),
    ('Schwann cell', ('Schwann', 'Glial')),
]

for merged_label, substrings in merge_rules:
    rule_mask = [any(sub in label for sub in substrings)
                 for label in adata_all_harmony.obs['cell_type_whole']]
    merged_cell_types[rule_mask] = merged_label


adata_all_harmony.obs['merged_cell_type'] = merged_cell_types

# Colour of each merged population. The entries are interleaved on purpose:
# even positions and odd positions form two separate groups, and
# list_cell_types lists the even-position names first, then the odd-position
# ones. Insertion order also drives the order of the stand-alone legend.
_cell_type_color_pairs = [
    ('Keratinocyte', '#308914'), ('Mast cell', '#4d0765'),
    ('Fibroblast', '#a05a2c'), ('Lymphoid', '#8e0c6f'),
    ('Endothelial', '#d40055'), ('Neutrophil', '#ae54b8'),
    ('Lymphatic', '#ff5599'), ('Monocyte', '#77dcf9'),
    ('Perivascular cell', '#ff9955'), ('Macrophage', '#47bcd1'),
    ('Schwann cell', '#d3bc5f'), ('Langerhans cell', '#002d72'),
]
dict_cell_types_colors = dict(_cell_type_color_pairs)

_cell_type_names = [pair[0] for pair in _cell_type_color_pairs]
list_cell_types = _cell_type_names[0::2] + _cell_type_names[1::2]

# Make the merged annotation categorical and register one colour per category,
# in category order, under the '<column>_colors' key in uns.
# Every category must be a key of dict_cell_types_colors, otherwise the lookup
# raises KeyError.
merged_as_category = adata_all_harmony.obs['merged_cell_type'].astype('category')
adata_all_harmony.obs['merged_cell_type'] = merged_as_category
adata_all_harmony.uns['merged_cell_type_colors'] = list(
    map(dict_cell_types_colors.__getitem__, adata_all_harmony.obs['merged_cell_type'].cat.categories)
)

In [None]:
# 2x2 grid of UMAPs, one panel per batch, coloured by merged cell type.
# Per-panel legends are switched off; a shared legend is saved as its own image below.
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
s = 10
plt.tight_layout()

# (value in obs['batch'], panel title), listed in row-major panel order.
batch_panels = [('KOD11', 'KO11'), ('KOD12', 'KO12'), ('WT1', 'WT1'), ('WT2', 'WT2')]
for panel_ax, (batch_name, panel_title) in zip(axs.ravel(), batch_panels):
    batch_subset = adata_all_harmony[adata_all_harmony.obs['batch'] == batch_name]
    sc.pl.umap(batch_subset, color='merged_cell_type', s=s, ax=panel_ax,
               legend_loc=False, title=panel_title, frameon=False, show=False)

plt.subplots_adjust(wspace=0.025, hspace=0.1)
plt.savefig(f'results/figures/makers_4_panels_{today}.png', dpi=300)


# Stand-alone legend: an axis-less figure holding one round marker per
# population, in the insertion order of dict_cell_types_colors.
fig2, ax = plt.subplots(1, 1, figsize=(25, 3))
plt.axis('off')
legend_elements = []
for name_i, color_i in dict_cell_types_colors.items():
    # White line colour hides the connecting line so only the filled marker shows.
    handle = mpl.lines.Line2D([0], [0], marker='o', color=(1, 1, 1, 1), label=name_i,
                              markerfacecolor=color_i, markersize=25)
    legend_elements.append(handle)
ax.legend(handles=legend_elements, loc='upper left', ncol=6, frameon=False, fontsize=25)
plt.tight_layout()
plt.savefig(f'results/figures/makers_4_panels_legend_{today}.png', dpi=300)
plt.show()

### Main population markers

In [None]:
# Candidate marker genes per merged population. The marker-table cell below
# keeps, for each population, only the ranked genes that appear in its list
# here (via `isin`), so a name absent from the ranking is silently ignored.
# The 'Lymphatic' list contains 'Flt4', 'Klhl4' and 'Pgm5' twice; harmless for
# a membership test.
# NOTE(review): 'Ecgr4' in 'Fibroblast' may be a misspelling of 'Ecrg4' —
# confirm against adata_all_harmony.var_names.
cell_markers = {'Keratinocyte': ['Lgals7', 'S100a14', 'Perp', 'Sfn', 'Dmkn', 'Dsp', 'Ly6d', 'Krt14', 'Dsc3', 'Krt5', 'Pkp3', 'Pkp1', 'Rab25', 'Apoc1', 'Aqp3', 
                                 'Tacstd2', 'Lypd3', 'Krtdap', 'Calm4', 'Krt15', 'Col17a1', 'Serpinb5', 'Fam25c', 'Dsg1a', 'Ckmt1', 'Urah', 'Esrp1', 'Capns2', 
                                 'Trp63', 'Sbsn', 'Dapl1', 'Fgfbp1', 'Krt80', 'Stard10', 'Tfap2a', 'Cldn4', 'Sytl1', 'Celsr2',], 
                'Fibroblast': ['Bgn', 'Sparc', 'Col6a1', 'Dcn', 'Col1a1', 'Col1a2', 'Mmp2', 'Serpinf1', 'Lum', 'Pcolce', 'Ccdc80', 'Mfap4', 'Mfap2', 'Plac9b', 
                               'Comp', 'Gpc3', 'Ikbip', 'Clec3b', 'Ecgr4', 'Thbs2', 'Igf2', 'Lox', 'Prrx2', 'Ntn1', 'Fkbp7', 'Cyp26b1', 'Apod', 'Lhfp', 'Srpx', 
                               'Cpq', 'Creb3l1', 'Cthrc1', 'P3h3', 'Crtap', 'Gpx7'], 
                'Endothelial': ['Pecam1', 'Cdh5', 'Arhgap29', 'Adgrf5', 'Emcn', 'Egfl7', 'Rasip1', 'Plvap', 'Eng', 'Cd93', 'Gimap6', 'Fabp4', 'Mast4', 'Gng11', 
                                'Pdlim1', 'Ecscr', 'Ptprb', 'Cldn5', 'Tie1', 'Kitl', 'Depp1', 'Serpine1', 'S1pr1', 'Flt1', 'Neurl3', 'Adgrl4', 'Sox7', 'Aqp1', 
                                'Lrrc32', 'Upp1', 'Pdgfb', 'Robo4', 'Slfn5', 'Icam2', 'Parvb', 'Kdr', 'Podxl', 'Sox17', 'Erg', 'Mmrn2', 'Dll4', ], 
                'Lymphatic': ['Fgl2', 'Olfm1', 'Gdf15', 'Mmrn1', 'Prox1', 'Pard6g', 'Ccl21a', 'Lyve1', 'Reln', 'Dtx1', 'Tbx1', 'Sh3gl3', 'Flt4', 'Slc45a3', 
                              'Nts', 'Slc38a4', 'Klhl4', 'Pgm5', 'Flt4', 'Klhl4', 'Slc5a3', 'Pgm5', 'Sgk3', 'Fam189a2', 'Mical2', 'Tbc1d4', 'Clca3a1', 'Scn3a', 'Prkcz',
                              'Cox19', 'Eif2b2', 'Stab1', 'Stmn2', 'Rnf141', 'Tec'], 
                'Perivascular cell': ['Igfbp7', 'Rgs5', 'Procr', 'Acta2', 'Ndufa4l2', 'Ptp4a3', 'Tagln', 'Aoc3', 'Myh11', 'Cox4l2', 
                                      'Mustn1', 'Apold1', 'Epas1', 'Esam', 'Pdgfrb', 'Myl9', 'Mcam', 'Rgs4', 'Ngf', 'Tinagl1', 
                                      'Ppp1r14a', 'Kcne4', 'Gja4', 'Ednrb', 'Gucy1b1', 'Carmn', 'Gucy1a1', 'Synpo2', 'Abcc9', 'Itga7', 
                                      'Notch3', 'Mylk', 'Actg2', 'Higd1b'],
                'Schwann cell': ['Plp1', 'Egfl8', 'Kcna1', 'Mpz', 'Pmp22', 'Cadm4', 'Foxd3', 'Sema3b', 'Mal', 'Gatm', 'Mbp', 'Cnp', 'Limch1', 
                                 'Plekha4', 'Art3', 'Aatk', 'Gas2l3', 'Col28a1', 'Prx', 'Dmd', 'Plekhb1', 'Sox10', 'Utrn', 'Cldn19', 'Ncmap', 
                                 'Mag', 'Zfp536', 'Hspa12a', 'Mlip', 'Itgb8', 'Bcas1', 'Cd59a', 'Cmtm5', 'Pea15a', 'Snca', 'Gjc3', 'Fam178b', 
                                 'Ugt8a', 'Adam23', 'Drp2', 'Kcna2', 'Cadm1'], 
                'Lymphoid': ['Ctla2a', 'Rgs1', 'Cd3d', 'Cd3g', 'Trbc2', 'Ptprcap', 'Neurl3', 'Tnfrsf9', 'Trdv4', 'Trbc1', 'Klrk1', 'Cd3e', 'Cd2', 'Nkg7', 'Il2rg', 
                             'Cd7', 'Tcrg-C1', 'Itk', 'Ctsw', 'Il2rb', 'Ltb', 'Ptpn22', 'Satb1', 'Klrb1b', 'Klrd1', 'Sytl3', 'Lck', 'Sipa1l1', 'Grap2', 'Cd69', 'Camk4'],
                'Neutrophil': ['Cstdc4', 'S100a8', 'Csta2', 'S100a9', 'Cstdc5', 'Stfa2l1', 'Wfdc21', 'G0s2', 'Hcar2', 'Csta3', 'Stfa3', 'Lyz2', 'Itgam', 
                               'Retnlg', 'Ngp', 'Pglyrp1', 'Clec4e', 'Camp', 'Hdc', 'C3', 'Cd300lf', 'Hp', 'F10', 'Trem3', 'Mcemp1', 'Chil1', 'Ltf', 'Slc2a3', 
                               'Asprv1', 'Sp140', 'Chil3', 'Msra', 'Cstdc6',],
                'Monocyte': ['Clec4d', 'Cd14', 'Ctss', 'Pla2g7', 'Clec4e', 'Thbs1', 'Lgmn', 'Csf1r', 'Cybb', 'Wfdc17', 'Fcgr2b', 'F10', 'Syk', 'Pid1', 
                             'Mpeg1', 'Slc11a1', 'Ccrl2', 'Lilr4b', 'Ctsb', 'Sirpa', 'F13a1', 'Hcls1', 'Pf4', 'Clec4n', 'Csf3', 'Cd36', 'Saa3', 'Slc7a8', 'Ifi204', 'Msr1'],
                'Macrophage': ['Lyz2', 'C1qa', 'C1qb', 'Pf4', 'C1qc', 'Trem2', 'Cd36', 'Ms4a7', 'Stab1', 'Fxyd2', 'Pld4', 'Adgre1', 'Folr2', 'Cbr2', 'Mrc1', 'Ly86', 'Aif1', 
                               'Clec4a3', 'Gusb', 'Selenop', 'Cfh', 'Rab3il1', 'Ms4a6b', 'Alox5', 'Dok2', 'Tmem37', 'Fcrls', 'Mgl2', 'Tbxas1', 'Slamf9', 'P2ry12', 'Ccl24'],
                'Langerhans cell': ['Cd74', 'H2-Aa', 'H2-Eb1', 'H2-Ab1', 'Ctss', 'Tbc1d4', 'Csf2rb2', 'Bcl2a1d', 'Gngt2', 'Pkib', 'Myo1g', 'Nr4a3', 'Zmynd15', 'Ccr7', 
                                    'Cd86', 'Cd207', 'Ccl22', 'Ltc4s', 'H2-DMa', 'Il1r2', 'Il4i1', 'Dnah2', 'Grk3', 'Ncoa7', 'Gpd2', 'Osgin2', 'St14', 'Cacnb3', 'Ly75', 
                                    'Irf5', 'Rhof', 'Timd4', 'Rogdi', 'Mreg', 'Map3k14', 'Cd40']}

In [None]:
# Rank genes for every 'merged_cell_type' group; the following cells read the
# result back from adata_all_harmony.uns['rank_genes_groups'].
# NOTE(review): no `method=` is passed, so scanpy's default test is used —
# confirm that is the intended test.
sc.tl.rank_genes_groups(adata_all_harmony, groupby='merged_cell_type')

In [None]:
# Exploratory check: for each population, plot on the UMAP the genes at
# positions 120-249 of its ranking (i.e. ranks 121 to 250), three per row.
ranked_gene_names = adata_all_harmony.uns['rank_genes_groups']['names']
for cell_type in list_cell_types:
    print(cell_type)
    genes_to_plot = ranked_gene_names[cell_type][120:250]
    sc.pl.umap(adata_all_harmony, color=genes_to_plot, alpha=0.5, ncols=3, cmap=magma)

In [None]:
# Ranked gene names and their adjusted p-values as DataFrames
# (one column per group of the last rank_genes_groups run).
_rank_results = adata_all_harmony.uns['rank_genes_groups']
names, pvals = (pd.DataFrame(_rank_results[field]) for field in ('names', 'pvals_adj'))

In [None]:
# For every population, build a table with (at most) its 30 most significant
# curated markers: ranked genes are restricted to the population's list in
# cell_markers, sorted by adjusted p-value and truncated to 30 rows.
list_dfs = []
for cell_type, genes in cell_markers.items():
    gene_col = f'{cell_type} genes'
    pval_col = f'p-value {cell_type}'

    df = pd.DataFrame({gene_col: names[cell_type], pval_col: pvals[cell_type]}, dtype=object)
    df_short = (
        df[df[gene_col].isin(genes)]
        .sort_values(by=pval_col, ascending=True)
        .iloc[:30]
        .reset_index(drop=True)
    )
    # Format p-values on the table that is actually exported. The original
    # formatted `df` after `df_short` had been copied out of it, so the
    # formatting was lost. This must stay AFTER the sort: the formatted values
    # are strings and would sort lexicographically.
    df_short[pval_col] = df_short[pval_col].apply(lambda x: '%.3E' % x)
    list_dfs.append(df_short)

In [None]:
# Put the per-population tables side by side (they share a 0..n-1 row index,
# shorter tables are padded with NaN), export to CSV and display.
df_all_celltypes = pd.concat(list_dfs, axis='columns')
_markers_csv_path = f'results/{today}-markers_4_pops.csv'
df_all_celltypes.to_csv(_markers_csv_path, index=False)
df_all_celltypes

In [None]:
# Tracks plot of the first 7 genes of every population. Every second column of
# df_all_celltypes holds gene names; transposing before flattening keeps each
# population's genes contiguous.
tracks_genes = df_all_celltypes.iloc[:7, ::2].values.T.ravel()
sc.pl.tracksplot(adata_all_harmony, var_names=tracks_genes, groupby='merged_cell_type',
                 dendrogram=True, xticklabels=False)

In [None]:
# Dot plot of the first 9 genes of every population (gene-name columns only,
# flattened population by population).
sc.pl.dotplot(
    adata_all_harmony,
    var_names=df_all_celltypes.iloc[:9, ::2].values.T.ravel(),
    groupby='merged_cell_type',
    dendrogram=True,
    show=False,
)

## Figure of main populations in 2 panels (WT / KO)

In [None]:
# Combined '<merged cell type>-<condition>' annotation. The key is spelled
# 'conditon-cell_type' on purpose here: later cells look it up under that
# exact name.
cell_type_as_str = adata_all_harmony.obs['merged_cell_type'].astype(str)
condition_as_str = adata_all_harmony.obs['condition'].astype(str)
adata_all_harmony.obs['conditon-cell_type'] = (cell_type_as_str + '-' + condition_as_str).astype('category')

# Each population colour is repeated twice in a row, so that both condition
# categories of a population share its colour.
# NOTE(review): this assumes every population has exactly two condition
# categories, adjacent in category order — confirm.
adata_all_harmony.uns['conditon-cell_type_colors'] = [
    color for color in adata_all_harmony.uns['merged_cell_type_colors'] for _ in range(2)
]

In [None]:
# Side-by-side UMAPs of the KO and WT cells, coloured by merged cell type,
# without per-panel legends.
fig, axs = plt.subplots(1, 2, figsize=(12, 4))
s = 10
plt.tight_layout()
for panel_ax, condition_name in zip(axs, ('KO', 'WT')):
    condition_subset = adata_all_harmony[adata_all_harmony.obs['condition'] == condition_name]
    sc.pl.umap(condition_subset, color='merged_cell_type', s=s, ax=panel_ax, title=condition_name,
               legend_loc=False, frameon=False, show=False)

plt.subplots_adjust(wspace=0.025, hspace=0.1)
plt.savefig(f'results/figures/makers_2_panels_{today}.png', dpi=300)

### Main population markers (whole KO VS whole WT)

In [None]:
# Rank genes between the 'condition' groups. The following cells read the
# result from the same uns['rank_genes_groups'] key used by the earlier
# per-cell-type ranking, so that earlier result is no longer available there.
sc.tl.rank_genes_groups(adata_all_harmony, groupby='condition')

In [None]:
# For each condition, draw violin plots of its 2*N top-ranked genes, split by
# the combined cell type / condition annotation, on an N x 2 grid, and save
# the grid as one image.
for condition in ['KO', 'WT']:
    print(condition)
    N = 30
    fig, axs =  plt.subplots(N, 2, figsize=(5 * 2, 4 * N))
    
    for i in range(2 * N):
        sc.pl.violin(adata_all_harmony, adata_all_harmony.uns['rank_genes_groups']['names'][condition][i], groupby='conditon-cell_type', rotation=90, 
                     ax=axs.ravel()[i], show=False)

    # Lay the grid out once, after every panel is drawn. The original did this
    # inside the loop, re-running the layout of all 60 axes on each iteration.
    plt.tight_layout()
    
    plt.savefig(f'results/figures/{today}-violinplots_high_{condition}.png')

In [None]:
# Ranked gene names, adjusted p-values and log fold changes of the condition
# ranking as DataFrames (one column per group).
_condition_rank_results = adata_all_harmony.uns['rank_genes_groups']
names, pvals, logfold = (
    pd.DataFrame(_condition_rank_results[field])
    for field in ('names', 'pvals_adj', 'logfoldchanges')
)

In [None]:
# Display the table of ranked gene names (one column per group).
names

In [None]:
# For each condition, build a table with its first 250 ranked genes, their
# adjusted p-values and log fold changes (rows keep the ranking order).
list_dfs = []
for condition in ['KO', 'WT']:
    pval_col = f'p-value {condition}'

    df = pd.DataFrame({f'{condition} genes': names[condition], pval_col: pvals[condition], f'logfold {condition}': logfold[condition]}, dtype=object)
    df_short = df.iloc[:250].reset_index(drop=True)
    # Format p-values on the table that is actually exported. The original
    # formatted `df` after `df_short` had been copied out of it, so the
    # formatting never reached the CSV.
    df_short[pval_col] = df_short[pval_col].apply(lambda x: '%.3E' % x)
    list_dfs.append(df_short)

In [None]:
# Put the KO and WT tables side by side, export to CSV and display.
df_all_celltypes = pd.concat(list_dfs, axis='columns')
_ko_vs_wt_csv_path = f'results/{today}-markers_KO_VS_WT.csv'
df_all_celltypes.to_csv(_ko_vs_wt_csv_path, index=False)
df_all_celltypes

### Main population markers (individual cell type KO VS WT)

In [None]:
# Per population: rank genes of its KO cells against its WT cells and keep the
# 250 rows with the smallest adjusted p-value (gene, p-value, log fold change).
# NOTE(review): rows are re-sorted by adjusted p-value only. If the ranking
# returns all genes, strongly DOWN-regulated genes can also reach the top 250
# despite the 'KO(hi)' in the file name — check the logfold sign / confirm.
list_dfs = []

for cell_type in list_cell_types:
    ko_group = f'{cell_type}-KO'
    wt_group = f'{cell_type}-WT'
    sc.tl.rank_genes_groups(adata_all_harmony, groupby='conditon-cell_type', groups=[ko_group], reference=wt_group)

    ranking = adata_all_harmony.uns['rank_genes_groups']
    pval_col = f'p-value {cell_type}'
    df = pd.DataFrame(
        {
            f'genes {cell_type}': ranking['names'][ko_group],
            pval_col: ranking['pvals_adj'][ko_group],
            f'logfold {cell_type}': ranking['logfoldchanges'][ko_group],
        },
        dtype=object,
    )
    df_short = df.sort_values(by=pval_col, ascending=True).iloc[:250].reset_index(drop=True)
    list_dfs.append(df_short)

df_all_celltypes = pd.concat(list_dfs, axis='columns')
df_all_celltypes.to_csv(f'results/{today}-markers_KO(hi)_VS_WT(lo)_per_cell_type.csv', index=False)
df_all_celltypes

In [None]:
# Per population: rank genes of its WT cells against its KO cells and keep the
# 250 rows with the smallest adjusted p-value (gene, p-value, log fold change).
# NOTE(review): rows are re-sorted by adjusted p-value only. If the ranking
# returns all genes, strongly DOWN-regulated genes can also reach the top 250
# despite the 'WT(hi)' in the file name — check the logfold sign / confirm.
list_dfs = []

for cell_type in list_cell_types:
    wt_group = f'{cell_type}-WT'
    ko_group = f'{cell_type}-KO'
    sc.tl.rank_genes_groups(adata_all_harmony, groupby='conditon-cell_type', groups=[wt_group], reference=ko_group)

    ranking = adata_all_harmony.uns['rank_genes_groups']
    pval_col = f'p-value {cell_type}'
    df = pd.DataFrame(
        {
            f'genes {cell_type}': ranking['names'][wt_group],
            pval_col: ranking['pvals_adj'][wt_group],
            f'logfold {cell_type}': ranking['logfoldchanges'][wt_group],
        },
        dtype=object,
    )
    df_short = df.sort_values(by=pval_col, ascending=True).iloc[:250].reset_index(drop=True)
    list_dfs.append(df_short)

df_all_celltypes = pd.concat(list_dfs, axis='columns')
df_all_celltypes.to_csv(f'results/{today}-markers_WT(hi)_VS_KO(lo)_per_cell_type.csv', index=False)
df_all_celltypes