### Generates selected dimensionality-reduction (DR) scatter plots
### with gene expression highlighted for the input gene sets
---
#### Combined 10 donors (NO SPL3)

#### ComBat batch corrected values

#### Exploring lineage identification through lineage gene markers
---
##### hpb29

Date: 2021-02-23

In [None]:
%matplotlib widget

import warnings
warnings.filterwarnings('ignore')

import os, sys, json, operator, getpass
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
import scanpy as sc

import matplotlib as mpl
import matplotlib.pyplot as plt
from ipywidgets import widgets

from scipy.sparse import csr_matrix

In [None]:
# Load the label metadata stored at /.singularity.d/labels.json and display
# its 'Version' entry as the cell output.
# NOTE(review): presumably this records which container image the run used — confirm.
with open('/.singularity.d/labels.json') as fh:
    singularity = json.load(fh)
    
singularity['Version']

In [None]:
# Global scanpy settings for this notebook.
sc.settings.verbosity = 3             # show some output
sc.settings.file_format_figs = 'svg'  # use 'svg' (notebook) or 'pdf' (files) for vector graphics
# NOTE(review): current scanpy documents `autosave` rather than `savefigs` —
# confirm this attribute still has an effect in the installed version.
sc.settings.savefigs = False
#sc.set_figure_params(dpi=150)

In [None]:
# Figure typography: Arial as the sans-serif face, 14 pt base font size.
plt.rcParams.update({
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'font.size': 14,
})

In [None]:
# Build the per-user project directory below the home directory.
home = str(Path.home())
user = getpass.getuser()

# Trailing slash is part of the string, so later concatenation yields valid paths.
basedir = os.path.join(home, 'datafloor/users', user, '2020/SLX19841/')

# Directory scanpy uses for its own file reads/writes in this notebook.
sc.settings.writedir = os.path.join(basedir, 'analysis/h5ad/')

In [None]:
# Date stamp (YYYYMMDD) used as the leading part of every saved figure name.
now = datetime.now()
prefix = f'{now:%Y%m%d}'
print(prefix)

In [None]:
# Default matplotlib colormap for the notebook: yellow-orange-red sequential map.
plt.rcParams['image.cmap'] = 'YlOrRd'

In [None]:
def plotarama(plot_type, ann, targets, extra=None, ncols=4, components=None, raw=False, savename=None):
    """Draw a grid of scanpy embedding scatter plots, one panel per target.

    Parameters
    ----------
    plot_type : str
        Selects the scanpy plotting function: 'pca' (``sc.pl.pca_scatter``),
        'diff' (``sc.pl.diffmap``), 'tsne' (``sc.pl.tsne``),
        'umap' (``sc.pl.umap``) or 'force' (``sc.pl.draw_graph``).
    ann
        Object passed to the scanpy plotting function as its first argument.
    targets : list
        Names passed to ``color``; one panel is drawn per entry.
    extra : list, optional
        Additional ``color`` entries appended after ``targets``
        (e.g. ``['leiden']``). Defaults to no extras.
    ncols : int
        Number of panels per row.
    components : list of str, optional
        Component selection, used only when ``plot_type == 'diff'``.
        Defaults to ``['1,2']``.
    raw : bool
        Forwarded to scanpy as ``use_raw``.
    savename : str, optional
        When given, the current figure is saved to
        ``savename + plot_type + '.png'`` at 300 dpi.

    Returns
    -------
    tuple of (str, str)
        Text of the x- and y-axis labels of the first panel, read before the
        axes are hidden.

    Raises
    ------
    ValueError
        If ``plot_type`` is not one of the supported names.
    """
    # plot_type -> attribute name on sc.pl; validated before scanpy is touched
    # so a typo fails immediately with a clear message.
    plotters = {
        'pca': 'pca_scatter',
        'diff': 'diffmap',
        'tsne': 'tsne',
        'umap': 'umap',
        'force': 'draw_graph',
    }
    if plot_type not in plotters:
        raise ValueError(
            'Invalid plot type: {!r} (expected one of {})'.format(plot_type, sorted(plotters))
        )

    # augment target gene plots w/ possible extra observations (e.g. ['leiden'] )
    targets = targets + ([] if extra is None else extra)

    plot_kwargs = dict(color=targets, ncols=ncols, use_raw=raw, show=False)
    if plot_type == 'diff':
        # only the diffusion-map plot receives an explicit component selection
        plot_kwargs['components'] = ['1,2'] if components is None else components

    axes = getattr(sc.pl, plotters[plot_type])(ann, **plot_kwargs)

    # With show=False scanpy hands back a bare Axes when only one panel is
    # drawn; wrap it so the code below can always treat `axes` as a sequence.
    if hasattr(axes, 'get_xaxis'):
        axes = [axes]

    xlabel = axes[0].get_xaxis().get_label().get_text()
    ylabel = axes[0].get_yaxis().get_label().get_text()

    # global aesthetics tweaks
    # ------------------------------------------------------------------------

    for ax in axes:
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    if savename is not None:
        outname = savename + plot_type + '.png'
        # Create the output folder on demand so the first save in a fresh
        # working directory does not fail with FileNotFoundError.
        outdir = os.path.dirname(outname)
        if outdir:
            os.makedirs(outdir, exist_ok=True)
        plt.savefig(outname, dpi=300)

    return xlabel, ylabel

In [None]:
# Marker-gene panels per lineage. The keys are looked up verbatim by the
# plotting cells further down, so they must not be renamed here.
#
# NOTE(review): several entries look like aliases rather than official HGNC
# symbols (e.g. 'CD133', 'TEL', 'AML1', 'HBF', 'FOG', 'GLYA', 'CD42', 'MCPT4',
# 'TGFB', 'E2A', 'IL17'). The downstream `x in data.var.index` filters drop any
# name that is absent from the dataset without warning — confirm these against
# the gene annotation in use.
lineages = {

    'hsc_genes': [
        'CD34', 'CD38', 'PTPRC', 'CLEC9A', 'HES1', 'HLF', 'GATA2', 'GFI1',
        'HOXB4', 'ID1', 'KMT2A', 'MEIS1', 'FOXO3', 'MAF', 'THY1', 'KIT',
        'CD133', 'BMI1', 'RUNX1', 'LMO2', 'TEL', 'TAL1', 'ERG',
    ],
    'hspc_adhesion': ['CXCR4', 'CD44', 'ITGA4', 'ITGB1', 'ITGA7', 'ITGA9'],
    'myeloid': ['SPI1', 'CEBPA', 'CEBPD', 'CEBPE', 'GFI1', 'EGR1', 'ELANE', 'AML1'],
    'erytroid': [
        'HBD', 'KLF1', 'HBB', 'TAL1', 'SHMT2', 'GFI1B', 'NFE2', 'EPOR', 'HBF',
        'FOG', 'HBA1', 'TFRC', 'GATA1', 'GLYA', 'MLLT3', 'HBA2',
    ],
    'Mk': [
        'TAL1', 'NFE2', 'MPL', 'VWF', 'CD42', 'ITGA2B', 'CD9', 'PF4', 'ITGB3',
        'FLI1', 'GATA1', 'LOX', 'FCGR3A', 'NFIB', 'TGFB', 'FOG', 'GFI1B',
    ],
    'basophil_mast': [
        'ENPP3', 'CLC', 'CEBPA', 'CMA1', 'SPI1', 'KIT', 'MITF', 'ITGB7',
        'MCPT4', 'GATA1', 'CPA3', 'TPSAB1', 'TPSG1',
    ],
    'B': [
        'CD79A', 'SPI1', 'FOXO1', 'PAX5', 'SOX4', 'FLT3', 'IKZF1', 'BCL11A',
        'ID2', 'MS4A1', 'ID3', 'EBF1', 'E2A',
    ],
    'T': ['IKZF1', 'GATA3', 'TCF7', 'NOTCH1', 'BCL11B', 'CD8A', 'CD3D'],
    'NK': [
        'GATA3', 'ID2', 'TBX21', 'KLRB1', 'KLRC1', 'EOMES', 'NCAM1', 'NCR1',
        'GNLY', 'FCGR3A',
    ],
    'DC': ['RELB', 'IRF8', 'SPI1', 'FLT3', 'ID2', 'CLEC9A', 'STAT3', 'CCR7', 'CCL2'],
    # 'IFNG' was previously misspelled 'INFG', which can never match a gene symbol.
    'innate_lymphoid': [
        'RORC', 'TBX21', 'GATA3', 'ID2', 'ITGA4', 'ITGB7', 'KLRB1', 'IL1R1',
        'NFIL3', 'IL2RB', 'CCR6', 'IFNG', 'IL5', 'IL13', 'IL17', 'TNFSF11',
    ],
    'T_cell_subsets': ['TBX21', 'GATA3', 'RORC', 'MAF', 'FOXP3'],
    'neutro_clp': ['MPO', 'MME'],
    'monophil': ['IRF8', 'FCGR3B'],

}

In [None]:
%%time
# Load the dataset that every plotting cell below reads from.
# NOTE(review): the argument has no directory or extension — presumably scanpy
# resolves it against sc.settings.writedir; confirm.
data = sc.read('COMBO10_NO_SPL3_combat_corrected')

# HSPC ADHESION

In [None]:
# Keep, in list order, only the HSPC adhesion markers found in data.var.index,
# then display the surviving names.
clean = [gene for gene in lineages['hspc_adhesion'] if gene in data.var.index]
clean

In [None]:
# UMAP panels for the HSPC adhesion markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_HSPC_adhesion_',
)

# HSC

In [None]:
# Keep, in list order, only the HSC markers found in data.var.index.
clean = [gene for gene in lineages['hsc_genes'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels (3 per row) for the HSC markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xlabel, ylabel = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    ncols=3,
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_HSC_',
)

# Lineages

---

# Myeloid

In [None]:
# Keep, in list order, only the myeloid markers found in data.var.index.
clean = [gene for gene in lineages['myeloid'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels for the myeloid markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_Myeloid_',
)

# Erythroid

In [None]:
# Keep, in list order, only the erythroid markers found in data.var.index.
clean = [gene for gene in lineages['erytroid'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels for the erythroid markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_Erytroid_',
)

# Mk

In [None]:
# Keep, in list order, only the 'Mk' markers found in data.var.index.
clean = [gene for gene in lineages['Mk'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels for the 'Mk' markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_Mk_',
)

# B cells

In [None]:
# Keep, in list order, only the B-cell markers found in data.var.index.
clean = [gene for gene in lineages['B'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels for the B-cell markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_B_cells_',
)

# T cells

In [None]:
# Keep, in list order, only the T-cell markers found in data.var.index.
clean = [gene for gene in lineages['T'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels (3 per row) for the T-cell markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    ncols=3,
    extra=['cleiden.1.0'],
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_T_cells_',
)

# NK cells

In [None]:
# Keep, in list order, only the NK-cell markers found in data.var.index.
clean = [gene for gene in lineages['NK'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels (4 per row) for the NK-cell markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    ncols=4,
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_NK_cells_',
)

# DC cells

In [None]:
# Keep, in list order, only the DC markers found in data.var.index.
clean = [gene for gene in lineages['DC'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels (4 per row) for the DC markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    ncols=4,
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_DC_cells_',
)

# Basophil/Mast cells

In [None]:
# Keep, in list order, only the basophil/mast markers found in data.var.index.
clean = [gene for gene in lineages['basophil_mast'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels (4 per row) for the basophil/mast markers plus 'cleiden.1.0'.
# The file name now carries the 'ComBat_' tag that every other figure written
# by this notebook has, so the saved PNGs share one naming scheme.
xl, yl = plotarama('umap', data, clean, extra=['cleiden.1.0'], ncols=4, raw=False,
                   savename='figures/'+prefix+'_COMB010_NO_SPL3_ComBat_Baso_Mast_cells_')

# Innate lymphoid

In [None]:
# Keep, in list order, only the innate-lymphoid markers found in data.var.index.
clean = [gene for gene in lineages['innate_lymphoid'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels (4 per row) for the innate-lymphoid markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    ncols=4,
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_innate_lymphoid_',
)

# T cell subsets

In [None]:
# Keep, in list order, only the T-cell-subset markers found in data.var.index.
clean = [gene for gene in lineages['T_cell_subsets'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels (3 per row) for the T-cell-subset markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    ncols=3,
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_subsets_T_cell_',
)

# Neutro_clp

In [None]:
# Keep, in list order, only the 'neutro_clp' markers found in data.var.index.
clean = [gene for gene in lineages['neutro_clp'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels (3 per row) for the 'neutro_clp' markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    ncols=3,
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_Neutro_CLP_',
)

# Monophil

In [None]:
# Keep, in list order, only the 'monophil' markers found in data.var.index.
clean = [gene for gene in lineages['monophil'] if gene in data.var.index]
#clean

In [None]:
# UMAP panels (3 per row) for the 'monophil' markers plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    ncols=3,
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_Monophil_',
)

# Cell cycle genes

In [None]:
# Cell-cycle gene panel (CDKs, cyclins and CDK inhibitors) plotted in the cells below.
cell_cycle_genes = [
    'CDK1', 'CCND3', 'CCNB1', 'CCNB2', 'CDKN2D', 'CDKN1A', 'CDKN1B',
    'CDKN1C', 'CDK2', 'CDK4', 'CDK6', 'CCND1', 'CCND2', 'CCNE1',
    'CCNE2', 'CCNA1', 'CCNA2', 'CCNB3', 'CDKN2A', 'CDKN2C', 'CDKN2B',
]

In [None]:
# Keep, in list order, only the cell-cycle genes found in data.var.index.
clean = [gene for gene in cell_cycle_genes if gene in data.var.index]
#clean

In [None]:
# UMAP panels (3 per row) for the cell-cycle genes plus 'cleiden.1.0'; saved as a dated PNG under figures/.
xl, yl = plotarama(
    'umap',
    data,
    clean,
    extra=['cleiden.1.0'],
    ncols=3,
    raw=False,
    savename=f'figures/{prefix}_COMB010_NO_SPL3_ComBat_cell_cycle_genes_',
)