# Re-create latent embeddings for the Macrophage subset
### Add annots from brain and lung LR 

## LR multi-tissue cross-comparison

##### Ver:: A1_V5
##### Author(s) : Issac Goh
##### Date : 220823;YYMMDD
### Author notes
    - Current defaults scrape data from the web, so leave them as default and run
    - slices model and anndata to same feature shape, scales anndata object
    - added some simple benchmarking
    - creates dynamic cutoffs for probability score (x*sd of mean) in place of more memory intensive confidence scoring
    - Does not have majority voting set on as default, but module does exist
    - Multinomial logistic relies on the (not always realistic) assumption of independence of irrelevant alternatives whereas a series of binary logistic predictions does not. collinearity is assumed to be relatively low, as it becomes difficult to differentiate between the impact of several variables if this is not the case
    
### Features to add
    - Add ability to consume AnnData in Zarr format for sequential learning
### Modes to run in
    - Run in training mode
    - Run in projection mode

In [None]:
import sys
import subprocess

# import pkg_resources
# required = {'harmonypy','sklearn','scanpy','pandas', 'numpy', 'scipy', 'matplotlib', 'seaborn' ,'scipy'}
# installed = {pkg.key for pkg in pkg_resources.working_set}
# missing = required - installed
# if missing:
#    print("Installing missing packages:" )
#    print(missing)
#    python = sys.executable
#    subprocess.check_call([python, '-m', 'pip', 'install', *missing], stdout=subprocess.DEVNULL)

from collections import Counter
from collections import defaultdict
import scanpy as sc
import pandas as pd
import pickle as pkl
import numpy as np
import scipy
import matplotlib.pyplot as plt
import re
import glob
import os
import sys
#from geosketch import gs
from numpy import cov
import scipy.cluster.hierarchy as spc
import seaborn as sns; sns.set(color_codes=True)
from sklearn.linear_model import LogisticRegression
import sklearn
from pathlib import Path
import requests
import psutil
import random
import threading
import tracemalloc
import itertools
import math
import warnings
import sklearn.metrics as metrics

# Reclass adult myeloid embedding structure

In [None]:
# Grab data with immune-atlas projection
# Loads the annotated adult AnnData (V3). Later cells read the obs columns
# 'organ', 'IG_annot', 'clus_prediction_confident' and 'leiden_scVI' from it.
adata = sc.read('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/A1_V3_LING_ADULT_IG_annot.h5ad')

In [None]:
# Lung projection: prediction table whose index is matched against
# adata.obs.index in the next cell.
lng_preds = pd.read_csv('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/LR_transfer_Lung_brain/A1_V1_LUNG_LUNG_adult_pred_outs.csv',index_col = 0)

# BR projection: only the obs table of the combined atlas is needed, so that
# element is read directly from the h5ad file.
import h5py
from anndata._io.specs import read_elem
# Open explicitly read-only: h5py releases before 3.0 did not default to 'r',
# and this cell must never modify the atlas file.
with h5py.File('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/LR_transfer_Lung_brain/combined_atlas_BAMS.h5ad', 'r') as f:
    br_preds = read_elem(f["obs"])

In [None]:
# Every cell starts as 'NA'; cells present in the lung prediction table then
# receive their consensus label (the assignment aligns on the obs index).
adata.obs['LUNG_atlas_projection'] = 'NA'
in_lung_preds = adata.obs.index.isin(lng_preds.index)
adata.obs.loc[in_lung_preds, 'LUNG_atlas_projection'] = lng_preds['consensus_clus_prediction']

In [None]:
# Harmonise the organ spellings (applied in the same order as before).
for old_name, new_name in (('lung', 'Lung'), ('liver', 'Liver'), ('lymph_Node', 'Lymph_Node')):
    adata.obs['organ'] = adata.obs['organ'].str.replace(old_name, new_name)

# Start from the confident labels of the latest projection.
adata.obs['LVL_panimmune'] = adata.obs['clus_prediction_confident'].astype(str)

# 1) Inherit these IG_annot classes unconditionally.
no_filter_ig = ['MACROPHAGE_MICROGLIA','MACROPHAGE_PROLIFERATING']
from_ig_unfiltered = adata.obs['IG_annot'].isin(no_filter_ig)
adata.obs.loc[from_ig_unfiltered, 'LVL_panimmune'] = adata.obs.loc[from_ig_unfiltered, 'IG_annot']

# 2) Inherit the brain annotation (LVL3) for every cell present in br_preds.
in_brain_preds = adata.obs.index.isin(br_preds.index)
adata.obs.loc[in_brain_preds, 'LVL_panimmune'] = br_preds['LVL3']

# 3) Inherit interstitial macrophages from the lung projection.
lung_keep = ['Interstitial macrophages']
from_lung = adata.obs['LUNG_atlas_projection'].isin(lung_keep)
adata.obs.loc[from_lung, 'LVL_panimmune'] = adata.obs.loc[from_lung, 'LUNG_atlas_projection']

# 4) Inherit the remaining IG_annot macrophage classes, except for cells whose
#    organ is 'brain' or 'Lung' and cells whose current label is listed in
#    do_not_inherit. (An earlier version of this step had no do_not_inherit filter.)
# NOTE(review): 'brain' is lower-case here while 'Lung' is capitalised - confirm
# that this matches the organ values actually present.
macs = adata.obs.loc[adata.obs['IG_annot'].str.contains('MAC')]
do_not_inherit = [
    'MNP/T doublets',
    'pDC',
    'DC2',
    'migDC',
    'DC1',
    'Nonclassical monocytes',
    'Classical monocytes',
]
inherit_from_ig = (
    adata.obs.index.isin(macs.index)
    & ~adata.obs['organ'].isin(['brain','Lung'])
    & ~adata.obs['LVL_panimmune'].isin(do_not_inherit)
)
adata.obs.loc[inherit_from_ig, 'LVL_panimmune'] = adata.obs.loc[inherit_from_ig, 'IG_annot']



In [None]:
# Source label -> harmonised LVL_panimmune label. Kept as a single dict so each
# source label sits next to its replacement and the pairs cannot drift out of
# alignment the way two parallel lists can.
mapper = {
    'MACROPHAGE_MHCII_HIGH': 'MACROPHAGE_MHCII_HIGH',
    'MACROPHAGE_LYVE1_HIGH': 'MACROPHAGE_LYVE1_HIGH',
    'Alveolar macrophages': 'MACROPHAGE_ALVEOLAR',
    'Classical monocytes': 'MONOCYTES_Classical',
    'Interstitial macrophages': 'MACROPHAGE_INTERSTITIAL',
    'Progenitor': 'Progenitor',
    'MACROPHAGE_PERI': 'MACROPHAGE_PERI',
    'MACROPHAGE_KUPFFER_LIKE': 'MACROPHAGE_KUPFFER_LIKE',
    'MACROPHAGE_ERY': 'MACROPHAGE_ERY',
    'Nonclassical monocytes': 'MONOCYTES_NON_Classical',
    'MNP/T doublets': 'MNP/T doublets',
    'MACROPHAGE_PROLIFERATING': 'MACROPHAGE_PROLIFERATING',
    'Erythroid': 'Erythroid',
    'pDC': 'pDC',
    'Erythrophagocytic macrophages': 'MACROPHAGE_ERY',
    'DC2': 'DC2',
    'migDC': 'migDC',
    'Cycling': 'Cycling',
    'DC1': 'DC1',
    'Intestinal macrophages': 'MACROPHAGES_INTESTINAL',
    'Megakaryocytes': 'Megakaryocytes',
    'DOUBLET_LYMPHOID_MACROPHAGE': 'DOUBLET_LYMPHOID_MACROPHAGE',
    'Mast cells': 'Mast cells',
    'Pre-B': 'Pre-B',
    'MACROPHAGE_MICROGLIA': 'MACROPHAGE_MICROGLIA',
    'MACROPHAGE_BAMS': 'MACROPHAGE_BAMS',
}
# The two original lists, derived from the dict in the same order as before.
cells = list(mapper)
replace = list(mapper.values())

# Series.map() returns NaN for any label that is not a key of the dict (for
# example labels that are already harmonised when this cell is re-run), so fall
# back to the existing label instead of silently losing it.
_current_labels = adata.obs['LVL_panimmune'].astype(object)
adata.obs['LVL_panimmune'] = _current_labels.map(mapper).fillna(_current_labels)

In [None]:
# Inspect the distinct LVL_panimmune labels present after the mapping step.
list(adata.obs['LVL_panimmune'].unique())

## further refinement
- Refinement lvl1 - Minor shifts in partitions
- Lvl2 reclustering and shifts in some labels

In [None]:
# refinement lvl1
# minor shifts in partitions
# Drop scVI leiden clusters 3 and 9 and every cell labelled 'Cycling'. Both
# filters are applied in one step and materialised with .copy(): indexing an
# AnnData returns a view, and the .obs assignments below would otherwise force
# an implicit copy of a view-of-a-view.
keep_cells = (
    ~adata.obs['leiden_scVI'].isin(['3','9'])
    & ~adata.obs['LVL_panimmune'].isin(['Cycling'])
)
adata = adata[keep_cells].copy()
adata.obs['LVL_panimmune'] = adata.obs['LVL_panimmune'].astype(str)

# Relabel whole leiden_scVI clusters (cluster id -> label).
cluster_relabel = {
    '11': 'Progenitor',
    '21': 'MACROPHAGE_ALVEOLAR',
    '0': 'MACROPHAGE_ALVEOLAR',
    '12': 'MACROPHAGE_INTERSTITIAL',
}
for cluster_id, new_label in cluster_relabel.items():
    adata.obs.loc[adata.obs['leiden_scVI'].isin([cluster_id]), 'LVL_panimmune'] = new_label

In [None]:
# Refinement lvl2 
# refine boundaries for Alveolar vs interstitial
# NOTE(review): the clustering runs at resolution=1 although the result is
# stored under the key 'leiden_scvi_res_1_5'; the key is reused by later cells.
sc.tl.leiden(adata,resolution =1,key_added = 'leiden_scvi_res_1_5')
sc.set_figure_params(dpi=100, dpi_save=150,figsize=[10,10],fontsize=10)
sc.pl.umap(adata,color = ['leiden_scvi_res_1_5'],legend_loc = 'on data',wspace = 0.5,size = 10)
# Second plot highlights only clusters 26 and 18, the ones relabelled below.
sc.pl.umap(adata,color = ['leiden_scvi_res_1_5'],groups =['26','18'] ,legend_loc = 'on data',wspace = 0.5,size = 10)

adata.obs['LVL_panimmune'] = adata.obs['LVL_panimmune'].astype(str)
# Clusters 26 and 18 get a placeholder label (spelled 'UNKOWN' in the data); a
# later cell overwrites the same clusters with 'MACROPHAGE_OSTEOCLAST_LIKE'.
# NOTE(review): the cluster ids are hard-coded and only valid for this clustering run.
adata.obs.loc[adata.obs['leiden_scvi_res_1_5'].isin(['26','18']),'LVL_panimmune'] = 'MACROPHAGE_UNKOWN'

In [None]:
# UMAP coloured by the refined LVL_panimmune labels, with labels drawn on the data.
sc.set_figure_params(dpi=100, dpi_save=150,figsize=[10,10],fontsize=10)
sc.pl.umap(adata,color = ['LVL_panimmune'],legend_loc = 'on data',wspace = 0.5,size = 10)

In [None]:
# Persist the relabelled and filtered object.
# NOTE(review): this is the same path the notebook reads its input from at the
# top, so the original input is overwritten and a re-run from the first cell
# starts from already-modified labels - confirm this is intended.
adata.write('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/A1_V3_LING_ADULT_IG_annot.h5ad')

# Start with V4 of integration without the doublets and lymphoid populations

In [None]:
# Registry of object name -> h5ad path. The V4 entry is the write target and the
# reload source of later cells; the V3 entry is only used by the commented-out read.
adatas = {
'A1_V4_LING_ADULT_IG_annot_no_lymphoid':'/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/A1_V4_LING_ADULT_IG_annot_no_lymphoid.h5ad',
'A1_V3_LING_ADULT_IG_annot':'/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/A1_V3_LING_ADULT_IG_annot.h5ad',     
}
# adata_raw = sc.read('/nfs/team205/ly5/ys/mono_for_ys_raw.h5ad')
# adata = sc.read(adatas['A1_V3_LING_ADULT_IG_annot'])

In [None]:
adata.obs['LVL_panimmune'] = adata.obs['LVL_panimmune'].astype(str)
# Replace Unkown: clusters 26 and 18 of 'leiden_scvi_res_1_5' become osteoclast-like.
adata.obs.loc[adata.obs['leiden_scvi_res_1_5'].isin(['26','18']),'LVL_panimmune'] = 'MACROPHAGE_OSTEOCLAST_LIKE'
#minor organ correction
adata.obs['organ'] = adata.obs['organ'].str.replace('lymph_Node','Lymph_Node')

# Remove lymphoid inclusions (by scVI leiden cluster and by label), together
# with the 'Cycling' and 'MACROPHAGES_INTESTINAL' labels.
lymphoid_inclusions = ['10','17','20','23','24','25']
lymphoid_inclusions_labels = ['MNP/T doublets','DOUBLET_LYMPHOID_MACROPHAGE']
labels_to_drop = ['Cycling', 'MACROPHAGES_INTESTINAL'] + lymphoid_inclusions_labels

# One combined mask and an explicit copy instead of four chained views: the
# following cells assign to .obs and .X, which must not happen on a view.
keep_cells = (
    ~adata.obs['LVL_panimmune'].isin(labels_to_drop)
    & ~adata.obs['leiden_scVI'].isin(lymphoid_inclusions)
)
adata = adata[keep_cells].copy()

sc.pl.umap(adata,color = ['leiden_scVI','leiden_scvi_res_1_5','LVL_panimmune'],legend_loc = 'on data',wspace = 0.1,size = 10)

# Update with Raw for re-processing

In [None]:
# Upper-case all organ names; later cells match on 'BRAIN'.
adata.obs['organ'] = adata.obs['organ'].str.upper() 

In [None]:
# adata_raw = adata_raw[adata_raw.obs.index.isin(adata.obs.index)]
# adata_raw.obs['organ'] = adata_raw.obs['organ'].str.upper() 
# adata.X = adata_raw.X

In [None]:
# Convert X to a dense matrix for the per-gene edits in the next cell.
# NOTE(review): assumes X is currently a scipy sparse matrix; .todense() is not
# available on a plain ndarray, so re-running this cell out of order will fail.
adata.X = adata.X.todense()

In [None]:
# Select obs rows whose organ contains 'BRAIN' and whose label is BAMS,
# MICROGLIA or ALVEOLAR.
idx = adata.obs[adata.obs['organ'].str.contains(('BRAIN')) & adata.obs['LVL_panimmune'].isin(['MACROPHAGE_BAMS','MACROPHAGE_MICROGLIA','MACROPHAGE_ALVEOLAR'])]

# NOTE(review): for each of the four genes, this loop draws a random 60-80 %
# sample of the selected cells and ADDS randomly drawn integers (between
# ceil(mean) and ceil(max) of the sampled values) to their stored expression.
# That alters the expression data itself, and every downstream plot and
# DE result in this notebook is computed from the altered matrix. Confirm that
# this is intended and documented wherever these results are reported.
# NOTE(review): neither `random` nor `np.random` is seeded, so the outcome is
# not reproducible between runs.
for gene in ['TREM2','P2RY12','OLFML3','CX3CR1']:
    # Fraction of cells to modify: 0.6, 0.7 or 0.8, chosen at random per gene.
    idx_len = int(len(idx)*(random.randint(6, 8)/10))
    idx_samp = idx.sample(n = idx_len)
    mx = np.max(adata[adata.obs.index.isin(idx_samp.index)][:,gene].X)
    mu = (np.mean(adata[adata.obs.index.isin(idx_samp.index)][:,gene].X))
    # NOTE(review): np.random.randint raises ValueError when ceil(mu) >= ceil(mx).
    mxs = np.random.randint(math.ceil(mu),math.ceil(mx),size = idx_len)
    # NOTE(review): this assigns to .X of a chained view; whether the values
    # reach the parent `adata` depends on anndata's view write-through - confirm.
    adata[adata.obs.index.isin(idx_samp.index)][:,gene].X = adata[adata.obs.index.isin(idx_samp.index)][:,gene].X +  np.array(mxs).reshape(len(mxs),1)
from scipy import sparse
# Back to CSR storage after the dense edits.
adata.X = sparse.csr_matrix(adata.X)    

In [None]:
# Histogram of the P2RY12 values in X across all cells whose organ contains 'BRAIN'.
plt.hist(np.array(adata[adata.obs['organ'].str.contains(('BRAIN'))][:,['P2RY12']].X.todense()))
plt.show()

In [None]:
# pd.set_option('display.max_rows', 300)
# adata.obs[adata.obs['LVL_panimmune'].str.contains('MAC')].groupby(['organ','LVL_panimmune']).apply(len)

In [None]:
# Rebuild the neighbour graph and UMAP on the scVI representation after filtering.
# NOTE(review): with use_rep set, n_pcs=10 may restrict the graph to the first
# 10 columns of 'X_scVI' - confirm against scanpy's neighbors documentation.
sc.pp.neighbors(adata, n_neighbors=15, n_pcs=10,use_rep = 'X_scVI')
sc.tl.umap(adata)
sc.pl.umap(adata,color = ['leiden_scVI','leiden_scvi_res_1_5','LVL_panimmune'],legend_loc = 'on data',wspace = 0.1,size = 10)

In [None]:
# UMAP coloured by the obs columns 'organ', 'kit' and 'LVL_panimmune'.
sc.pl.umap(adata,color = ['organ','kit','LVL_panimmune'],legend_loc = 'on data',wspace = 0.1,size = 10)

In [None]:
# Larger (15x15) UMAP of the labels alone.
sc.set_figure_params(dpi=100, dpi_save=150,figsize=[15,15],fontsize=10)
sc.pl.umap(adata,color = ['LVL_panimmune'],legend_loc = 'on data',wspace = 0.1,size = 10)

In [None]:
# Cell counts per (organ, label) for rows whose organ contains 'BRAIN'.
pd.set_option('display.max_rows', 300)
adata.obs[adata.obs['organ'].str.contains(('BRAIN'))].groupby(['organ','LVL_panimmune']).apply(len)

In [None]:
#brain correction
# In BRAIN cells, collapse the alveolar, BAM and microglia labels into the
# single class 'MACROPHAGE_MICROGLIA_BAMS'.
# Exact-value replacement is used instead of chained substring str.replace
# calls: the chained form turned an existing 'MACROPHAGE_MICROGLIA_BAMS' into
# 'MACROPHAGE_MICROGLIA_BAMS_BAMS' whenever this cell was run a second time.
adata.obs['LVL_panimmune'] = adata.obs['LVL_panimmune'].astype(str)
is_brain = adata.obs['organ'].isin(['BRAIN'])
brain_relabel = {
    'MACROPHAGE_ALVEOLAR': 'MACROPHAGE_MICROGLIA_BAMS',
    'MACROPHAGE_BAMS': 'MACROPHAGE_MICROGLIA_BAMS',
    'MACROPHAGE_MICROGLIA': 'MACROPHAGE_MICROGLIA_BAMS',
}
adata.obs.loc[is_brain, 'LVL_panimmune'] = adata.obs.loc[is_brain, 'LVL_panimmune'].replace(brain_relabel)

In [None]:
# Marker dotplots grouped by label: first for BRAIN cells only, then for all cells.
markers = ['TREM2','P2RY12','OLFML3','LYVE1','C1QA','C1QC','S100A8','S100A9','CD14','FCGR3A']
spot_var = 'LVL_panimmune'
# NOTE(review): only the second call passes use_raw=False; if adata.raw is set,
# the two plots may be drawn from different matrices - confirm.
(sc.pl.dotplot(adata[adata.obs['organ'].isin(['BRAIN'])], var_names = markers, groupby=spot_var,dendrogram=False,standard_scale='var', color_map='Reds', show = True)) # title=i
sc.pl.dotplot(adata, var_names = markers, groupby=spot_var,dendrogram=False,standard_scale='var', color_map='Reds', show = True,use_raw=False) # title=i

# Cluster just brain macs for reviewer

In [None]:
# Brain cells carrying the combined microglia/BAM label. .copy() turns the
# subset into a standalone AnnData so that sc.tl.leiden stores its result in a
# real object instead of a view of `adata`.
adata_BR = adata[adata.obs['organ'].isin(['BRAIN']) & adata.obs['LVL_panimmune'].isin(['MACROPHAGE_MICROGLIA_BAMS'])].copy()
sc.tl.leiden(adata_BR,resolution = 0.5)
# Drop clusters with fewer than 3 cells (group sizes are computed once).
cluster_sizes = adata_BR.obs.groupby('leiden').apply(len)
rm_cl = cluster_sizes[cluster_sizes < 3].index.values
adata_BR = adata_BR[~adata_BR.obs['leiden'].isin(rm_cl)]

In [None]:
# Gene sets plotted per leiden cluster of the brain subset: BAM and microglia
# markers plus general-macrophage and monocyte sets.
gene_sets = {'BAM' :['CCL7', 'CCL8', 'F13A1', 'APOE', 'DAB2', 'PF4', 'MS4A7', 'MRC1'],
'MICROGLIA' : ['TREM2','P2RY12','OLFML3','SPARC','TMEM119','GPR34','SELPLG'],
'Macrophage_general':['CD14','C1QA','C1QC','CD163'],
'Monocyte_control':['CCR2','FCN1','S100A8','S100A9']        }
dp = sc.pl.dotplot(adata_BR, gene_sets, 'leiden', return_fig=True,standard_scale='var', color_map='Reds')
# add_totals() adds the per-group cell counts to the figure before it is shown.
dp.add_totals().show()



# Compute Myeloid DE

In [None]:
plt.rcdefaults()
spot_var = 'LVL_panimmune'
# Rank genes for every LVL_panimmune label with the Wilcoxon method, keeping
# 500 genes per group.
sc.tl.rank_genes_groups(adata, spot_var, method='wilcoxon',n_genes=500)
sc.pl.rank_genes_groups(adata, n_genes=50, sharey=False)
result = adata.uns['rank_genes_groups']
groups = result['names'].dtype.names

# Wide table with one column per (group, statistic). The suffix is the first
# letter of the statistic: _n = names, _p = pvals, _l = logfoldchanges.
DE = pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
    for group in groups for key in ['names', 'pvals','logfoldchanges']}).head(500)
DE_name ="./DEGS_LVL_pan_immune_res_1_5_LING_adult.csv"
DE.to_csv(DE_name)
degs = DE[:]

# Wide -> long: melt each statistic separately. All three melts follow the same
# group order, so their rows line up when concatenated column-wise.
n = pd.melt(degs.loc[:, degs.columns.str.endswith("_n")])
p = pd.melt(degs.loc[:, degs.columns.str.endswith("_p")])
l = pd.melt(degs.loc[:, degs.columns.str.endswith("_l")])
# Strip only the trailing "_n" from the group column. The previous
# DataFrame-wide regex replace removed every "_n" substring, including any
# inside gene names or in the middle of a group name.
n["variable"] = n["variable"].str.replace(r"_n$", "", regex=True)
n = n.rename(columns={"variable": "cluster", "value": "gene"})
p = p.drop(["variable"], axis=1).rename(columns={"value": "p_val"})
l = l.drop(["variable"], axis=1).rename(columns={"value": "logfc"})
concat = pd.concat([n, p, l], axis=1)
# Remove mitochondrial (MT-) and RP11- genes; copy so that the column
# assignment below works on an independent frame.
concat = concat[~concat["gene"].str.startswith("MT-")]
concat = concat[~concat["gene"].str.startswith("RP11-")].copy()
concat["cluster"] = concat["cluster"].astype(str)
# Top 5 genes per cluster by smallest p-value.
marker_df = concat.groupby('cluster').apply(lambda grp: grp.nsmallest(5, 'p_val')).reset_index(drop=True)
# marker_df = concat.groupby('cluster').apply(lambda grp: grp.nlargest(5, 'p_val')).reset_index(drop=True)

# cluster -> list of marker genes, sorted by cluster name.
markers = marker_df.groupby('cluster')['gene'].apply(list).to_dict()
markers = dict(sorted(markers.items()))
markers = {str(k):v for k,v in markers.items()}
data_temp = adata
(sc.pl.dotplot(data_temp, var_names = markers, groupby=spot_var,dendrogram=False,standard_scale='var', color_map='Reds', show = True, save = 'lvl_pan_immune_diff_expression_adult_atlas.pdf')) # title=i

In [None]:
# marker_df[marker_df['cluster'].str.contains('INTEST')].sort_values('p_val').head(30)

In [None]:
# adata_br = adata[adata.obs['organ'].isin(['BRAIN'])]
# gene_sets = {'BAM' :['CCL7', 'CCL8', 'F13A1', 'APOE', 'DAB2', 'PF4', 'MS4A7', 'MRC1'],
# 'MICROGLIA' : ['TREM2','P2RY12','OLFML3','HEXB','SPARC','HEXB','SPARC','TMEM119','GPR34','SELPLG'],}
# # add mean expression to brain data
# for sets in gene_sets:
#     set_mean = np.mean(adata[:,gene_sets[sets]].X,axis = 0)
#     set_mean_raw = np.mean(adata[:,gene_sets[sets]].raw.X,axis = 0)
#     set_cut = set_mean*1.2#np.std(adata[:,gene_sets[sets]].X,axis = 0)
#     set_cut = set_mean_raw*1.2#np.std(adata[:,gene_sets[sets]].X,axis = 0)
#     adata_br[adata_br.obs['LVL_panimmune'].str.contains(sets)][:,gene_sets[sets]].X =adata_br[adata_br.obs['LVL_panimmune'].str.contains(sets)][:,gene_sets[sets]].X+ set_cut
#     adata_br[adata_br.obs['LVL_panimmune'].str.contains(sets)][:,gene_sets[sets]].raw.X =adata_br[adata_br.obs['LVL_panimmune'].str.contains(sets)][:,gene_sets[sets]].raw.X+ set_cut
    
# adata[adata.obs.index.isin(adata_br.obs.index)].X = adata_br.X
# adata[adata.obs.index.isin(adata_br.obs.index)].raw.X = adata_br.raw.X

In [None]:
# Reload the wide DE table and rebuild the long-format marker table.
# NOTE(review): the DE cell writes this table to './DEGS_...csv' relative to
# the working directory, while this cell reads an absolute path - confirm that
# both refer to the same file.
DE = pd.read_csv('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/DEGS_LVL_pan_immune_res_1_5_LING_adult.csv')
degs = DE[:]

# Wide -> long: columns ending in _n hold gene names, _p p-values and
# _l log fold changes. The saved index column matches none of these suffixes
# and is therefore ignored.
n = pd.melt(degs.loc[:, degs.columns.str.endswith("_n")])
p = pd.melt(degs.loc[:, degs.columns.str.endswith("_p")])
l = pd.melt(degs.loc[:, degs.columns.str.endswith("_l")])
# Strip only the trailing "_n" from the group column. The previous
# DataFrame-wide regex replace removed every "_n" substring, including any
# inside gene names or in the middle of a group name.
n["variable"] = n["variable"].str.replace(r"_n$", "", regex=True)
n = n.rename(columns={"variable": "cluster", "value": "gene"})
p = p.drop(["variable"], axis=1).rename(columns={"value": "p_val"})
l = l.drop(["variable"], axis=1).rename(columns={"value": "logfc"})
concat = pd.concat([n, p, l], axis=1)
# Remove mitochondrial (MT-) and RP11- genes; copy so that the column
# assignment below works on an independent frame.
concat = concat[~concat["gene"].str.startswith("MT-")]
concat = concat[~concat["gene"].str.startswith("RP11-")].copy()
concat["cluster"] = concat["cluster"].astype(str)
# Top 5 genes per cluster by smallest p-value, used for the `markers` dict.
marker_df = concat.groupby('cluster').apply(lambda grp: grp.nsmallest(5, 'p_val')).reset_index(drop=True)
# marker_df = concat.groupby('cluster').apply(lambda grp: grp.nlargest(5, 'p_val')).reset_index(drop=True)

markers = marker_df.groupby('cluster')['gene'].apply(list).to_dict()
markers = dict(sorted(markers.items()))
markers = {str(k):v for k,v in markers.items()}
data_temp = adata
# Top 50 genes per cluster are what gets exported in long format.
marker_df = concat.groupby('cluster').apply(lambda grp: grp.nsmallest(50, 'p_val')).reset_index(drop=True)
marker_df.to_csv('long_format_DEGS_LVL_pan_immune_res_1_5_LING_adult.csv')

In [None]:
# Hand-curated marker genes per label. The key order also defines the category
# order used for the dotplot rows below.
markers ={
 'MACROPHAGE_ALVEOLAR': ['ACP5', 'APOC1', 'FBP1', 'ALDH2', 'FN1'],
'MACROPHAGE_INTERSTITIAL': ['CTSB', 'CTSL', 'CSTB', 'CCL2', 'FTH1'],
 'MACROPHAGE_MICROGLIA_BAMS': ['NEAT1','MALAT1','OLFML3','P2RY12','CX3CR1'],
 'MACROPHAGE_KUPFFER_LIKE': ['CD5L', 'SLC40A1', 'SELENOP', 'HMOX1', 'C1QC'],
 'MACROPHAGE_LYVE1_HIGH': ['RNASE1', 'PLTP', 'LYVE1', 'SELENOP', 'F13A1'],
 'MACROPHAGE_MHCII_HIGH': ['RGS1', 'SELENOP', 'MARCKS', 'ARL4C', 'SAT1'],
 'MACROPHAGE_OSTEOCLAST_LIKE': ['APOE', 'GPNMB', 'ACP5', 'CHIT1', 'PLD3'],
 'MACROPHAGE_ERY': ['FCGR3A', 'MS4A6A', 'MARCKS', 'IFNGR1', 'RPS4X'],
 'MACROPHAGE_PERI': ['IER3', 'SPP1', 'CXCL2', 'NFKBIA', 'CCL3'],
 'MACROPHAGE_PROLIFERATING': ['STMN1', 'TUBA1B', 'H2AFZ', 'TUBB', 'MKI67'],
    
 'MONOCYTES_Classical': ['CD14','S100A9', 'S100A8', 'FCN1', 'VCAN','CCR2'],
 'MONOCYTES_NON_Classical': ['FCGR3A','LST1', 'IFITM2', 'COTL1', 'AIF1'],
    
 'DC1': ['CLEC9A','BATF3','HLA-DPB1', 'HLA-DPA1', 'LSP1'],
 'DC2': ['CLEC10A','CD1C', 'CD74', 'HLA-DRA', 'HLA-DQA1'],
 'migDC': ['BIRC3', 'LAMP3', 'CCR7', 'TXN', 'LSP1'],
 'pDC': ['HLA-DPB1', 'HLA-DPA1', 'CCDC50', 'CD74', 'JCHAIN',],
#  'MACROPHAGES_INTESTINAL': ['CD209', 'CXCL12', 'SDS', 'MS4A6A','MFAP4','ITGA8','POSTN','PAPPA'],
 'Mast cells': ['HDC', 'CLC', 'SRGN', 'CPA3', 'GATA2'],

 'Progenitor': ['SPINK2', 'PRSS57', 'CD34','MLLT3', 'HMGN2', 'ENO1'],
 'Erythroid': ['HBA1', 'RPS5', 'RPS3', 'RPL7A', 'RPLP0'],
 'Megakaryocytes': ['CAVIN2', 'TAGLN2', 'PF4', 'RAP1B', 'RGS18'],}
# NOTE(review): reorder_categories expects the dict keys to match the labels
# present in the data exactly; a missing or extra label should raise - confirm.
adata.obs[spot_var] = adata.obs[spot_var].astype('category').cat.reorder_categories(markers.keys())
# First call saves the plain dotplot; the second returns the figure object so
# that totals can be added before saving a second PDF.
(sc.pl.dotplot(adata, var_names = markers, groupby=spot_var,dendrogram=False,standard_scale='var', color_map='Reds', show = True, save = 'lvl_pan_immune_diff_expression_adult_atlas.pdf'))
dp = (sc.pl.dotplot(adata, var_names = markers, groupby=spot_var,dendrogram=False,standard_scale='var', color_map='Reds', show = True,return_fig = True))
dp.add_totals().show()
dp.savefig('./add_totals_myeloid_adult_DEGS.pdf',dpi = 300)

In [None]:
# Rename the interstitial macrophage label to make its lung origin explicit.
# Cast to str first so that .str.replace operates on plain strings.
adata.obs['LVL_panimmune'] = adata.obs['LVL_panimmune'].astype(str)
adata.obs['LVL_panimmune'] = adata.obs['LVL_panimmune'].str.replace('MACROPHAGE_INTERSTITIAL','MACROPHAGE_LUNG_INTERSTITIAL')

In [None]:
spot_var = 'LVL_panimmune'
# Same curated marker table as the previous dotplot cell, with the interstitial
# key updated to 'MACROPHAGE_LUNG_INTERSTITIAL' to match the renamed label.
markers ={
 'MACROPHAGE_ALVEOLAR': ['ACP5', 'APOC1', 'FBP1', 'ALDH2', 'FN1'],
'MACROPHAGE_LUNG_INTERSTITIAL': ['CTSB', 'CTSL', 'CSTB', 'CCL2', 'FTH1'],
 'MACROPHAGE_MICROGLIA_BAMS': ['NEAT1','MALAT1','OLFML3','P2RY12','CX3CR1'],
 'MACROPHAGE_KUPFFER_LIKE': ['CD5L', 'SLC40A1', 'SELENOP', 'HMOX1', 'C1QC'],
 'MACROPHAGE_LYVE1_HIGH': ['RNASE1', 'PLTP', 'LYVE1', 'SELENOP', 'F13A1'],
 'MACROPHAGE_MHCII_HIGH': ['RGS1', 'SELENOP', 'MARCKS', 'ARL4C', 'SAT1'],
 'MACROPHAGE_OSTEOCLAST_LIKE': ['APOE', 'GPNMB', 'ACP5', 'CHIT1', 'PLD3'],
 'MACROPHAGE_ERY': ['FCGR3A', 'MS4A6A', 'MARCKS', 'IFNGR1', 'RPS4X'],
 'MACROPHAGE_PERI': ['IER3', 'SPP1', 'CXCL2', 'NFKBIA', 'CCL3'],
 'MACROPHAGE_PROLIFERATING': ['STMN1', 'TUBA1B', 'H2AFZ', 'TUBB', 'MKI67'],
    
 'MONOCYTES_Classical': ['CD14','S100A9', 'S100A8', 'FCN1', 'VCAN','CCR2'],
 'MONOCYTES_NON_Classical': ['FCGR3A','LST1', 'IFITM2', 'COTL1', 'AIF1'],
    
 'DC1': ['CLEC9A','BATF3','HLA-DPB1', 'HLA-DPA1', 'LSP1'],
 'DC2': ['CLEC10A','CD1C', 'CD74', 'HLA-DRA', 'HLA-DQA1'],
 'migDC': ['BIRC3', 'LAMP3', 'CCR7', 'TXN', 'LSP1'],
 'pDC': ['HLA-DPB1', 'HLA-DPA1', 'CCDC50', 'CD74', 'JCHAIN',],
#  'MACROPHAGES_INTESTINAL': ['CD209', 'CXCL12', 'SDS', 'MS4A6A','MFAP4','ITGA8','POSTN','PAPPA'],
 'Mast cells': ['HDC', 'CLC', 'SRGN', 'CPA3', 'GATA2'],

 'Progenitor': ['SPINK2', 'PRSS57', 'CD34','MLLT3', 'HMGN2', 'ENO1'],
 'Erythroid': ['HBA1', 'RPS5', 'RPS3', 'RPL7A', 'RPLP0'],
 'Megakaryocytes': ['CAVIN2', 'TAGLN2', 'PF4', 'RAP1B', 'RGS18'],}

# Font type 42 and svg.fonttype 'none' are set so that text in the exported
# figures stays as text; presumably for editing in vector tools - confirm.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.rcParams['svg.fonttype'] = 'none'

# Global font override for the figures produced below.
font = {'weight' : 'normal',
        'size'   : 30}
plt.rc('font', **font)

# NOTE(review): reorder_categories expects the dict keys to match the labels
# present in the data exactly; a missing or extra label should raise - confirm.
adata.obs[spot_var] = adata.obs[spot_var].astype('category').cat.reorder_categories(markers.keys())
# These calls write to the same two PDF names as the previous dotplot cell.
(sc.pl.dotplot(adata, var_names = markers, groupby=spot_var,dendrogram=False,standard_scale='var', color_map='Reds', show = True, save = 'lvl_pan_immune_diff_expression_adult_atlas.pdf'))
dp = (sc.pl.dotplot(adata, var_names = markers, groupby=spot_var,dendrogram=False,standard_scale='var', color_map='Reds', show = True,return_fig = True))
dp.add_totals().show()
dp.savefig('./add_totals_myeloid_adult_DEGS.pdf',dpi = 300)

In [None]:
# Number of cells per LVL_panimmune label.
adata.obs.groupby('LVL_panimmune').apply(len)

In [None]:
# Save the object to the V4 (no-lymphoid) path registered in `adatas`.
adata.write(adatas['A1_V4_LING_ADULT_IG_annot_no_lymphoid'])

#  Plots

In [None]:
# Reload the saved V4 object so the plotting section can be run on its own.
adata = sc.read(adatas['A1_V4_LING_ADULT_IG_annot_no_lymphoid'])

In [None]:
# adata.write('A1_V1_LING_ADULT_IG_annot.h5ad')

In [None]:
# Cell counts for every (organ, label) combination; raise the row limit so the
# whole table is displayed.
pd.set_option('display.max_rows', 500)
adata.obs.groupby(['organ','LVL_panimmune']).apply(len)

In [None]:
# Binary flag: 'Macrophage' for every label containing 'MACROPHAGE', otherwise
# 'Not_macrophage'.
adata.obs['is_mac'] = 'Not_macrophage'
adata.obs.loc[adata.obs['LVL_panimmune'].str.contains('MACROPHAGE'),'is_mac'] = 'Macrophage'
spot_var = 'LVL_panimmune'

# Display order of the labels. The interstitial entry uses the renamed label
# 'MACROPHAGE_LUNG_INTERSTITIAL': the rename happened before the V4 object was
# saved, so the old name 'MACROPHAGE_INTERSTITIAL' no longer occurs in the data.
order = ['MACROPHAGE_ALVEOLAR',
 'MACROPHAGE_LUNG_INTERSTITIAL',
 'MACROPHAGE_MICROGLIA_BAMS',
 'MACROPHAGE_KUPFFER_LIKE',
 'MACROPHAGE_LYVE1_HIGH',
 'MACROPHAGE_MHCII_HIGH',
 'MACROPHAGE_OSTEOCLAST_LIKE',
 'MACROPHAGE_ERY',
 'MACROPHAGE_PERI',
 'MACROPHAGE_PROLIFERATING',
 'MONOCYTES_Classical',
 'MONOCYTES_NON_Classical',
 'DC1',
 'DC2',
 'migDC',
 'pDC',
 'Mast cells',
 'Progenitor',
 'Erythroid',
 'Megakaryocytes']


# plots

In [None]:
# Organ colour key: the CSV index holds the organ name, 'color' the colour.
organ_col = pd.read_csv('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/color_key_adult.csv',index_col = 0)
col_dict = dict(zip(organ_col.index, organ_col['color']))
# Keep only organs present in the data, ordered like the 'organ_uni' categories.
col_dict = {
    organ: col_dict[organ]
    for organ in adata.obs['organ_uni'].cat.categories
    if organ in col_dict
}
adata.uns['organ_uni_colors'] = [*col_dict.values()]

In [None]:
plt.rcdefaults()
var = "is_mac"
# Number the categories of `var` from 0 so the UMAP can show short numeric
# labels on the data and a "number : name" legend next to it.
adata.obs[var] = adata.obs[var].astype('category')
cells = list(adata.obs[var].cat.categories)
col = list(range(len(cells)))
dic = {name: number for name, number in zip(cells, col)}

# 'num' holds the category number as a string; '<var>_num' the legend text.
number_labels = adata.obs[var].astype(str).map(dic).astype(str)
adata.obs['num'] = number_labels
adata.obs[f"{var}_num"] = number_labels + " : " + adata.obs[var].astype(str)

col_pal = [
    '#94BFB1', '#ff0000', '#B49EC8', '#E0EE70', '#EE943E', '#4C7BAB',
    '#E78AB8', '#AFBFCC', '#FF993F', "#FFFF00", "#1CE6FF", "#FF34FF",
    "#FF4A46", "#008941", "#006FA6", "#A30059", "#FFDBE5", "#7A4900",
    "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
    "#5A0007", "#809693", "#6A3A4C", "#1B4400", "#4FC601", "#3B5DFF",
]

import matplotlib
matplotlib.rcdefaults()  # reset matplotlib defaults, since seaborn tends to change them
# Two axes: ax1 carries the numbered UMAP; ax2 has zero width and is only used
# for the "number : name" legend.
fig, (ax1, ax2) = plt.subplots(
    1, 2, figsize=(10, 10), gridspec_kw={'wspace': 0, 'width_ratios': [1, 0]}
)
p2 = sc.pl.umap(adata, color=f"{var}_num", ax=ax2, show=False, title="", palette=col_pal)
p3 = sc.pl.umap(
    adata, color="num", legend_loc="on data", size=5, legend_fontsize='small',
    ax=ax1, show=False, title="Myeloid_broad", palette=col_pal,
)
fig.savefig(f"./{var}_num.pdf", bbox_inches='tight')
plt.show()

In [None]:
# This cell repeats the previous numbered 'is_mac' UMAP and writes the same file.
plt.rcdefaults()
var = "is_mac"
adata.obs[var] = adata.obs[var].astype('category')
cells = list(adata.obs[var].cat.categories)
col = list(range(len(cells)))
dic = {name: number for name, number in zip(cells, col)}

# 'num' is the category number as a string, '<var>_num' the "number : name" text.
number_labels = adata.obs[var].astype(str).map(dic).astype(str)
adata.obs['num'] = number_labels
adata.obs[f"{var}_num"] = number_labels + " : " + adata.obs[var].astype(str)

col_pal = [
    '#94BFB1', '#ff0000', '#B49EC8', '#E0EE70', '#EE943E', '#4C7BAB',
    '#E78AB8', '#AFBFCC', '#FF993F', "#FFFF00", "#1CE6FF", "#FF34FF",
    "#FF4A46", "#008941", "#006FA6", "#A30059", "#FFDBE5", "#7A4900",
    "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
    "#5A0007", "#809693", "#6A3A4C", "#1B4400", "#4FC601", "#3B5DFF",
]

import matplotlib
matplotlib.rcdefaults()  # reset matplotlib defaults, since seaborn tends to change them
# ax1: numbered UMAP; ax2 (zero width): legend only.
fig, (ax1, ax2) = plt.subplots(
    1, 2, figsize=(10, 10), gridspec_kw={'wspace': 0, 'width_ratios': [1, 0]}
)
p2 = sc.pl.umap(adata, color=f"{var}_num", ax=ax2, show=False, title="", palette=col_pal)
p3 = sc.pl.umap(
    adata, color="num", legend_loc="on data", size=5, legend_fontsize='small',
    ax=ax1, show=False, title="Myeloid_broad", palette=col_pal,
)
fig.savefig(f"./{var}_num.pdf", bbox_inches='tight')
plt.show()

In [None]:
# Two-colour UMAP of the is_mac flag, saved through scanpy's `save` suffix.
col_pal = ['#94BFB1', '#ff0000',  ]
plt.rcdefaults()
plt.rcParams["figure.figsize"] = (10, 10)
sc.pl.umap(adata, color = "is_mac",size=5,legend_fontsize='small',show=True,title="is_mac", palette= col_pal,save= 'is_mac_no_legend')

In [None]:
plt.rcdefaults()
var = "LVL_panimmune"
# Number the label categories from 1 so the UMAP can carry short numeric labels
# on the data and a "number : name" legend next to it.
adata.obs[var] = adata.obs[var].astype('category')
cells = list(adata.obs[var].cat.categories)
col = list(range(1, len(cells) + 1))
dic = {name: number for name, number in zip(cells, col)}

# 'num' holds the category number as a string; '<var>_num' the legend text.
number_labels = adata.obs[var].astype(str).map(dic).astype(str)
adata.obs['num'] = number_labels
adata.obs[f"{var}_num"] = number_labels + " : " + adata.obs[var].astype(str)

col_pal = [
    '#94BFB1', '#ff0000', '#B49EC8', '#EE943E', '#E0EE70', '#4C7BAB',
    '#E78AB8', '#AFBFCC', '#FF993F', "#FFFF00", "#1CE6FF", "#FF34FF",
    "#FF4A46", "#008941", "#006FA6", "#A30059", "#FFDBE5", "#7A4900",
    "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
    "#5A0007", "#809693", "#6A3A4C", "#1B4400", "#4FC601", "#3B5DFF",
]

import matplotlib
matplotlib.rcdefaults()  # reset matplotlib defaults, since seaborn tends to change them
# ax1: numbered UMAP; ax2 (zero width): legend only.
fig, (ax1, ax2) = plt.subplots(
    1, 2, figsize=(10, 10), gridspec_kw={'wspace': 0, 'width_ratios': [1, 0]}
)
p2 = sc.pl.umap(adata, color=f"{var}_num", ax=ax2, show=False, title="", palette=col_pal)
p3 = sc.pl.umap(
    adata, color="num", legend_loc="on data", size=5, legend_fontsize='x-large',
    legend_fontweight=None, ax=ax1, show=False, title="Myeloid_broad", palette=col_pal,
)

fig.savefig(f"./{var}_mye_num.pdf", bbox_inches='tight')
plt.show()

In [None]:
plt.rcdefaults()
var = "organ_uni"
# Number the organ categories from 1 so the UMAP can carry short numeric labels
# on the data and a "number : name" legend next to it.
adata.obs[var] = adata.obs[var].astype('category')
cells = list(adata.obs[var].cat.categories)
col = list(range(1, len(cells) + 1))
dic = dict(zip(cells, col))

# 'num' holds the category number as a string; '<var>_num' the legend text.
adata.obs['num'] = adata.obs[var].astype(str).map(dic).astype(str)
adata.obs[var+'_num'] = adata.obs['num'].astype(str) + " : " + adata.obs[var].astype(str)

# Organ colours come from the colour key loaded earlier (`col_dict`). The
# generic 30-colour palette that used to be assigned here was overwritten on
# the very next line and has been removed.
# NOTE(review): col_dict only holds organs found in the colour-key CSV; if an
# organ category is missing there, the palette has fewer entries than
# categories - confirm that the key is complete.
col_pal = list(col_dict.values())

import matplotlib
matplotlib.rcdefaults() #Reset matplotlib defaults as seaborn tends to change them
# ax1: numbered UMAP; ax2 (zero width): legend only.
fig, (ax1, ax2,) = plt.subplots(1,2, figsize=(10,10), gridspec_kw={'wspace':0,'width_ratios': [1,0]})
p2 = sc.pl.umap(adata, color = (var+'_num') ,ax=ax2,show=False,title="", palette= col_pal) #title=i
p3 = sc.pl.umap(adata, color = "num",legend_loc="on data",size=5,legend_fontsize='x-large',legend_fontweight=None,ax=ax1,show=False,title="Myeloid_broad", palette= col_pal) #title=i

fig.savefig('./'+var+"_mye_num.pdf",bbox_inches='tight')
plt.show()

In [None]:
# Display the AnnData summary.
adata

In [None]:
# Export the list of 'organ_uni' categories to ./color_key.csv.
pd.DataFrame(list(adata.obs['organ_uni'].cat.categories)).to_csv('./color_key.csv')

# Mac only obj

In [None]:
# Macrophage-only object: every cell whose label contains 'MACROPHAGE'.
# .copy() makes it a standalone AnnData, so the neighbour graph and UMAP
# computed below are stored in a real object rather than in a view of `adata`.
adata_mac = adata[adata.obs['LVL_panimmune'].str.contains('MACROPHAGE')].copy()
# NOTE(review): with use_rep set, n_pcs=10 may restrict the graph to the first
# 10 columns of 'X_scVI' - confirm against scanpy's neighbors documentation.
sc.pp.neighbors(adata_mac, n_neighbors=30, n_pcs=10,copy=False, use_rep = 'X_scVI')
sc.tl.umap(adata_mac)
plt.rcdefaults()
# sc.tl.leiden(adata_mac, resolution=3)

In [None]:
# UMAP of the macrophage-only object coloured by label.
sc.pl.umap(adata_mac,color = 'LVL_panimmune')

In [None]:
# Save the macrophage-only object; the plotting section below reloads it.
adata_mac.write('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/A1_V4_MACS_LING_ADULT_IG_annot_no_lymphoid.h5ad')

# plots below

In [None]:
# Reload the saved macrophage-only object so the plots can be run on their own.
adata_mac = sc.read('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/A1_V4_MACS_LING_ADULT_IG_annot_no_lymphoid.h5ad')

In [None]:
# Organ colour key: the CSV index holds the organ name, 'color' the colour.
organ_col = pd.read_csv('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/color_key_adult.csv',index_col = 0)
col_dict = dict(zip(organ_col.index, organ_col['color']))
# Keep only organs present in the macrophage object, ordered like its
# 'organ_uni' categories.
col_dict = {
    organ: col_dict[organ]
    for organ in adata_mac.obs['organ_uni'].cat.categories
    if organ in col_dict
}

In [None]:
# obs column used by the numbered UMAP in the next cell.
var = 'LVL_panimmune'

In [None]:
plt.rcdefaults()
# Number the categories of `var` (set in the previous cell) from 1 so the UMAP
# can carry short numeric labels on the data and a "number : name" legend.
adata_mac.obs[var] = adata_mac.obs[var].astype('category')
cells = list(adata_mac.obs[var].cat.categories)
col = list(range(1, len(cells) + 1))
dic = {name: number for name, number in zip(cells, col)}

# 'num' holds the category number as a string; '<var>_num' the legend text.
number_labels = adata_mac.obs[var].astype(str).map(dic).astype(str)
adata_mac.obs['num'] = number_labels
adata_mac.obs[f"{var}_num"] = number_labels + " : " + adata_mac.obs[var].astype(str)

col_pal = [
    '#94BFB1', '#ff0000', '#B49EC8', '#EE943E', '#E0EE70', '#4C7BAB',
    '#E78AB8', '#AFBFCC', '#FF993F', "#FFFF00", "#1CE6FF", "#FF34FF",
    "#FF4A46", "#008941", "#006FA6", "#A30059", "#FFDBE5", "#7A4900",
    "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
    "#5A0007", "#809693", "#6A3A4C", "#1B4400", "#4FC601", "#3B5DFF",
]

import matplotlib
matplotlib.rcdefaults()  # reset matplotlib defaults, since seaborn tends to change them
# ax1: numbered UMAP; ax2 (zero width): legend only.
fig, (ax1, ax2) = plt.subplots(
    1, 2, figsize=(10, 10), gridspec_kw={'wspace': 0, 'width_ratios': [1, 0]}
)
p2 = sc.pl.umap(adata_mac, color=f"{var}_num", ax=ax2, show=False, title="", palette=col_pal)
p3 = sc.pl.umap(
    adata_mac, color="num", legend_loc="on data", size=5, legend_fontsize=30,
    legend_fontweight='normal', ax=ax1, show=False, title="Myeloid_broad", palette=col_pal,
)

fig.savefig(f"./{var}_mac_macs_num.pdf", bbox_inches='tight', dpi=300)
plt.show()

In [None]:
plt.rcdefaults()
var = "organ_uni"
# Number every organ category (0-based) so the UMAP can carry short numeric
# labels on the data while the side legend spells out "<number> : <organ>".
adata_mac.obs[var] = adata_mac.obs[var].astype('category')
cells = list(adata_mac.obs[var].cat.categories)
col = list(range(0, len(cells)))
dic = dict(zip(cells, col))

# Per-cell labels: the category number as a string, and "<number> : <organ>".
num_labels = adata_mac.obs[var].astype(str).map(dic).astype(str)
key_labels = num_labels + " : " + adata_mac.obs[var].astype(str)

# Store both label columns as categoricals ordered by organ number. Left as
# plain strings their categories would be ordered lexicographically
# ("0", "1", "10", "11", ..., "2"), so with more than ten organs the
# per-organ colours in `col_pal` (which follow the organ category order) would
# be attached to the wrong organs.
adata_mac.obs['num'] = pd.Categorical(num_labels, categories=[str(n) for n in col])
adata_mac.obs[var+'_num'] = pd.Categorical(
    key_labels,
    categories=[str(n) + " : " + str(cell) for cell, n in dic.items()],
)

# Per-organ palette taken from `col_dict`, which is defined in another cell.
# NOTE(review): assumes col_dict holds exactly one colour per organ category,
# in category order -- confirm against the colour-key CSV.
col_pal = list(col_dict.values())

import matplotlib
matplotlib.rcdefaults() #Reset matplotlib defaults as seaborn tends to change them
# ax1 holds the UMAP with on-data numbers; ax2 has zero width and only carries
# the "<number> : <organ>" legend.
fig, (ax1, ax2,) = plt.subplots(1,2, figsize=(10,10), gridspec_kw={'wspace':0,'width_ratios': [1,0]})
p2 = sc.pl.umap(adata_mac, color = (var+'_num') ,ax=ax2,show=False,title="", palette= col_pal)
p3 = sc.pl.umap(adata_mac, color = "num",legend_loc="on data",size=5,legend_fontsize='small',ax=ax1,show=False,title="Myeloid_broad", palette= col_pal)

fig.savefig('./'+var+"_macs_num.pdf",bbox_inches='tight')
plt.show()

In [None]:
adata_mac

In [None]:
var = "organ_uni"
# Same numbered-organ UMAP as the `_macs_num.pdf` figure, drawn with larger
# points (size=10) and saved under a different file name.
adata_mac.obs[var] = adata_mac.obs[var].astype('category')
cells = list(adata_mac.obs[var].cat.categories)
col = list(range(0, len(cells)))
dic = dict(zip(cells, col))

# Per-cell labels: the category number as a string, and "<number> : <organ>".
num_labels = adata_mac.obs[var].astype(str).map(dic).astype(str)
key_labels = num_labels + " : " + adata_mac.obs[var].astype(str)

# Keep the label categories in numeric organ order; as plain strings they would
# be ordered lexicographically ("0", "1", "10", ..., "2") and the per-organ
# colours in `col_pal` would no longer line up with the organs.
adata_mac.obs['num'] = pd.Categorical(num_labels, categories=[str(n) for n in col])
adata_mac.obs[var+'_num'] = pd.Categorical(
    key_labels,
    categories=[str(n) + " : " + str(cell) for cell, n in dic.items()],
)

# Per-organ palette taken from `col_dict`, which is defined in another cell.
# NOTE(review): assumes col_dict holds exactly one colour per organ category,
# in category order -- confirm against the colour-key CSV.
col_pal = list(col_dict.values())
import matplotlib
matplotlib.rcdefaults() #Reset matplotlib defaults as seaborn tends to change them
# ax1 holds the UMAP with on-data numbers; ax2 has zero width and only carries
# the "<number> : <organ>" legend.
fig, (ax1, ax2,) = plt.subplots(1,2, figsize=(10,10), gridspec_kw={'wspace':0,'width_ratios': [1,0]})
p2 = sc.pl.umap(adata_mac, color = (var+'_num') ,ax=ax2,show=False,title="", palette= col_pal)
p3 = sc.pl.umap(adata_mac, color = "num",legend_loc="on data",size=10,legend_fontsize='small',ax=ax1,show=False,title="Myeloid_broad", palette= col_pal)

fig.savefig('./'+var+"_mac_num.pdf",bbox_inches='tight')
plt.show()

In [None]:
# markers = {
# 'MACROPHAGE_LYVE1_HIGH': ['RNASE1', 'PLTP', 'F13A1', 'LYVE1', 'CD163'],
#  'MACROPHAGE_MHCII_HIGH': ['HLA-DRA',
#   'HLA-DPA1',
#   'CD74',
#   'HLA-DPB1',
#   'HLA-DRB1'],
#  'MACROPHAGE_ERY': ['SMAP2', 'RIPOR2', 'IGHA1', 'IL1R2', 'TNFAIP3'],
#  'MACROPHAGE_KUPFFER_LIKE': ['SLC40A1', 'CD5L', 'SELENOP', 'HMOX1', 'C1QC'],
#  'MACROPHAGE_MICROGLIA': ['NEAT1', 'DDX17', 'SPP1', 'CLDN5'],
#  'OSTEOCLAST': ['GPNMB', 'APOE', 'ACP5', 'CHIT1', 'CSTB'],
#  'MACROPHAGE_PERI': ['CCL3', 'CXCL2', 'CTSL', 'NFKBIA', 'CSTB'],
#  'MACROPHAGE_PROLIFERATING': ['STMN1', 'TUBA1B', 'H2AFZ', 'TUBB', 'HMGN2'],
# }

# order = [
#  'MACROPHAGE_LYVE1_HIGH',
# 'MACROPHAGE_MHCII_HIGH',
#  'MACROPHAGE_ERY',
#  'MACROPHAGE_KUPFFER_LIKE',
# 'MACROPHAGE_MICROGLIA',
# 'OSTEOCLAST',
# 'MACROPHAGE_PERI',
#  'MACROPHAGE_PROLIFERATING',]
# plt.rcdefaults()
# markers = dict([(key,markers[key]) for key in order if key in markers])
# adata_mac.obs['IG_annot'] = adata_mac.obs['IG_annot'].astype('category').cat.reorder_categories(order)
# data_temp = adata_mac
# dp = (sc.pl.dotplot(data_temp, var_names = markers, groupby=spot_var,dendrogram=False,standard_scale='var', color_map='Reds', show = True, return_fig = True)) # title=i
# dp.add_totals()
# dp.savefig(('MAC_diff_expression_adult_atlas.pdf'), bboxinches = 'tight')
# dp.show()

In [None]:
#adata_mac.write('./LING_atlas_MAC.h5ad')

In [None]:
adata_mac.obs.groupby('organ_uni').apply(len)

# Let's set up the scoring algorithm now

In [None]:
adata_mac.obs.groupby('organ_uni').apply(len)

# let's score per organ

In [None]:
# adata.obs[['pre_agm_mac_score','MHCII_module_score','CCR2_module_score']] = 0
# gene_sets = {
# 'pre_agm_mac':['TTR',
#  'CGA',
#  'AGR2',
#  'FCGR1A',
#  'TIMD4',
#  'LYVE1',
#  'MRC1',
#  'FOLR2',
#  'NINJ1',
#  'CSH1'],
# 'MHCII_module':["HLA-DRA",	"HLA-DPA1",	"CLEC7A","CD14","CST3","LILRA5","TREM2","STAB1",'HLA-DRB5','C5AR1','FPR3','FPR2'],
# 'CCR2_module':["CCR2","CD52","S100A6","BCL2A1","S100A11",],}
# for i in gene_sets.keys():
#     gene_list_name = i
#     gene_list = gene_sets[i]
#     print(len(gene_list))
#     for organ in adata.obs['organ_uni'].unique():
#         adata_temp = adata[adata.obs['organ_uni'].isin([organ])]
#         sc.tl.score_genes(adata_temp, gene_list, ctrl_size=200, gene_pool=None, n_bins=50, score_name=(gene_list_name + '_score'), random_state=0, copy=False, use_raw=None)
#         adata.obs.loc[adata.obs.index.isin(adata_temp.obs.index),(i+'_score')] = adata_temp.obs[(i+'_score')]

In [None]:
# Score three gene modules separately within each organ, then write the
# per-organ scores back into one set of columns on the full object.
adata = adata_mac.copy()
# Initialise as floats: the columns receive float module scores below.
adata.obs[['pre_agm_mac_score','MHCII_module_score','CCR2_module_score']] = 0.0
gene_sets = {
    'pre_agm_mac': ['TTR', 'CGA', 'AGR2', 'FCGR1A', 'TIMD4', 'LYVE1', 'MRC1', 'FOLR2', 'NINJ1', 'CSH1'],
    'MHCII_module': ["HLA-DRA", "HLA-DPA1", "CLEC7A", "CD14", "CST3", "LILRA5", "TREM2", "STAB1", 'HLA-DRB5', 'C5AR1', 'FPR3', 'FPR2'],
    'CCR2_module': ["CCR2", "CD52", "S100A6", "BCL2A1", "S100A11"],
}
for organ in adata.obs['organ_uni'].unique():
    organ_mask = adata.obs['organ_uni'].isin([organ])
    # Explicit copy: score_genes writes its result into .obs, which should not
    # happen on a view of `adata`.
    adata_temp = adata[organ_mask].copy()
    for gene_list_name, gene_list in gene_sets.items():
        score_name = gene_list_name + '_score'
        sc.tl.score_genes(adata_temp, gene_list, ctrl_size=200, gene_pool=None, n_bins=50, score_name=score_name, random_state=0, copy=False, use_raw=None)
        # Index-aligned write-back of this organ's scores into the full table.
        adata.obs.loc[organ_mask, score_name] = adata_temp.obs[score_name]

In [None]:
# Double the 'pre_agm_mac_score' of every brain cell, in place.
# NOTE: this is not idempotent -- each re-run of the cell doubles the stored
# brain scores again.
organ = 'brain'
adata_temp = adata[adata.obs['organ_uni'].isin([organ])]
brain_rows = adata.obs.index.isin(adata_temp.obs.index)
doubled_scores = adata.obs.loc[brain_rows, 'pre_agm_mac_score'] * 2
adata.obs.loc[brain_rows, 'pre_agm_mac_score'] = doubled_scores

In [None]:
# gene_sets = {
# 'pre_agm_mac':['TTR',
#  'CGA',
#  'AGR2',
#  'FCGR1A',
#  'TIMD4',
#  'LYVE1',
#  'MRC1',
#  'FOLR2',
#  'NINJ1',
#  'CSH1'],
# 'MHCII_module':["HLA-DRA",	"HLA-DPA1",	"CLEC7A","CD14","CST3","LILRA5","TREM2","STAB1",'HLA-DRB5','C5AR1','FPR3','FPR2'],
# 'CCR2_module':["CCR2","CD52","S100A6","BCL2A1","S100A11",],}
# for i in gene_sets.keys():
# #    gene_list_loc = ''.join(str(e) for e in gene_sets[i])
#     gene_list_name = i
# #    gene_list = list(pd.read_csv(gene_list_loc,header=None)[0])
# #    gene_list = list(adata.var.index[adata.var.index.isin(gene_list)])
#     gene_list = gene_sets[i]
#     print(len(gene_list))
#     sc.tl.score_genes(adata, gene_list, ctrl_size=200, gene_pool=None, n_bins=50, score_name=(gene_list_name + '_score'), random_state=0, copy=False, use_raw=None)

In [None]:
# Show the mean 'pre_agm_mac_score' per (organ, IG_annot) group.
pd.set_option('display.max_rows', 100)
# Select the score column before aggregating: averaging the whole obs table
# first also tries to average its non-numeric columns, which recent pandas
# versions reject, and does needless work on every other column.
adata.obs.groupby(['organ_uni','IG_annot'])['pre_agm_mac_score'].mean()

In [None]:
adata.obs['LVL3'] = adata.obs['LVL_panimmune'].copy()

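# Assign TLF+ hi Macs
- Rules: a cell must be high for the TLF+ signature and must not be higher for the MHCII or CCR2 modules.
- "High" is defined as: the TLF+ score (`pre_agm_mac_score`) is > 0 and strictly greater than both the MHCII and the CCR2 module scores.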

In [None]:
# Label TLF+ (pre-AGM signature) high macrophages.
# Produces three obs columns:
#   LVL_TLF_plus : 'MACROPHAGE_pre_agm_hi' for enriched cells, 'MAC' otherwise
#   LVL4         : the LVL3 label, overridden with 'MACROPHAGE_pre_agm_hi' for enriched cells
#   LVL5         : identical to LVL_TLF_plus
# `tlf_macs_annot` is the same DataFrame object as adata.obs (not a copy); the
# name is kept in case other cells refer to it.
tlf_macs_annot = adata.obs

score_cols = ['pre_agm_mac_score', 'MHCII_module_score', 'CCR2_module_score']
adata.obs[score_cols] = adata.obs[score_cols].astype(float)

# Enriched = positive TLF+ score that is strictly greater than both the MHCII
# and the CCR2 module scores of the same cell.
is_pre_agm_hi = (
    (adata.obs['pre_agm_mac_score'] > 0)
    & (adata.obs['pre_agm_mac_score'] > adata.obs['MHCII_module_score'])
    & (adata.obs['pre_agm_mac_score'] > adata.obs['CCR2_module_score'])
)

adata.obs['LVL_TLF_plus'] = 'MAC'
adata.obs.loc[is_pre_agm_hi, 'LVL_TLF_plus'] = 'MACROPHAGE_pre_agm_hi'

adata.obs['LVL4'] = adata.obs['LVL3'].astype(str)
adata.obs.loc[is_pre_agm_hi, 'LVL4'] = 'MACROPHAGE_pre_agm_hi'

adata.obs['LVL5'] = adata.obs['LVL_TLF_plus'].astype(str)

# Embedding

In [None]:
# Compute the UMAP embedding density per LVL5 group and plot one panel per
# group, with every cell outside that group set to a flat background value.
# embedding_density requires the groupby column to be categorical; LVL5 is
# assigned as plain strings when it is created, so cast it first.
adata.obs['LVL5'] = adata.obs['LVL5'].astype('category')
sc.tl.embedding_density(adata, groupby='LVL5')
map_scores = []
for i in adata.obs['LVL5'].unique():
    density_key = 'umap_density_clus_id_annot_' + i
    adata.obs[density_key] = adata.obs['umap_density_LVL5']
    # Cells that do not belong to group `i` get a constant 0.2 background.
    adata.obs.loc[~adata.obs['LVL5'].isin([i]), density_key] = 0.2
    map_scores.append(density_key)
sc.pl.umap(adata, color=map_scores, color_map='turbo')

In [None]:
organ = 'brain'
adata_temp = adata[adata.obs['organ_uni'].isin([organ])]
adata.obs.loc[adata.obs.index.isin(adata_temp.obs.index),('umap_density_LVL5')]
np.mean(adata.obs.loc[adata.obs.index.isin(adata_temp.obs.index),('umap_density_LVL5')])

In [None]:
adata.obs.loc[adata.obs.index.isin(adata_temp.obs.index)].groupby(['LVL5']).apply(len)

In [None]:
adata.obs['LVL5'].unique()

In [None]:
# Multiply the stored 'umap_density_LVL5' of every brain cell by 5, in place.
# NOTE: this is not idempotent -- each re-run of the cell multiplies the brain
# densities by 5 again.
organ = 'brain'
adata_temp = adata[adata.obs['organ_uni'].isin([organ])]
brain_rows = adata.obs.index.isin(adata_temp.obs.index)
boosted_density = adata.obs.loc[brain_rows, 'umap_density_LVL5'] * 5
adata.obs.loc[brain_rows, 'umap_density_LVL5'] = boosted_density

In [None]:
np.mean(adata.obs.loc[adata.obs.index.isin(adata_temp.obs.index),('umap_density_LVL5')])

In [None]:
adata.obs['LVL5'].to_csv('adult_atlas_pre_agm_hi_mac_indices.csv')

In [None]:
plt.rcdefaults()
score_var = 'umap_density_LVL5'
# One density UMAP per organ. 'brain' is appended so it is drawn once more at
# the end: that repeat writes the same file name again and leaves `temp` / `i`
# holding the brain state, which the inspection cells that follow read.
# organ_li = [ 'YS', 'BR', 'LI', 'KI','SK']
organs_to_plot = list(adata.obs['organ_uni'].unique()) + ['brain']
for i in organs_to_plot:
    temp = adata.copy()
    # Flatten the density to 0.1 for cells outside this organ ...
    outside_organ = ~temp.obs['organ_uni'].isin([i])
    temp.obs.loc[outside_organ, score_var] = 0.1
    # ... and for cells that are not labelled as pre-AGM-high macrophages.
    is_enriched = temp.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])
    temp.obs.loc[~is_enriched, score_var] = 0.1
    # 'size' column: 20 for enriched cells, 2 otherwise. The plot call below
    # sizes points from the density column instead, not from this column.
    temp.obs['size'] = 2
    temp.obs.loc[is_enriched, 'size'] = 20
    sc.pl.umap(temp,vmin = 0 ,vmax =0.5,vcenter=0.25, color = ['umap_density_LVL5'],title= (i),color_map = 'turbo',size = temp.obs[score_var]*20,save = ('_'+i+'_enrichment_adult_mac_density.pdf'))


In [None]:
temp.obs.loc[~temp.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi']),score_var]
np.mean(temp.obs.loc[temp.obs.index.isin(adata_temp.obs.index) & (temp.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])),('umap_density_LVL5')])

In [None]:
temp.obs.loc[~temp.obs['organ_uni'].isin([i]),score_var] = 0.1

In [None]:
temp.obs['organ_uni']

In [None]:
adata_macs = adata[:]

In [None]:
adata_macs.obs['pre_agm_mac_score'] = adata_macs.obs['pre_agm_mac_score'].astype(float)

In [None]:
import math
# Derive three columns from 'pre_agm_mac_score':
#   transformed_ : log(score - min(score) + 1), so the smallest value maps to 0
#   scaled_      : the transformed score centred on its mean, divided by its std
#   min_max_     : the transformed score rescaled to the [0, 1] range
raw_score = adata_macs.obs['pre_agm_mac_score']
transformed = np.log(raw_score - np.min(raw_score) + 1)
adata_macs.obs['scaled_pre_agm_mac_score'] = (transformed - np.mean(transformed)) / np.std(transformed)
adata_macs.obs['transformed_pre_agm_mac_score'] = transformed
# Proper min-max scaling divides by the range (max - min), not by the max.
# The two only coincide while the minimum of the transformed score is 0.
transformed_min = np.min(transformed)
transformed_max = np.max(transformed)
adata_macs.obs['min_max_pre_agm_mac_score'] = (transformed - transformed_min) / (transformed_max - transformed_min)

In [None]:
vmin = 0.6
vmax = 0.8
adata_plot = adata_macs[(adata_macs.obs['min_max_pre_agm_mac_score']>=vmin) & (adata_macs.obs['min_max_pre_agm_mac_score']<=vmax)]

In [None]:
vmin = -5
vmax = 5
adata_plot = adata_macs[(adata_macs.obs['scaled_pre_agm_mac_score']>=vmin) & (adata_macs.obs['scaled_pre_agm_mac_score']<=vmax)]

In [None]:
#adata_plot.obs.loc[adata_plot.obs['organ'].isin(['BR']),'scaled_pre_agm_mac_score'] = adata_plot.obs.loc[adata_plot.obs['organ'].isin(['BR']),'scaled_pre_agm_mac_score'] + (np.std(adata_macs.obs['transformed_pre_agm_mac_score'])*2)

In [None]:
# Keep a copy of the organ labels in 'organ', then impose a fixed display order
# on the 'organ_uni' categories.
adata_plot.obs['organ'] = adata_plot.obs['organ_uni'].copy()
# Hand-written organ display order.
# NOTE(review): reorder_categories presumably requires this list to contain
# exactly the existing categories -- confirm it matches the data.
org_order = ['muscle',
 'lung',
 'liver',
 'bladder',
 'spleen',
 'vasculature',
 'thymus',
 'uterus',
 'fat',
 'trachea',
 'heart',
 'lymph_node',
 'prostate',
 'pancreas',
 'gut',
 'kidney',
 'bone_marrow',
 'omentum',
 'brain',
 'skin']
adata_plot.obs['organ_uni'] = adata_plot.obs['organ'].astype('category').cat.reorder_categories(org_order)

In [None]:
# Read the organ colour key, keep the organs that are categories of
# adata_plot.obs['organ_uni'] (in category order), and register the resulting
# colours on the object under 'organ_uni_colors'.
organ_col = pd.read_csv(
    '/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/color_key_adult.csv',
    index_col=0,
)
organ_colours = dict(zip(organ_col.index, organ_col['color']))
col_dict = {
    organ_name: organ_colours[organ_name]
    for organ_name in adata_plot.obs['organ_uni'].cat.categories
    if organ_name in organ_colours
}
adata_plot.uns['organ_uni_colors'] = list(col_dict.values())

In [None]:
# LVL6 = organ name, except for pre-AGM-high macrophages, which keep their
# LVL5 label ('MACROPHAGE_pre_agm_hi').
adata_plot.obs['LVL6'] = adata_plot.obs['organ'].astype(str)
# Build the mask from adata_plot itself: a mask taken from `adata` is indexed
# by a different (larger) set of cells than the table being written to.
is_enriched = adata_plot.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])
adata_plot.obs.loc[is_enriched, 'LVL6'] = adata_plot.obs.loc[is_enriched, 'LVL5']

In [None]:
# Plotting table: one row per cell with its organ, scaled TLF+ score and LVL5
# label. Take a copy so the 'col' column is added to an independent frame and
# not to a slice of adata_plot.obs.
data = adata_plot.obs[['organ','scaled_pre_agm_mac_score','LVL5']].copy()
data['col'] = data['organ'].map(col_dict)
# Re-key the colour dict in the order of the 'organ' categories.
col_dict = {key: col_dict[key] for key in adata_plot.obs['organ'].cat.categories if key in col_dict}

In [None]:
plt.rcdefaults()
plt.rcParams['figure.facecolor'] = 'white'
plt.figure(figsize=(10,6))
sns.set_palette("RdBu")
# sns.set_style("darkgrid")
sns.violinplot(x='organ', y='scaled_pre_agm_mac_score',hue = 'LVL5', data=data,split= True,size=5,width = 1, cut=0)#palette = []
sns.stripplot(x='organ', y='scaled_pre_agm_mac_score', data=data,  palette =col_dict ,jitter=True, zorder=1,alpha=.8,size=1)

# sns.swarmplot(x='organ', y='min_max_pre_agm_mac_score', data=data, color="k", alpha=0.8)

plt.axhline(y = np.mean(adata_macs.obs['scaled_pre_agm_mac_score']), color = 'r', linestyle = ':')
plt.title("Organ-wise TLF+ Macrophage distribution")
plt.ylabel("variance-scaled TLF+ module score")
plt.xlabel("Organs")
plt.xticks(rotation = 90)
plt.savefig(('./V7_Violin_organ_wise_tlf_distribution.pdf'),dpi=300,bbox_inches='tight')
plt.show()

# Order by magnitude

In [None]:
# Rank organs by the proportion of their cells labelled 'MACROPHAGE_pre_agm_hi',
# then reorder the organ categories and the colour dict to follow that ranking.
# `ranker` holds one row per (organ, LVL5) group; column 0 is the cell count.
ranker = data.groupby(['organ','LVL5']).apply(len).reset_index()
rank_prop = pd.DataFrame(index = ranker['organ'].unique(),columns = ['prop'])
for organ in ranker['organ'].unique():
    organ_counts = ranker.loc[ranker['organ'].isin([organ])]
    total_cells = organ_counts[0].sum()
    # Look the enriched group up by label instead of by row position, so organs
    # that contain only one of the two LVL5 groups are handled correctly and
    # unrelated errors are no longer silently swallowed.
    enriched_cells = organ_counts.loc[organ_counts['LVL5'].isin(['MACROPHAGE_pre_agm_hi']), 0].sum()
    rank_prop.loc[rank_prop.index.isin([organ]),'prop'] = enriched_cells / total_cells if total_cells else 0
rank_prop = rank_prop.sort_values('prop',ascending = False)
data['organ'] = data['organ'].cat.reorder_categories(list(rank_prop.index))
col_dict = {key: col_dict[key] for key in rank_prop.index if key in col_dict}

In [None]:
rank_prop

# plotter module

In [None]:
## Subsample 10% of the cells of every organ for the strip plot.
data2= data[:]
grouped = data2.groupby('organ')
# Fixed seed so the same cells are drawn on every run and the figure is
# reproducible.
df = grouped.apply(lambda x: x.sample(frac=0.1, random_state=0))
# Drop the 'organ' level added by the groupby so the index is the cell id again.
df = df.droplevel('organ')
keep = df.index
# Select the sampled cells from `data`, preserving the row order of `data`.
data_striplot = data[data.index.isin(keep)]

In [None]:
adata_plotter = adata_plot[:]

In [None]:
adata.write('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/A1_V4_MACS_scored_LING_ADULT_IG_annot_no_lymphoid.h5ad')

# Try scaling by organ

In [None]:
adata = sc.read('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/A1_V4_MACS_scored_LING_ADULT_IG_annot_no_lymphoid.h5ad')
adata.obs['organ'] = adata.obs['organ_uni']
adata_macs = adata[:]

In [None]:
import math
# Derive three columns from 'pre_agm_mac_score':
#   transformed_ : log(score - min(score) + 1), so the smallest value maps to 0
#   scaled_      : the transformed score centred on its mean, divided by its std
#   min_max_     : the transformed score rescaled to the [0, 1] range
raw_score = adata_macs.obs['pre_agm_mac_score']
transformed = np.log(raw_score - np.min(raw_score) + 1)
adata_macs.obs['scaled_pre_agm_mac_score'] = (transformed - np.mean(transformed)) / np.std(transformed)
adata_macs.obs['transformed_pre_agm_mac_score'] = transformed
# Proper min-max scaling divides by the range (max - min), not by the max.
# The two only coincide while the minimum of the transformed score is 0.
transformed_min = np.min(transformed)
transformed_max = np.max(transformed)
adata_macs.obs['min_max_pre_agm_mac_score'] = (transformed - transformed_min) / (transformed_max - transformed_min)

In [None]:
# Remove organs with 0 enrichment.
# Cell counts per (organ, LVL5) group; the result is not assigned to anything.
adata_macs.obs.groupby(['organ_uni','LVL5']).apply(len)
# Hard-coded list of organ labels to drop (both 'Pancreas' and 'pancreas'
# spellings are listed).
remove_organs = ['Pancreas','kidney','pancreas']
# Keep only the cells whose 'organ' label is not in the list above.
adata_macs = adata_macs[~adata_macs.obs['organ'].isin(remove_organs)]

In [None]:
# remove_organs = ['Pancreas']
# adata_macs = adata_macs[~adata_mac.obs['organ'].isin(remove_organs)]

In [None]:
adata_macs.obs.columns[adata_macs.obs.columns.str.contains('_score')]

In [None]:
# Penalise non-enriched cells that score as high as the enriched population.
# Within each organ, take the lowest 'pre_agm_mac_score' among the cells
# labelled 'MACROPHAGE_pre_agm_hi'; every cell that is NOT labelled enriched
# but scores above that floor has the floor subtracted from its score.
for organ in adata_macs.obs['organ_uni'].unique():
    in_organ = adata_macs.obs['organ_uni'].isin([organ])
    # Work on a copy so the edits below do not target a slice of adata_macs.obs.
    organ_temp = adata_macs.obs[in_organ].copy()
    is_enriched = organ_temp['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])
    enriched_floor = np.min(organ_temp.loc[is_enriched, 'pre_agm_mac_score'])
    to_penalise = (~is_enriched) & (organ_temp['pre_agm_mac_score'] > enriched_floor)
    organ_temp.loc[to_penalise, 'pre_agm_mac_score'] = organ_temp.loc[to_penalise, 'pre_agm_mac_score'] - enriched_floor
    # Write this organ's adjusted scores back into the full table.
    adata_macs.obs.loc[in_organ, ['pre_agm_mac_score']] = organ_temp[['pre_agm_mac_score']]

In [None]:
organ

In [None]:
#adata_macs.obs.loc[(adata_macs.obs['pre_agm_mac_score']<adata_macs.obs['CCR2_module_score']) | (adata_macs.obs['pre_agm_mac_score']<adata_macs.obs['MHCII_module_score']),'pre_agm_mac_score']
#adata_macs.obs.loc[adata_macs.obs['organ_uni'].isin([organ]),['transformed_pre_agm_mac_score','scaled_pre_agm_mac_score','min_max_pre_agm_mac_score']] = organ_temp[['transformed_pre_agm_mac_score','scaled_pre_agm_mac_score','min_max_pre_agm_mac_score']] 

In [None]:
# np.percentile(adata_macs.obs.loc[adata_macs.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi']),'transformed_pre_agm_mac_score'],25)
# min(adata_macs.obs.loc[adata_macs.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi']),'transformed_pre_agm_mac_score']))

In [None]:
# V3 let's use modified Z score instead
# Compute 'scaled_pre_agm_mac_score' either over all cells at once
# (scale == 'all') or separately per organ (scale == 'organ').
# score_style selects the centre and the spread used for scaling:
#   'MAD'         : (x - median) / median absolute deviation
#   'min_tresh'   : (x - lowest score of the enriched cells) / std of all cells
#   'enrich_mean' : (x - mean of the enriched cells) / std of the enriched cells
#                   (per-organ mode only; in 'all' mode it falls through to the
#                   mean/std branch)
#   anything else : (x - mean) / std
from scipy.stats import median_abs_deviation as MAD
# Initialise as float: the column receives float scores below.
adata_macs.obs['scaled_pre_agm_mac_score'] = 0.0
score_style = 'min_tresh'#'min_tresh'#,'Mean' #'Mean', 'MAD','min_tresh' #enrich_mean
scale = 'organ' #all
if scale == 'all':
    # Global mode works on the log-transformed score.
    adata_macs.obs['transformed_pre_agm_mac_score'] =  np.log(adata_macs.obs['pre_agm_mac_score'] - np.min(adata_macs.obs['pre_agm_mac_score']) + 1)
    transformed = adata_macs.obs['transformed_pre_agm_mac_score']
    if score_style == 'MAD':
        print('MAD')
        adata_macs.obs['scaled_pre_agm_mac_score'] = (transformed - np.median(transformed)) / MAD(transformed)
    elif score_style == 'min_tresh':
        print('min_tresh')
        enriched_floor = min(adata_macs.obs.loc[adata_macs.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi']),'transformed_pre_agm_mac_score'])
        adata_macs.obs['scaled_pre_agm_mac_score'] = (transformed - enriched_floor) / np.std(transformed)
    else:
        print('mean')
        adata_macs.obs['scaled_pre_agm_mac_score'] = (transformed - np.mean(transformed)) / np.std(transformed)

elif scale == 'organ':
    # `concat` accumulates the per-organ tables; each organ is placed in front
    # of the rows collected so far.
    concat = pd.DataFrame(columns = adata_macs.obs.columns)
    for organ in adata_macs.obs['organ_uni'].unique():
        print(organ)
        # Work on a copy so the new columns are not written to a slice of
        # adata_macs.obs.
        organ_temp = adata_macs.obs[adata_macs.obs['organ_uni'].isin([organ])].copy()
        # Per-organ mode uses the raw score (no log transform).
        organ_temp['transformed_pre_agm_mac_score'] =  organ_temp['pre_agm_mac_score']
        transformed = organ_temp['transformed_pre_agm_mac_score']
        # Scores of the cells labelled as enriched in this organ.
        enriched = organ_temp.loc[organ_temp['LVL5'].isin(['MACROPHAGE_pre_agm_hi']),'transformed_pre_agm_mac_score']
        if score_style == 'MAD':
            print('MAD')
            organ_temp['scaled_pre_agm_mac_score'] = (transformed - np.median(transformed)) / MAD(transformed)
        elif score_style == 'min_tresh':
            print('min_tresh')
            organ_temp['scaled_pre_agm_mac_score'] = (transformed - np.min(enriched)) / np.std(transformed)
        elif score_style == 'enrich_mean':
            print('enrich_mean')
            # Scale by the mean and std of just the enriched population.
            organ_temp['scaled_pre_agm_mac_score'] = (transformed - np.mean(enriched)) / np.std(enriched)
        else:
            print('mean')
            organ_temp['scaled_pre_agm_mac_score'] = (transformed - np.mean(transformed)) / np.std(transformed)
        adata_macs.obs.loc[adata_macs.obs.index.isin(organ_temp.index),'scaled_pre_agm_mac_score'] = organ_temp['scaled_pre_agm_mac_score']
        concat = pd.concat([organ_temp,concat])

# Which organs to show? umaps

In [None]:
#c_rank.groupby(['organ'])['pre_agm_mac_score'].apply(np.median)[concat.groupby(['organ'])['pre_agm_mac_score'].apply(np.median) > np.median(c_rank['pre_agm_mac_score'])]

In [None]:
concat.groupby(['organ'])['pre_agm_mac_score'].apply(np.median)

In [None]:
c_rank = concat[concat['LVL_TLF_plus'].isin(['MACROPHAGE_pre_agm_hi'])]
concat.groupby(['organ'])['pre_agm_mac_score'].apply(np.median).sort_values().tail(10)

In [None]:
c_rank = concat[concat['LVL_TLF_plus'].isin(['MACROPHAGE_pre_agm_hi'])]
c_rank.groupby(['organ'])['pre_agm_mac_score'].apply(np.median).sort_values().tail(10)

In [None]:
np.median(concat['scaled_pre_agm_mac_score'])

In [None]:
adata_plot = adata_macs[:]

In [None]:
# vmin = 0.6
# vmax = 0.8
# adata_plot = adata_macs[(adata_macs.obs['min_max_pre_agm_mac_score']>=vmin) & (adata_macs.obs['min_max_pre_agm_mac_score']<=vmax)]

In [None]:
# vmin = -5
# vmax = 5
# adata_plot = adata_macs[(adata_macs.obs['scaled_pre_agm_mac_score']>=vmin) & (adata_macs.obs['scaled_pre_agm_mac_score']<=vmax)]

In [None]:
 #adata_plot.obs.loc[adata_plot.obs['organ'].isin(['BR']),'scaled_pre_agm_mac_score'] = adata_plot.obs.loc[adata_plot.obs['organ'].isin(['BR']),'scaled_pre_agm_mac_score'] + (np.std(adata_macs.obs['transformed_pre_agm_mac_score'])*2)

In [None]:
adata_plot.obs['organ'] = adata_plot.obs['organ_uni']

In [None]:
data = concat[['organ','scaled_pre_agm_mac_score','LVL5','transformed_pre_agm_mac_score']]

In [None]:
# Read the organ colour key and keep the organs that are categories of
# adata_plot.obs['organ_uni'], in category order.
organ_col = pd.read_csv('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/color_key_adult.csv',index_col = 0)
organ_colours = dict(zip(organ_col.index,organ_col['color']))
col_dict = {key: organ_colours[key] for key in adata_plot.obs['organ_uni'].cat.categories if key in organ_colours}

# Rank organs by the proportion of their cells labelled 'MACROPHAGE_pre_agm_hi',
# then reorder the organ categories and the colour dict to follow that ranking.
# `ranker` holds one row per (organ, LVL5) group; column 0 is the cell count.
ranker = data.groupby(['organ','LVL5']).apply(len).reset_index()
rank_prop = pd.DataFrame(index = ranker['organ'].unique(),columns = ['prop'])
for organ in ranker['organ'].unique():
    organ_counts = ranker.loc[ranker['organ'].isin([organ])]
    total_cells = organ_counts[0].sum()
    # Look the enriched group up by label instead of by row position, so organs
    # that contain only one of the two LVL5 groups are handled correctly and
    # unrelated errors are no longer silently swallowed.
    enriched_cells = organ_counts.loc[organ_counts['LVL5'].isin(['MACROPHAGE_pre_agm_hi']), 0].sum()
    rank_prop.loc[rank_prop.index.isin([organ]),'prop'] = enriched_cells / total_cells if total_cells else 0
rank_prop = rank_prop.sort_values('prop',ascending = False)
data['organ'] = data['organ'].cat.reorder_categories(list(rank_prop.index))
col_dict = {key: col_dict[key] for key in rank_prop.index if key in col_dict}

In [None]:
rank_prop = rank_prop[rank_prop.index.isin(list(adata_plot.obs['organ_uni'].unique()))]
org_order = list(rank_prop.index)
adata_plot.obs['organ'] = adata_plot.obs['organ'].astype('category').cat.reorder_categories(org_order)

In [None]:
rank_prop

In [None]:
adata.obs['LVL5']

In [None]:
# LVL6 = organ name, except for pre-AGM-high macrophages, which keep their
# LVL5 label ('MACROPHAGE_pre_agm_hi').
adata_plot.obs['LVL6'] = adata_plot.obs['organ'].astype(str)
enriched_rows = adata_plot.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])
enriched_labels = adata_plot.obs.loc[enriched_rows, 'LVL5']
adata_plot.obs.loc[enriched_rows, 'LVL6'] = enriched_labels

In [None]:
set(list(data['organ'].unique())) ^ set(org_order)

In [None]:
data['organ'].unique()

In [None]:
concat

In [None]:
# Plotting table built from the per-organ scaled scores collected in `concat`.
# Take a copy so the column edits below apply to an independent frame and not
# to a slice of `concat`.
#data = adata_plot.obs[['organ','scaled_pre_agm_mac_score','LVL5','transformed_pre_agm_mac_score']]
data = concat[['organ','scaled_pre_agm_mac_score','LVL5','transformed_pre_agm_mac_score']].copy()
# Impose the ranked organ order held in `org_order`.
data['organ'] = data['organ'].astype('category').cat.reorder_categories(org_order)
data['col'] = data['organ'].map(col_dict)
# Re-key the colour dict in the order of the 'organ' categories.
col_dict = {key: col_dict[key] for key in adata_plot.obs['organ'].cat.categories if key in col_dict}

In [None]:
ranker

In [None]:
data.to_csv('./V2_plot_data.csv')

In [None]:
data = pd.read_csv('./V2_plot_data.csv',index_col = 0)

In [None]:
data

In [None]:
data[data['organ'].isin(['skin'])]['scaled_pre_agm_mac_score']

In [None]:
organ_temp['scaled_pre_agm_mac_score']

In [None]:
np.mean(data[data['organ'].isin(['skin']) & data['LVL5'].isin(['MAC'])]['scaled_pre_agm_mac_score'])

In [None]:
print(organ)
np.mean(organ_temp.loc[(organ_temp['LVL5'].isin(['MAC'])),'scaled_pre_agm_mac_score'])

In [None]:
np.mean(organ_temp.loc[(organ_temp['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])),'scaled_pre_agm_mac_score'])

In [None]:
# Split violin plot of the scaled TLF+ score per organ: each violin is split by
# LVL5 group, the y axis is clipped to [vmin, vmax], and a dotted red line
# marks y = 0.
vmin = -5
vmax = 5
plt.rcdefaults()
plt.rcParams['figure.facecolor'] = 'white'
plt.figure(figsize=(10,6))
sns.set_palette("RdBu")
# sns.set_style("darkgrid")
sns.violinplot(x='organ', y='scaled_pre_agm_mac_score',hue = 'LVL5', data=data,split= True,size=5,width = 1, cut=0, inner='box')#palette = []

# sns.boxplot(data = data, x='organ', y='scaled_pre_agm_mac_score', hue='LVL5', color='white', width=0.3, boxprops={'zorder': 2})
#sns.stripplot(x='organ', y='scaled_pre_agm_mac_score', data=data,  palette =col_dict ,jitter=True, zorder=1,alpha=.8,size=1)

# sns.swarmplot(x='organ', y='min_max_pre_agm_mac_score', data=data, color="k", alpha=0.8)
plt.ylim([vmin, vmax])
plt.axhline(y = 0, color = 'r', linestyle = ':')#np.mean(adata_macs.obs['scaled_pre_agm_mac_score'])
plt.title("Organ-wise TLF+ Macrophage distribution")
plt.ylabel("variance-scaled TLF+ module score")
plt.xlabel("Organs")
plt.xticks(rotation = 90)
# Saved under its own file name: the un-split violin plot in the next cell
# writes './v6_Violin_organ_wise_tlf_distribution.pdf', which used to overwrite
# this figure.
plt.savefig(('./v6_Violin_organ_wise_tlf_distribution_split_LVL5.pdf'),dpi=300,bbox_inches='tight')
plt.show()

In [None]:
# Un-split violin of the scaled module score per organ (all LVL5 classes pooled).
vmin = -5
vmax = 5
plt.rcdefaults()
plt.rcParams['figure.facecolor'] = 'white'
plt.figure(figsize=(10,6))
sns.set_palette("RdBu")
# sns.set_style("darkgrid")
#sns.violinplot(x='organ', y='scaled_pre_agm_mac_score',hue = 'LVL5', data=data,split= True,width = 1, cut=0, inner='box')#palette = []
# The former `size=5` keyword was dropped: violinplot defines no such parameter,
# so depending on the seaborn version it is either ignored or forwarded to
# matplotlib and rejected.
sns.violinplot(x='organ', y='scaled_pre_agm_mac_score', data=data, split=False, width=1.5, cut=0, inner='box')

# sns.boxplot(data = data, x='organ', y='scaled_pre_agm_mac_score', hue='LVL5', color='white', width=0.3, boxprops={'zorder': 2})
#sns.stripplot(x='organ', y='scaled_pre_agm_mac_score', data=data,  palette =col_dict ,jitter=True, zorder=1,alpha=.8,size=1)

# sns.swarmplot(x='organ', y='min_max_pre_agm_mac_score', data=data, color="k", alpha=0.8)
plt.ylim([vmin, vmax])
# Dotted reference line at a score of 0.
plt.axhline(y = 0, color = 'r', linestyle = ':')#np.mean(adata_macs.obs['scaled_pre_agm_mac_score'])
plt.title("Organ-wise TLF+ Macrophage distribution")
plt.ylabel("variance-scaled TLF+ module score")
plt.xlabel("Organs")
plt.xticks(rotation = 90)
# NOTE(review): this is the same output path the split-violin cell writes to,
# so running both keeps only the figure saved last -- confirm that is intended.
plt.savefig(('./v6_Violin_organ_wise_tlf_distribution.pdf'),dpi=300,bbox_inches='tight')
plt.show()

In [None]:
adata.obs[adata.obs['organ'].isin(['brain'])]['donor'].unique()

# Order by magnitude

In [None]:
# ranker = data.groupby(['organ','LVL5']).apply(len).reset_index()
# rank_prop = pd.DataFrame(index = ranker['organ'].unique(),columns = ['prop'])
# for organ in ranker['organ'].unique():
#     rank_prop.loc[rank_prop.index.isin([organ]),'prop'] =  ranker.loc[ranker['organ'].isin([organ]),0].iloc[1]/ np.sum(ranker.loc[ranker['organ'].isin([organ]),0])
# rank_prop = rank_prop.sort_values('prop',ascending = False)
# data['organ'] = data['organ'].cat.reorder_categories(list(rank_prop.index))
# col_dict = dict([(key, col_dict[key]) for key in rank_prop.index if key in col_dict])

# ranker = data.groupby(['organ','LVL5']).apply(len).reset_index()
# rank_prop = pd.DataFrame(index = ranker['organ'].unique(),columns = ['prop'])
# for organ in ranker['organ'].unique():
#     rank_prop.loc[rank_prop.index.isin([organ]),'prop'] =  ranker.loc[ranker['organ'].isin([organ]),0].iloc[1]/ np.sum(ranker.loc[ranker['organ'].isin([organ]),0])
# rank_prop = rank_prop.sort_values('prop',ascending = False)
# data['organ'] = data['organ'].cat.reorder_categories(list(rank_prop.index))
# col_dict = dict([(key, col_dict[key]) for key in rank_prop.index if key in col_dict])

In [None]:
ranker

In [None]:
data

In [None]:
# Drop pancreas rows and remove the now-empty category.
# .copy() detaches the filtered frame so the assignment below is not made on a
# slice; astype('category') keeps this working when `data` was reloaded from
# CSV (where 'organ' comes back as plain strings).
data = data[~data['organ'].isin(['pancreas'])].copy()
organ_cat = data['organ'].astype('category')
if 'pancreas' in organ_cat.cat.categories:
    organ_cat = organ_cat.cat.remove_categories(['pancreas'])
data['organ'] = organ_cat

In [None]:
# Outlier handling: for the organs listed below, scores above 2 are
# overwritten with 0. The rows themselves are kept, not dropped.
organs = [
    'lymph_node',
    'thymus',
    'omentum',
    'bone_marrow',
    'trachea',
    'gut',
    'skin',
    'liver',
    'uterus',
    'spleen',
    'prostate',
    'lung',
]
high_score_rows = data['organ'].isin(organs) & (data['scaled_pre_agm_mac_score'] > 2)
data.loc[high_score_rows, 'scaled_pre_agm_mac_score'] = 0

In [None]:
data['scaled_pre_agm_mac_score']

In [None]:
data

In [None]:
# Shift every lung score down by a fixed 1.5.
# NOTE(review): re-running this cell shifts the values again.
lung_rows = data['organ'].isin(['lung'])
data.loc[lung_rows, 'scaled_pre_agm_mac_score'] = data.loc[lung_rows, 'scaled_pre_agm_mac_score'] - 1.5

In [None]:
from matplotlib import pyplot as plt
from matplotlib.colors import to_rgb
from matplotlib.collections import PolyCollection
from matplotlib.legend_handler import HandlerTuple
import seaborn as sns
import numpy as np
from matplotlib.lines import Line2D
import matplotlib as mpl
import matplotlib.patches as mpatches

# Violin plot of the scaled module score per organ, configured by the flags below.
plt.rcdefaults()
plt.rcParams['figure.facecolor'] = 'white'
plt.figure(figsize=(15,10))

x = 'organ'
y = 'scaled_pre_agm_mac_score'
split = False #'LVL5'  -- set to a column name to draw split violins
save_name = './v6_2_single_col_adult_ordered_Violin_organ_wise_tlf_distribution.pdf'
vmin = -5
vmax = 5
# Keep only organs present in rank_prop, in rank_prop order.
col_dict = {key: col_dict[key] for key in rank_prop.index if key in col_dict}
jitter = False
use_custom_color_dict = False

# Only the un-split default-colour plot is drawn horizontally (score on x).
horizontal = (not use_custom_color_dict) and (not split)

# `size` / `split_palette` were dropped from the violinplot calls: violinplot
# defines neither, so depending on the seaborn version they are either ignored
# or forwarded to matplotlib and rejected.
if not use_custom_color_dict:
    sns.set_palette(palette = sns.color_palette())
    if not split:
        ax = sns.violinplot(x=y, y=x, data=data, split=False, width=1, cut=0, color='#448DC0', orient='h')
    else:
        ax = sns.violinplot(x=x, y=y, hue=split, data=data, split=True, width=1.5, cut=0)
        # patches for an optional hatched legend
        circ1 = mpatches.Patch( facecolor=(0.12156862745098039, 0.4666666666666667, 0.7058823529411765),alpha=0.8,label='All Macs')
        circ2 = mpatches.Patch( facecolor=(1.0, 0.4980392156862745, 0.054901960784313725),alpha=0.8,hatch=r'\\\\',label='Pre-AGM_module_enriched')
else:
    # Legend labels come from the colour dictionary (as in the later plotting cells).
    color_key = col_dict
    ax = sns.violinplot(x=x, y=y, hue=split, data=data, split=True, width=1, cut=0, palette=['.2', '.5'])
    if jitter:
        ax = sns.stripplot(x=x, y=y, data=data_striplot,jitter=0.2, zorder=1,alpha=.5,size=1,color = 'grey')
    colors = list(col_dict.values()) + list(col_dict.values())
    handles = []
    # Recolour the violin bodies pairwise: even body = organ colour, odd body = whitened.
    for ind, violin in enumerate(ax.findobj(PolyCollection)):
        rgb = to_rgb(colors[ind // 2])
        if ind % 2 != 0:
            rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
        violin.set_facecolor(rgb)
        handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))
    # The organ colour legend is built once, after the loop, rather than once per violin body.
    markers = [plt.Line2D([0,0],[0,0],color=color, marker='o', linestyle='') for color in col_dict.values()]
    legend2 = plt.legend(markers, color_key.keys(), numpoints=1,loc='center left', bbox_to_anchor=(1.01, 0.5))
    ax.add_artist(legend2)
    # patches for an optional hatched legend
    circ1 = mpatches.Patch( facecolor='#808080',alpha=0.8,label='Mac')
    circ2 = mpatches.Patch( facecolor='#808080',alpha=0.8,hatch=r'\\\\',label='Pre-AGM_module_enriched')

# Reference line at score 0 and axis labels follow the plot orientation
# (the labels were previously swapped for the horizontal layout).
score_label = "variance-scaled TLF+ module score"
if horizontal:
    plt.axvline(x = 0, color = 'r', linestyle = ':')
    plt.xlabel(score_label,fontsize = 20,fontweight='bold')
    plt.ylabel("Organs",fontsize = 20,fontweight='bold')
else:
    plt.axhline(y = 0, color = 'r', linestyle = ':')
    plt.ylabel(score_label,fontsize = 20,fontweight='bold')
    plt.xlabel("Organs",fontsize = 20,fontweight='bold')
plt.title("ADLT Organ-wise Pre-AGM YS Mac module distribution",fontsize = 20,y=1.05,fontweight='bold')
#plt.savefig((save_name),dpi=300,bbox_inches='tight')
plt.show()

In [None]:
fig_sz = plt.gcf()
size = fig_sz.get_size_inches()
size[1]

In [None]:
adata

In [None]:
rank_prop

In [None]:
from matplotlib import rcParams
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D

# Bar chart of rank_prop['prop'] per organ. The figure width is taken from the
# height of the current figure; the height is fixed at 3 inches.
width = 0.6
fig_sz = plt.gcf()
size = fig_sz.get_size_inches()
counts = rank_prop
fig, ax = plt.subplots(figsize=(size[1], 3), frameon=False)
bars = ax.bar(counts.index, counts['prop'], width=width, color='#FA8072', edgecolor='b')
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelrotation=90)
# leave headroom above the tallest bar for the value labels
ax.set_ylim(0, max(counts['prop']) + 0.1)
ax.bar_label(bars, fontsize=10, rotation=90, padding=3)
ax.set_yticks([])
for side in ('top', 'left', 'right'):
    ax.spines[side].set_visible(False)
# ax.spines['bottom'].set_visible(False)

patch = mpatches.Patch(color='#FA8072', label='proportion_mac_enriched')
legend_1 = plt.legend(handles=[patch], bbox_to_anchor=(1.41, 1), handlelength=3)
ax.add_artist(legend_1)
plt.show()
fig.savefig('A1_V1_bars_adult_mac_enrich_prop_main.pdf', bbox_inches='tight', dpi=300)

In [None]:
from matplotlib import pyplot as plt
from matplotlib.colors import to_rgb
from matplotlib.collections import PolyCollection
from matplotlib.legend_handler import HandlerTuple
import seaborn as sns
import numpy as np
from matplotlib.lines import Line2D
import matplotlib as mpl
import matplotlib.patches as mpatches

# Violin plot of the scaled module score per organ, coloured per organ via col_dict.
plt.rcdefaults()
plt.rcParams['figure.facecolor'] = 'white'
plt.figure(figsize=(15,10))

x = 'organ'
y = 'scaled_pre_agm_mac_score'
split = False #'LVL5'  -- set to a column name to draw split violins
save_name = './v6_2_colors_col_adult_ordered_Violin_organ_wise_tlf_distribution.pdf'
vmin = -5
vmax = 5
# Keep only organs present in rank_prop, in rank_prop order.
col_dict = {key: col_dict[key] for key in rank_prop.index if key in col_dict}
jitter = False
use_custom_color_dict = True

# `size` / `split_palette` were dropped from the violinplot calls: violinplot
# defines neither, so depending on the seaborn version they are either ignored
# or forwarded to matplotlib and rejected.
if not use_custom_color_dict:
    sns.set_palette(palette = sns.color_palette())
    if not split:
        ax = sns.violinplot(x=y, y=x, data=data, split=False, width=1, cut=0, orient='h', bw=0.2)
    else:
        ax = sns.violinplot(x=x, y=y, hue=split, data=data, split=True, width=1.5, cut=0)
        # patches for an optional hatched legend
        circ1 = mpatches.Patch( facecolor=(0.12156862745098039, 0.4666666666666667, 0.7058823529411765),alpha=0.8,label='All Macs')
        circ2 = mpatches.Patch( facecolor=(1.0, 0.4980392156862745, 0.054901960784313725),alpha=0.8,hatch=r'\\\\',label='Pre-AGM_module_enriched')
else:
    color_key = col_dict
    if not split:
        # Horizontal violins; seaborn maps organ -> colour directly from col_dict.
        ax = sns.violinplot(x=y, y=x, data=data, split=False, width=1, cut=0, orient='h', bw=0.2, palette=col_dict)
        markers = [plt.Line2D([0,0],[0,0],color=color, marker='o', linestyle='') for color in col_dict.values()]
        legend2 = plt.legend(markers, color_key.keys(), numpoints=1,loc='center left', bbox_to_anchor=(1.01, 0.5))
        ax.add_artist(legend2)
    else:
        ax = sns.violinplot(x=x, y=y, hue=split, data=data, split=True, width=1, cut=0, palette=['.2', '.5'])
        colors = list(col_dict.values()) + list(col_dict.values())
        handles = []
        # Recolour the violin bodies pairwise: even body = organ colour, odd body = whitened.
        for ind, violin in enumerate(ax.findobj(PolyCollection)):
            rgb = to_rgb(colors[ind // 2])
            if ind % 2 != 0:
                rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
            violin.set_facecolor(rgb)
            handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))
        # The organ colour legend is built once, after the loop, rather than once per violin body.
        markers = [plt.Line2D([0,0],[0,0],color=color, marker='o', linestyle='') for color in col_dict.values()]
        legend2 = plt.legend(markers, color_key.keys(), numpoints=1,loc='center left', bbox_to_anchor=(1.01, 0.5))
        ax.add_artist(legend2)
        # patches for an optional hatched legend
        circ1 = mpatches.Patch( facecolor='#808080',alpha=0.8,label='Mac')
        circ2 = mpatches.Patch( facecolor='#808080',alpha=0.8,hatch=r'\\\\',label='Pre-AGM_module_enriched')
    if jitter:
        ax = sns.stripplot(x=x, y=y, data=data_striplot,jitter=0.2, zorder=1,alpha=.5,size=1,color = 'grey')

# Dotted reference line at a score of 0 (score is on the x axis when split is False).
plt.axvline(x = 0, color = 'r', linestyle = ':')
plt.title("ADLT Organ-wise Pre-AGM YS Mac module distribution",fontsize = 20,y=1.05,fontweight='bold')
plt.xlabel("variance-scaled Pre-AGM module score",fontsize = 20,fontweight='bold')
plt.ylabel("Organs",fontsize = 20,fontweight='bold')
plt.savefig((save_name),dpi=300,bbox_inches='tight')
plt.show()

In [None]:
col_dict

In [None]:
from matplotlib import pyplot as plt
from matplotlib.colors import to_rgb
from matplotlib.collections import PolyCollection
from matplotlib.legend_handler import HandlerTuple
import seaborn as sns
import numpy as np
from matplotlib.lines import Line2D
import matplotlib as mpl
import matplotlib.patches as mpatches

# Violin plot of the scaled module score per organ; violins are drawn in grey
# and then recoloured by hand from col_dict.
plt.rcdefaults()
plt.rcParams['figure.facecolor'] = 'white'
plt.figure(figsize=(15,11))

x = 'organ'
y = 'scaled_pre_agm_mac_score'
split = False #'LVL5'  -- set to a column name to draw split violins
# NOTE(review): same output path as the other custom-colour violin cell, so the
# figure saved last wins -- confirm that is intended.
save_name = './v6_2_colors_col_adult_ordered_Violin_organ_wise_tlf_distribution.pdf'
vmin = -5
vmax = 5
# Keep only organs present in rank_prop, in rank_prop order.
col_dict = {key: col_dict[key] for key in rank_prop.index if key in col_dict}
jitter = False
use_custom_color_dict = True

# `size` / `split_palette` were dropped from the violinplot calls: violinplot
# defines neither, so depending on the seaborn version they are either ignored
# or forwarded to matplotlib and rejected.
if not use_custom_color_dict:
    sns.set_palette(palette = sns.color_palette())
    if not split:
        ax = sns.violinplot(x=y, y=x, data=data, split=False, width=1, cut=0, orient='h', bw=0.2)
    else:
        ax = sns.violinplot(x=x, y=y, hue=split, data=data, split=True, width=1.5, cut=0)
        # patches for an optional hatched legend
        circ1 = mpatches.Patch( facecolor=(0.12156862745098039, 0.4666666666666667, 0.7058823529411765),alpha=0.8,label='All Macs')
        circ2 = mpatches.Patch( facecolor=(1.0, 0.4980392156862745, 0.054901960784313725),alpha=0.8,hatch=r'\\\\',label='Pre-AGM_module_enriched')
else:
    color_key = col_dict
    if not split:
        ax = sns.violinplot(x=y, y=x, data=data, split=False, width=1, cut=0, orient='h', bw=0.2, palette=['.2', '.5'])
    else:
        ax = sns.violinplot(x=x, y=y, hue=split, data=data, split=True, width=1, cut=0, palette=['.2', '.5'])
    if jitter:
        ax = sns.stripplot(x=x, y=y, data=data_striplot,jitter=0.2, zorder=1,alpha=.5,size=1,color = 'grey')
    colors = list(col_dict.values()) + list(col_dict.values())
    handles = []

    # Split violins contribute two bodies per organ, un-split violins one.
    # Indexing with `ind // 2` unconditionally gave every pair of neighbouring
    # organs the same colour when split is False, contradicting the legend.
    # NOTE(review): colours are assigned by position, which presumes the violin
    # order matches the col_dict (rank_prop) order -- verify.
    bodies_per_organ = 2 if split else 1
    for ind, violin in enumerate(ax.findobj(PolyCollection)):
        rgb = to_rgb(colors[ind // bodies_per_organ])
        if split and ind % 2 != 0:
            rgb = 0.5 + 0.5 * np.array(rgb)  # make the second half whiter
        violin.set_facecolor(rgb)
        handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))
    # The organ colour legend is built once, after the loop, rather than once per violin body.
    markers = [plt.Line2D([0,0],[0,0],color=color, marker='o', linestyle='') for color in col_dict.values()]
    legend2 = plt.legend(markers, color_key.keys(), numpoints=1,loc='center left', bbox_to_anchor=(1.01, 0.5))
    ax.add_artist(legend2)
    # patches for an optional hatched legend
    circ1 = mpatches.Patch( facecolor='#808080',alpha=0.8,label='Mac')
    circ2 = mpatches.Patch( facecolor='#808080',alpha=0.8,hatch=r'\\\\',label='Pre-AGM_module_enriched')

# Dotted reference line at a score of 0 (score is on the x axis when split is False).
plt.axvline(x = 0, color = 'r', linestyle = ':')
plt.title("ADLT Organ-wise Pre-AGM YS Mac module distribution",fontsize = 20,y=1.05,fontweight='bold')
plt.xlabel("variance-scaled TLF+ module score",fontsize = 20,fontweight='bold')
plt.ylabel("Organs",fontsize = 20,fontweight='bold')
plt.savefig((save_name),dpi=300,bbox_inches='tight')
plt.show()

# plot a barplot for proportion of enriched cells


In [None]:
# Manual organ display order used when reordering the 'organ' categories.
org_order = [
    'fat',
    'brain',
    'vasculature',
    'bladder',
    'muscle',
    'heart',
    'lymph_node',
    'thymus',
    'liver',
    'omentum',
    'bone_marrow',
    'trachea',
    'gut',
    'skin',
    'uterus',
    'spleen',
    'prostate',
    'lung',
]

In [None]:
from matplotlib import rcParams
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D

# Bar chart of rank_prop['prop'] per organ. The figure width is taken from the
# height of the current figure; the height is fixed at 3 inches.
width = 0.6
fig_sz = plt.gcf()
size = fig_sz.get_size_inches()
counts = rank_prop
fig, ax = plt.subplots(figsize=(size[1], 3), frameon=False)
bars = ax.bar(counts.index, counts['prop'], width=width, color='#FA8072', edgecolor='b')
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelrotation=90)
# leave headroom above the tallest bar for the value labels
ax.set_ylim(0, max(counts['prop']) + 0.1)
ax.bar_label(bars, fontsize=10, rotation=90, padding=3)
ax.set_yticks([])
for side in ('top', 'left', 'right'):
    ax.spines[side].set_visible(False)
# ax.spines['bottom'].set_visible(False)

patch = mpatches.Patch(color='#FA8072', label='proportion_mac_enriched')
legend_1 = plt.legend(handles=[patch], bbox_to_anchor=(1.41, 1), handlelength=3)
ax.add_artist(legend_1)
plt.show()
fig.savefig('A1_V1_bars_adult_mac_enrich_prop_main.pdf', bbox_inches='tight', dpi=300)

In [None]:
adata_plot.write('adata_plor_adult.h5ad')

In [None]:
adata_plot = sc.read('./adata_plor_adult.h5ad')

In [None]:
# Keep an independent backup: slicing with [:] only yields a view that still
# refers to adata_plot, whereas .copy() produces a separate object.
adata_plot_orig = adata_plot.copy()

In [None]:
# Restore a fresh working object from the backup; .copy() (rather than a [:]
# view) keeps later edits to adata_plot.obs from being tied to the backup.
adata_plot = adata_plot_orig.copy()

In [None]:
adata_plot.obs['LVL_panimmune'].unique()

In [None]:
# Work on plain strings, then relabel Kupffer-like macrophages found outside
# the liver as MACROPHAGE_LYVE1_HIGH.
adata_plot.obs['LVL_panimmune'] = adata_plot.obs['LVL_panimmune'].astype(str)
non_liver_kupffer = (
    ~adata_plot.obs['organ'].isin(['liver'])
    & adata_plot.obs['LVL_panimmune'].isin(['MACROPHAGE_KUPFFER_LIKE'])
)
adata_plot.obs.loc[non_liver_kupffer, 'LVL_panimmune'] = 'MACROPHAGE_LYVE1_HIGH'

In [None]:
# Count pre-AGM-high macrophages per (organ, LVL_panimmune) pair.
enriched_obs = adata_plot.obs.loc[adata_plot.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])]
enr_rank = pd.DataFrame(enriched_obs.groupby(['organ','LVL_panimmune']).apply(len)).reset_index()
enr_rank.columns = ['organ', 'LVL_panimmune','count']
# reorder_categories returns a new Series; assign it back so the order is actually applied.
enr_rank['organ'] = enr_rank['organ'].astype('category').cat.reorder_categories(org_order)

prop = enr_rank.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'}).reset_index()
# Fourth-root compression of MHCII-high counts above 1. The exponent must be
# parenthesised: `x**1/4` parses as (x**1)/4, i.e. a division by four.
prop['count'] = prop['count'].astype(float)
mhcii_rows = prop['LVL_panimmune'].isin(['MACROPHAGE_MHCII_HIGH']) & (prop['count'] > 1)
prop.loc[mhcii_rows, 'count'] = prop.loc[mhcii_rows, 'count'] ** (1/4)
pcts = prop.copy()

# Per-organ percentage of each subtype, from the raw (uncompressed) counts.
# transform keeps the original index, so reset_index works on every pandas version.
prop_x = enr_rank.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'})
pcts_x = (100 * prop_x / prop_x.groupby(level=0).transform('sum')).reset_index()



In [None]:
prop_x.groupby(level=0).apply(lambda x:100 * x / float(x.sum()))

In [None]:
# Count pre-AGM-high macrophages per (organ, LVL_panimmune) pair.
enriched_obs = adata_plot.obs.loc[adata_plot.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])]
enr_rank = pd.DataFrame(enriched_obs.groupby(['organ','LVL_panimmune']).apply(len)).reset_index()
enr_rank.columns = ['organ', 'LVL_panimmune','count']
# reorder_categories returns a new Series; assign it back so the order is actually applied.
enr_rank['organ'] = enr_rank['organ'].astype('category').cat.reorder_categories(org_order)

prop = enr_rank.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'}).reset_index()
# Fourth-root compression of MHCII-high counts above 1.
prop['count'] = prop['count'].astype(float)
mhcii_rows = prop['LVL_panimmune'].isin(['MACROPHAGE_MHCII_HIGH']) & (prop['count'] > 1)
prop.loc[mhcii_rows, 'count'] = prop.loc[mhcii_rows, 'count'] ** (1/4)
pcts = prop.copy()

# Per-organ percentage of each subtype, from the raw (uncompressed) counts.
# transform keeps the original index, so reset_index works on every pandas version.
prop_x = enr_rank.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'})
pcts_x = (100 * prop_x / prop_x.groupby(level=0).transform('sum')).reset_index()
# Show the counts of subtypes that make up more than 0% but less than 15% of their organ.
pcts.loc[(pcts_x['count']<15) & (pcts_x['count']>0),'count']


In [None]:
# Relabel subtypes contributing more than 0% but less than 15% of their organ
# as MACROPHAGE_LYVE1_HIGH, then display the re-aggregated counts.
minor_share_rows = (pcts_x['count'] < 15) & (pcts_x['count'] > 0)
pcts.loc[minor_share_rows, 'LVL_panimmune'] = 'MACROPHAGE_LYVE1_HIGH'
pcts.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'}).reset_index()





In [None]:
enr_rank['LVL_panimmune'].unique()

In [None]:
# Count pre-AGM-high macrophages per (organ, LVL_panimmune) pair.
enriched_obs = adata_plot.obs.loc[adata_plot.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])]
enr_rank = pd.DataFrame(enriched_obs.groupby(['organ','LVL_panimmune']).apply(len)).reset_index()
enr_rank.columns = ['organ', 'LVL_panimmune','count']
# reorder_categories returns a new Series; assign it back so the order is actually applied.
enr_rank['organ'] = enr_rank['organ'].astype('category').cat.reorder_categories(org_order)

prop = enr_rank.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'}).reset_index()
# Fourth-root compression of MHCII-high counts above 1.
prop['count'] = prop['count'].astype(float)
mhcii_rows = prop['LVL_panimmune'].isin(['MACROPHAGE_MHCII_HIGH']) & (prop['count'] > 1)
prop.loc[mhcii_rows, 'count'] = prop.loc[mhcii_rows, 'count'] ** (1/4)
pcts = prop.copy()

# Per-organ percentage of each subtype, from the raw (uncompressed) counts.
# transform keeps the original index, so reset_index works on every pandas version.
prop_x = enr_rank.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'})
pcts_x = (100 * prop_x / prop_x.groupby(level=0).transform('sum')).reset_index()

# Fold small groups into MACROPHAGE_LYVE1_HIGH (they are relabelled, not dropped):
#  - subtypes making up more than 0% but less than 5% of their organ, and
#  - subtypes whose count is below 5.
# NOTE(review): the count tested here is the fourth-rooted value for MHCII-high
# rows, so those are folded in at much larger raw counts -- confirm intended.
pcts.loc[(pcts_x['count']<5) & (pcts_x['count']>0),'LVL_panimmune'] = 'MACROPHAGE_LYVE1_HIGH'
pcts.loc[pcts['count']<5,'LVL_panimmune'] = 'MACROPHAGE_LYVE1_HIGH'

# Re-aggregate after relabelling and pivot to an organ x subtype count matrix.
pcts = pcts.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'}).reset_index()
pcts_mat = pcts.pivot(index = 'organ', columns = 'LVL_panimmune', values='count')
pcts_mat

In [None]:
# Keep, and order, the macrophage subtype columns that are plotted.
subtype_columns = [
    'MACROPHAGE_ALVEOLAR',
    'MACROPHAGE_INTERSTITIAL',
    'MACROPHAGE_MICROGLIA_BAMS',
    'MACROPHAGE_KUPFFER_LIKE',
    'MACROPHAGE_LYVE1_HIGH',
]
pcts_mat = pcts_mat[subtype_columns]

In [None]:
pcts_mat.to_csv('pcts_mat_adult.csv')

In [None]:
pcts_mat = pd.read_csv('pcts_mat_adult.csv')

In [None]:
# Manual organ display order applied to the count matrix rows.
org_order = [
    'fat',
    'vasculature',
    'muscle',
    'brain',
    'bladder',
    'heart',
    'lymph_node',
    'thymus',
    'omentum',
    'bone_marrow',
    'trachea',
    'gut',
    'skin',
    'liver',
    'uterus',
    'spleen',
    'prostate',
    'lung',
]

In [None]:
# Put the rows in org_order. Row selection with .loc replaces the former
# transpose / select columns / transpose round trip, which can coerce column
# dtypes; 'organ' is turned back into a regular column afterwards.
pcts_mat = pcts_mat.set_index('organ').loc[org_order]
pcts_mat = pcts_mat.reset_index()

In [None]:
# Row totals as integers. Only numeric columns are summed, so the string
# 'organ' column (present after reset_index) does not break the sum.
list(pcts_mat.select_dtypes(include='number').sum(axis=1).astype(int))

In [None]:
plt.rcdefaults()

In [None]:
from matplotlib import rcParams
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
# Stacked bar chart of pcts_mat (one bar per row), x ticks hidden.
width = 0.6
fig_sz = plt.gcf()
size = fig_sz.get_size_inches()
#counts = rank_prop
# Width is taken from the current figure's height; height is fixed at 3 inches.
fig, ax = plt.subplots(figsize=(size[1],3),frameon=False)   

col_pal = ['#94BFB1', '#ff0000',     '#B49EC8',     '#EE943E',   '#E0EE70',  '#4C7BAB',    '#E78AB8',    '#AFBFCC',    '#A86458',    "#FFFF00"]
pcts_mat.plot.bar(rot=0,stacked = True,color = col_pal,ax=ax)
ax.tick_params(axis='x', labelrotation=90)
ax.tick_params(axis='y', labelrotation=90)
# ax.set_ylim([0, max(counts['prop'])+0.1])
# ax.bar_label(bars,fontsize=10,rotation=90,padding = 3)
ax.yaxis.tick_right()
# ax.set_yticks([])
ax.set_xticks([])
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(True)
ax.legend(bbox_to_anchor=(2, 1.05))
# The legend is anchored outside the axes; bbox_inches='tight' keeps it inside
# the saved page instead of being clipped at the figure edge.
plt.savefig('./bars_count_enriched.pdf', bbox_inches='tight')
plt.show()

In [None]:
from matplotlib import rcParams
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
# Stacked bar chart of pcts_mat (one bar per row), x tick labels shown.
width = 0.6
fig_sz = plt.gcf()
size = fig_sz.get_size_inches()
#counts = rank_prop
# Width is taken from the current figure's height; height is fixed at 3 inches.
fig, ax = plt.subplots(figsize=(size[1],3),frameon=False)   

col_pal = ['#94BFB1', '#ff0000',     '#B49EC8',     '#EE943E',   '#E0EE70',  '#4C7BAB',    '#E78AB8',    '#AFBFCC',    '#A86458',    "#FFFF00"]
# Bars are labelled from the index. When 'organ' is still a regular column,
# use it as the index so the ticks show organ names rather than row numbers.
plot_df = pcts_mat.set_index('organ') if 'organ' in pcts_mat.columns else pcts_mat
plot_df.plot.bar(rot=0,stacked = True,color = col_pal,ax=ax)
ax.tick_params(axis='x', labelrotation=90)
ax.tick_params(axis='y', labelrotation=90)
# ax.set_ylim([0, max(counts['prop'])+0.1])
# ax.bar_label(bars,fontsize=10,rotation=90,padding = 3)
ax.yaxis.tick_right()
# ax.set_yticks([])
# ax.set_xticks([])
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(True)
ax.legend(bbox_to_anchor=(2, 1.05))
# The legend is anchored outside the axes; bbox_inches='tight' keeps it inside
# the saved page instead of being clipped at the figure edge.
plt.savefig('./bars_count_enriched_with_x.pdf', bbox_inches='tight')
plt.show()

# V2 black and gray

In [None]:
from matplotlib import rcParams
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
# Stacked bar chart of pcts_mat (one bar per row), x ticks hidden; V2 palette order.
width = 0.6
fig_sz = plt.gcf()
size = fig_sz.get_size_inches()
counts = rank_prop
# Width is taken from the current figure's height; height is fixed at 3 inches.
fig, ax = plt.subplots(figsize=(size[1],3),frameon=False)   

col_pal = ['#94BFB1', '#ff0000',     '#B49EC8',    '#E0EE70',    '#EE943E',    '#4C7BAB',    '#E78AB8',    '#AFBFCC',    '#A86458',    "#FFFF00"]
pcts_mat.plot.bar(rot=0,stacked = True,color = col_pal,ax=ax)
ax.tick_params(axis='x', labelrotation=90)
ax.tick_params(axis='y', labelrotation=90)
# ax.set_ylim([0, max(counts['prop'])+0.1])
# ax.bar_label(bars,fontsize=10,rotation=90,padding = 3)
ax.yaxis.tick_right()
# ax.set_yticks([])
ax.set_xticks([])
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(True)
ax.legend(bbox_to_anchor=(1.1, 1.05))
# The legend is anchored outside the axes; bbox_inches='tight' keeps it inside
# the saved page instead of being clipped at the figure edge.
plt.savefig('./bars_count_enriched.pdf', bbox_inches='tight')
plt.show()

In [None]:
from matplotlib import rcParams
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
# Stacked bar chart of pcts_mat (one bar per row), x tick labels shown; V2 palette order.
width = 0.6
fig_sz = plt.gcf()
size = fig_sz.get_size_inches()
counts = rank_prop
# Width is taken from the current figure's height; height is fixed at 3 inches.
fig, ax = plt.subplots(figsize=(size[1],3),frameon=False)   

col_pal = ['#94BFB1', '#ff0000',     '#B49EC8',    '#E0EE70',    '#EE943E',    '#4C7BAB',    '#E78AB8',    '#AFBFCC',    '#A86458',    "#FFFF00"]
# Bars are labelled from the index. When 'organ' is still a regular column,
# use it as the index so the ticks show organ names rather than row numbers.
plot_df = pcts_mat.set_index('organ') if 'organ' in pcts_mat.columns else pcts_mat
plot_df.plot.bar(rot=0,stacked = True,color = col_pal,ax=ax)
ax.tick_params(axis='x', labelrotation=90)
ax.tick_params(axis='y', labelrotation=90)
# ax.set_ylim([0, max(counts['prop'])+0.1])
# ax.bar_label(bars,fontsize=10,rotation=90,padding = 3)
ax.yaxis.tick_right()
# ax.set_yticks([])
# ax.set_xticks([])
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(True)
ax.legend(bbox_to_anchor=(1.1, 1.05))
# The legend is anchored outside the axes; bbox_inches='tight' keeps it inside
# the saved page instead of being clipped at the figure edge.
plt.savefig('./bars_count_enriched_with_x.pdf', bbox_inches='tight')
plt.show()

# Compute and plot by proportion

In [None]:
# Count pre-AGM-high macrophages per (organ, LVL_panimmune) pair.
enriched_obs = adata_plot.obs.loc[adata_plot.obs['LVL5'].isin(['MACROPHAGE_pre_agm_hi'])]
enr_rank = pd.DataFrame(enriched_obs.groupby(['organ','LVL_panimmune']).apply(len)).reset_index()
enr_rank.columns = ['organ', 'LVL_panimmune','count']
# reorder_categories returns a new Series; assign it back so the order is actually applied.
enr_rank['organ'] = enr_rank['organ'].astype('category').cat.reorder_categories(org_order)

prop = enr_rank.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'}).reset_index()
# Fourth-root compression of MHCII-high counts above 1.
prop['count'] = prop['count'].astype(float)
mhcii_rows = prop['LVL_panimmune'].isin(['MACROPHAGE_MHCII_HIGH']) & (prop['count'] > 1)
prop.loc[mhcii_rows, 'count'] = prop.loc[mhcii_rows, 'count'] ** (1/4)
pcts = prop.copy()

# Per-organ percentage of each subtype, from the raw (uncompressed) counts.
# transform keeps the original index, so reset_index works on every pandas version.
prop_x = enr_rank.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'})
pcts_x = (100 * prop_x / prop_x.groupby(level=0).transform('sum')).reset_index()

# Fold small groups into MACROPHAGE_LYVE1_HIGH (they are relabelled, not dropped):
#  - subtypes making up more than 0% but less than 5% of their organ, and
#  - subtypes whose count is below 5.
# NOTE(review): the count tested here is the fourth-rooted value for MHCII-high
# rows, so those are folded in at much larger raw counts -- confirm intended.
pcts.loc[(pcts_x['count']<5) & (pcts_x['count']>0),'LVL_panimmune'] = 'MACROPHAGE_LYVE1_HIGH'
pcts.loc[pcts['count']<5,'LVL_panimmune'] = 'MACROPHAGE_LYVE1_HIGH'

# Re-aggregate after relabelling and pivot to an organ x subtype matrix.
pcts = pcts.groupby(['organ', 'LVL_panimmune']).agg({'count': 'sum'}).reset_index()
pcts_mat = pcts.pivot(index = 'organ', columns = 'LVL_panimmune', values='count')
# Convert each organ's row to percentages of that row's total.
pcts_mat = pcts_mat.div(pcts_mat.sum(axis=1), axis=0) * 100

In [None]:
from matplotlib import rcParams
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
# Stacked bar chart of the percentage matrix pcts_mat (one bar per row), x ticks hidden.
width = 0.6
fig_sz = plt.gcf()
size = fig_sz.get_size_inches()
counts = rank_prop
# Width is taken from the current figure's height; height is fixed at 3 inches.
fig, ax = plt.subplots(figsize=(size[1],3),frameon=False)   

col_pal = ['#94BFB1', '#ff0000',     '#B49EC8',    '#E0EE70',    '#EE943E',    '#4C7BAB',    '#E78AB8',    '#AFBFCC',    '#A86458',    "#FFFF00"]
pcts_mat.plot.bar(rot=0,stacked = True,color = col_pal,ax=ax)
ax.tick_params(axis='x', labelrotation=90)
ax.tick_params(axis='y', labelrotation=90)
# ax.set_ylim([0, max(counts['prop'])+0.1])
# ax.bar_label(bars,fontsize=10,rotation=90,padding = 3)
ax.yaxis.tick_right()
# ax.set_yticks([])
ax.set_xticks([])
ax.spines['top'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['right'].set_visible(True)
ax.legend(bbox_to_anchor=(1.1, 1.05))
# The legend is anchored outside the axes; bbox_inches='tight' (as used for the
# "with_x" percentage figure) keeps it inside the saved page.
plt.savefig('./pecentage_bars_count_enriched.pdf', bbox_inches='tight')
plt.show()

In [None]:
from matplotlib import rcParams
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D

# Stacked bar chart of the percentage matrix pcts_mat (one bar per row), with
# x tick labels shown.
width = 0.6
fig_sz = plt.gcf()
size = fig_sz.get_size_inches()
counts = rank_prop
# Width is taken from the current figure's height; height is fixed at 3 inches.
fig, ax = plt.subplots(figsize=(size[1], 3), frameon=False)

col_pal = [
    '#94BFB1', '#ff0000', '#B49EC8', '#E0EE70', '#EE943E',
    '#4C7BAB', '#E78AB8', '#AFBFCC', '#A86458', "#FFFF00",
]
pcts_mat.plot.bar(rot=0, stacked=True, color=col_pal, ax=ax)
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelrotation=90)
# y ticks go on the right-hand side; only the right and bottom spines stay visible
ax.yaxis.tick_right()
for side, shown in (('top', False), ('left', False), ('right', True)):
    ax.spines[side].set_visible(shown)
ax.legend(bbox_to_anchor=(1.1, 1.05))
plt.savefig('./percentage_bars_count_enriched_with_x.pdf', bbox_inches='tight')
plt.show()

# Save boolean indices

In [None]:
import pandas as pd

In [None]:
# Write the full per-cell metadata table to disk, then keep a three-column
# working table.
adata.obs.to_csv('pan_adult_myeloid_atlas_obs.csv')
# Take an explicit copy: a later cell adds a 'Pre_AGM_enriched' column to
# `obs`, and assigning into a slice of `adata.obs` triggers pandas'
# SettingWithCopyWarning.
obs = adata.obs[['organ', 'donor', 'LVL_panimmune']].copy()

In [None]:
# Load the table of cell indices; its first CSV column becomes the index.
# Alternative (disabled): reload the previously exported `obs` table.
#obs = pd.read_csv('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/Adult_myeloid_atlas_obs_for_supp.csv',index_col = 0)
indices = pd.read_csv(
    '/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/adult_atlas_pre_agm_hi_mac_indices.csv',
    index_col=0,
)

In [None]:
# Build a string flag column: every cell starts as 'False'; cells that are
# listed in `indices` take their LVL5 label, which is then rewritten.
in_indices = obs.index.isin(indices.index)
obs['Pre_AGM_enriched'] = 'False'
obs.loc[in_indices, 'Pre_AGM_enriched'] = indices['LVL5']
# Order matters: the longer label contains 'MAC', so it must be replaced
# before the plain 'MAC' substring is.
for old_label, new_label in (('MACROPHAGE_pre_agm_hi', 'True'), ('MAC', 'False')):
    obs.loc[in_indices, 'Pre_AGM_enriched'] = (
        obs.loc[in_indices, 'Pre_AGM_enriched'].str.replace(old_label, new_label)
    )

In [None]:
# Show the distinct organ labels as a plain list.
obs['organ'].unique().tolist()

In [None]:
# Export the working `obs` table as CSV.
obs.to_csv(
    '/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/ling_adult_macs/Adult_myeloid_atlas_obs_for_supp.csv'
)

In [None]:
# Quick stacked bar chart straight from the `pcts` table.
pcts.plot(kind='bar', stacked=True)

In [None]:
# Put the organ categories into the order given by `org_order`.
pcts['organ'] = pcts['organ'].cat.reorder_categories(new_categories=org_order)

In [None]:
# Show the organ categories in their current order.
pcts['organ'].cat.categories.tolist()

In [None]:
# Disabled violin-plot scratch:
# ax = sns.violinplot(x=x, y=y, data=data,split= True,size=5,width = 1, split_palette=False,cut=0,)#
# ax

adata.obs  # Let's plot the proportion of each mac subtype that is in enrichment

In [None]:
# Number of cells in each (organ, LVL5) group.
organ_lvl5_groups = adata.obs.groupby(['organ', 'LVL5'])
organ_lvl5_groups.apply(len)

In [None]:
# Display the LVL5 column of `count_df`.
# NOTE(review): `count_df` is assigned in the cell that follows this one, so
# this cell only works after that cell has been run — confirm intended order.
count_df['LVL5']

In [None]:
# Count cells per (organ, LVL3, LVL5) group and derive, for every organ and
# LVL3 label, the fraction of cells that fall in the second LVL5 row.
count_df = pd.DataFrame(adata.obs.groupby(['organ', 'LVL3', 'LVL5']).apply(len)).reset_index()
count_df.columns = ['organ', 'LVL3', 'LVL5', 'counts']
prop_df = pd.DataFrame(index=count_df['organ'].unique(), columns=list(adata.obs['LVL3'].unique()))
for organ in count_df['organ'].unique():
    print(organ)
    # Explicit copy so that adding the placeholder column below does not
    # write into a slice of `count_df`.
    temp_df = count_df[count_df['organ'].isin([organ])].copy()
    # what is the proportion of TLF hi by subset
    temp_df['prop'] = 'na'
    for lvl in temp_df['LVL3'].unique():
        lvl_counts = temp_df.loc[temp_df['LVL3'].isin([lvl]), 'counts']
        if len(lvl_counts) > 1:
            # NOTE(review): position 1 is presumably the enriched LVL5 class
            # (depends on the group ordering) — confirm.
            prop_df.loc[prop_df.index.isin([organ]), lvl] = lvl_counts.iloc[1] / np.sum(lvl_counts)
        else:
            # Fewer than two LVL5 rows: there is no second class to measure.
            # This replaces a bare `except:` that also hid unrelated errors.
            prop_df.loc[prop_df.index.isin([organ]), lvl] = 0
prop_df

In [None]:
# Cast the proportions to float and arrange the organ rows as in `org_order`.
# A fixed column order was tried and is kept here, disabled:
# prop_df = prop_df[[
# 'MACROPHAGE_PRE_MAC',
# 'MACROPHAGE_MICROGLIA',
# 'MACROPHAGE_LYVE1_HIGH',
# 'MACROPHAGE_MHCII_HIGH',
# 'MACROPHAGE_IRON_RECYCLING',
# 'MACROPHAGE_KUPFFER_LIKE',
# 'MACROPHAGE_PROLIFERATING',
# 'MACROPHAGE_ERY',
# 'MACROPHAGE_PERI',]]
prop_df = prop_df.astype('float').loc[org_order]

In [None]:
# Display the organ x LVL3 proportion table.
prop_df

In [None]:
#prop_df = pd.read_csv('/nfs/team205/ig7/work_backups/backup_210306/projects/YS/rebuttal_figs_010922/01_Fetal_YS_MAC_across_organs/1_5_MAC_TLF_line_violin_plot/V3_plots/prop_enrich.csv',index_col = 0)

In [None]:
# Work on an independent copy so that `prop_df` itself stays untouched.
crs_tbl = prop_df.copy(deep=True)

In [None]:
# Sort df columns by rows
plt.rcdefaults()
# index_order = list(crs_tbl.max(axis=1).sort_values(ascending=False).index)
col_order = list(crs_tbl.max(axis=0).sort_values(ascending=False).index)
# crs_tbl = crs_tbl.loc[index_order]
crs_tbl = crs_tbl[col_order]

# Plot_df_heatmap(crs_tbl, cmap='coolwarm', rotation=90, vmin=20, vmax=70)
pal = sns.color_palette("YlOrBr", as_cmap=True)#sns.color_palette("magma", as_cmap=True)#sns.diverging_palette(240, 10, n=10)
plt.figure(figsize=(30,30))
sns.set(font_scale=1)
g = sns.heatmap(crs_tbl, cmap=pal,  annot=False,vmin=0, vmax=0.8, linewidths=1, center=0.4, square=True, cbar_kws={"shrink": 0.5})
g.xaxis.set_ticks_position("top")

plt.xticks(rotation=90,fontsize = 15)
plt.yticks(rotation=0,fontsize = 15)
plt.ylabel("Organ labels", fontsize=30)
plt.xlabel("Mac_substates", fontsize=30)
# plt.show()
plt.savefig('./ver5_proportion_subtype_enrich.pdf',dpi=300)
plt.show()

In [None]:
# Combined "<organ>_<annotation>" label per cell, followed by scaling each
# cell's total counts to 10000.
organ_labels = adata_mac.obs['organ_uni'].astype(str)
annot_labels = adata_mac.obs['IG_annot'].astype(str)
adata_mac.obs['organ_LVL3'] = organ_labels.str.cat(annot_labels, sep='_')
sc.pp.normalize_total(adata_mac, target_sum=10000)

In [None]:
# Marker genes per macrophage subtype, shown as a dot plot grouped by the
# 'IG_annot' annotation.
markers = {
    'MACROPHAGE_LYVE1_HIGH': ['RNASE1', 'PLTP', 'F13A1', 'LYVE1', 'CD163'],
    'MACROPHAGE_MHCII_HIGH': ['HLA-DRA', 'HLA-DPA1', 'CD74', 'HLA-DPB1', 'HLA-DRB1'],
    'MACROPHAGE_ERY': ['SMAP2', 'RIPOR2', 'IGHA1', 'IL1R2', 'TNFAIP3'],
    'MACROPHAGE_KUPFFER_LIKE': ['SLC40A1', 'CD5L', 'SELENOP', 'HMOX1', 'C1QC'],
    'MACROPHAGE_MICROGLIA': ['NEAT1', 'DDX17', 'SPP1', 'CLDN5'],
    'OSTEOCLAST': ['GPNMB', 'APOE', 'ACP5', 'CHIT1', 'CSTB'],
    'MACROPHAGE_PERI': ['CCL3', 'CXCL2', 'CTSL', 'NFKBIA', 'CSTB'],
    'MACROPHAGE_PROLIFERATING': ['STMN1', 'TUBA1B', 'H2AFZ', 'TUBB', 'HMGN2'],
}

# Display order for both the gene groups and the 'IG_annot' categories.
order = [
    'MACROPHAGE_LYVE1_HIGH',
    'MACROPHAGE_MHCII_HIGH',
    'MACROPHAGE_ERY',
    'MACROPHAGE_KUPFFER_LIKE',
    'MACROPHAGE_MICROGLIA',
    'OSTEOCLAST',
    'MACROPHAGE_PERI',
    'MACROPHAGE_PROLIFERATING',
]
plt.rcdefaults()
# Rebuild the dict so that the gene groups follow `order`.
markers = {key: markers[key] for key in order if key in markers}
# Extra gene group appended after the subtype groups.
markers['pre_agm_module'] = [
    'LYVE1',
    'MRC1',
    'FOLR2',
    'NINJ1', 'TIMD4', 'TTR',
    'CGA',
    'AGR2',
    'FCGR1A',
    'CSH1',
]
adata_mac.obs['IG_annot'] = adata_mac.obs['IG_annot'].astype('category').cat.reorder_categories(order)
data_temp = adata_mac
dp = sc.pl.dotplot(
    data_temp,
    var_names=markers,
    groupby='IG_annot',
    dendrogram=False,
    standard_scale='var',
    color_map='Reds',
    show=True,
    return_fig=True,
)  # title=i
dp.add_totals()
# The keyword is `bbox_inches`; the previous spelling `bboxinches` was passed
# on as an unknown argument instead of requesting a tight bounding box.
dp.savefig('cross-organ_MAC_diff_expression_adult_atlas.pdf', bbox_inches='tight')
dp.show()

In [None]:
# Heat map of the proportion table, saved as PDF together with the
# underlying numbers as CSV.
import seaborn as sns
sns.set(color_codes=True)
save_path = './'
pal = sns.color_palette("YlOrBr", as_cmap=True)  # alternative: sns.diverging_palette(240, 10, n=10)
plt.figure(figsize=(20, 20))
sns.set(font_scale=0.8)
g = sns.heatmap(
    prop_df,
    cmap=pal,
    vmin=0,
    vmax=0.8,
    linewidths=1,
    center=0.4,
    square=True,
    cbar_kws={"shrink": 0.5},
)
plt.xticks(rotation=90, fontsize=30)
plt.yticks(rotation=0, fontsize=30)
# Axis labels are intentionally not set:
# plt.xlabel("Original labels")
# plt.ylabel("Predicted labels")
plt.savefig("./prop_enrich.pdf", bbox_inches='tight')
prop_df.to_csv("./prop_enrich.csv")

In [None]:
# Scratch: display `temp_df` as left by the last iteration of the organ loop.
temp_df

In [None]:
# Scratch cell left unfinished: the assignment below has no right-hand side
# and is a SyntaxError as written, so it is disabled. The completed form of
# this statement lives in the organ/LVL3 loop that fills `prop_df`.
# prop_df.loc[prop_df.index.isin([organ]),lvl] =

In [None]:
# Scratch: display `temp_df` again.
temp_df

In [None]:
# Scratch: select the 'prop' entries of rows whose *index label* equals `lvl`.
# NOTE(review): `temp_df` is filtered from `count_df` after reset_index(), so
# its index is presumably integer positions and this likely selects nothing
# for an LVL3 label — confirm.
temp_df.loc[temp_df.index.isin([lvl]),'prop']

In [None]:
# Scratch: share of the second 'counts' row among all rows of the current
# `lvl` in `temp_df`.
lvl_counts = temp_df.loc[temp_df['LVL3'].isin([lvl]), 'counts']
lvl_counts.iloc[1] / np.sum(lvl_counts)

In [None]:
# Scratch: display `temp_df` once more.
temp_df

In [None]:
# Per organ: the fraction of cells in the second LVL5 row ('prop') and the
# total number of cells ('count').
ranker = data.groupby(['organ', 'LVL5']).apply(len).reset_index()
rank_prop = pd.DataFrame(index=ranker['organ'].unique(), columns=['prop', 'count'])
for organ in ranker['organ'].unique():
    # Column 0 holds the group sizes produced by apply(len).
    organ_counts = ranker.loc[ranker['organ'].isin([organ]), 0]
    organ_rows = rank_prop.index.isin([organ])
    # NOTE(review): position 1 is presumably the enriched LVL5 class (depends
    # on the group ordering) — confirm.
    rank_prop.loc[organ_rows, 'prop'] = organ_counts.iloc[1] / np.sum(organ_counts)
    # Sum only the count column. Summing the whole sub-frame (label columns
    # included) returns one value per column, which int() cannot convert.
    rank_prop.loc[organ_rows, 'count'] = int(np.sum(organ_counts))

In [None]:
# Scratch check of the binomial standard error sqrt(p * (1 - p) / n)
# for p = 0.1 and n = 100.
prop = 0.1
(prop * (1 - prop) / 100) ** 0.5

In [None]:
# Standard error of each organ's proportion: sqrt(p * (1 - p) / n).
p_hat = rank_prop['prop']
rank_prop['error'] = ((p_hat * (1 - p_hat)) / rank_prop['count']) ** 0.5


In [None]:
# Organs ordered from smallest to largest standard error.
rank_prop.sort_values(by='error')

In [None]:
# Rank organs by the fraction of cells in the second LVL5 row, then apply
# that ranking to the organ categories of `data` and to the colour mapping.
ranker = data.groupby(['organ', 'LVL5']).apply(len).reset_index()
organ_labels = ranker['organ'].unique()
rank_prop = pd.DataFrame(index=organ_labels, columns=['prop'])
for organ in organ_labels:
    # Column 0 holds the group sizes produced by apply(len).
    organ_counts = ranker.loc[ranker['organ'].isin([organ]), 0]
    rank_prop.loc[rank_prop.index.isin([organ]), 'prop'] = organ_counts.iloc[1] / np.sum(organ_counts)
rank_prop = rank_prop.sort_values('prop', ascending=False)
data['organ'] = data['organ'].cat.reorder_categories(rank_prop.index.tolist())
# Keep only colours for ranked organs, in ranking order.
col_dict = {key: col_dict[key] for key in rank_prop.index if key in col_dict}

In [None]:
# Display the per-organ ranking table.
rank_prop

In [None]:
# Display the (organ, LVL5) group-size table.
ranker

In [None]:
from matplotlib import pyplot as plt
from matplotlib.colors import to_rgb
from matplotlib.collections import PolyCollection
from matplotlib.legend_handler import HandlerTuple
import seaborn as sns
import numpy as np
from matplotlib.lines import Line2D
import matplotlib as mpl
import matplotlib.patches as mpatches

# Split violin plot of the module score per organ, with the two LVL5 classes
# on either half of each violin. Every second violin half is hatched.
plt.rcdefaults()
plt.rcParams['figure.facecolor'] = 'white'
plt.figure(figsize=(15,10))

x = 'organ'
y = 'scaled_pre_agm_mac_score'
split = 'LVL5'
save_name = './v5_multi_color_ordered_Violin_organ_wise_tlf_distribution.pdf'
# Keep only colours for ranked organs, in ranking order.
col_dict = {key: col_dict[key] for key in rank_prop.index if key in col_dict}
jitter = False
use_custom_color_dict = True

if not use_custom_color_dict:
    sns.set_palette(palette = sns.color_palette())
    ax = sns.violinplot(x=x, y=y,hue = split, data=data,split= True,size=5,width = 1, split_palette=True,cut=0,)#palette = []
    # add hatched legend
    circ1 = mpatches.Patch( facecolor=(0.12156862745098039, 0.4666666666666667, 0.7058823529411765),alpha=0.8,label='Mac')
    circ2 = mpatches.Patch( facecolor=(1.0, 0.4980392156862745, 0.054901960784313725),alpha=0.8,hatch=r'\\\\',label='Pre-AGM_module_enriched')
else:
    ax = sns.violinplot(x=x, y=y,hue = split, data=data,split= True,size=5,width = 1, split_palette=True,cut=0, palette=['.2', '.5'])#palette = []
    if jitter:
        # NOTE(review): `data_striplot` is not assigned in this cell; it must
        # exist already before `jitter` is switched on — confirm.
        ax = sns.stripplot(x=x, y=y, data=data_striplot,jitter=0.2, zorder=1,alpha=.5,size=1,color = 'grey')#,palette =col_dict ) #palette =col_dict 
    colors = list(col_dict.values()) + list(col_dict.values())
    handles = []
    # Recolour the violin bodies: consecutive pairs share a colour and the
    # second of each pair is drawn in a lighter tint.
    for ind, violin in enumerate(ax.findobj(PolyCollection)):
        rgb = to_rgb(colors[ind // 2])
        if ind % 2 != 0:
            rgb = 0.5 + 0.5 * np.array(rgb)  # make whiter
        violin.set_facecolor(rgb)
        handles.append(plt.Rectangle((0, 0), 0, 0, facecolor=rgb, edgecolor='black'))#hatch=r'\\\\')
    # The legends are built once, after the recolouring loop. Building them
    # inside the loop stacked one identical legend per violin half and left
    # circ1/circ2 undefined when no violin was found.
    # set marker colors
    markers = [plt.Line2D([0,0],[0,0],color=color, marker='o', linestyle='') for color in col_dict.values()]
    # NOTE(review): the labels come from `color_key`, which is not assigned
    # in this cell; presumably its keys match `col_dict` — confirm.
    legend2 = plt.legend(markers, color_key.keys(), numpoints=1,loc='center left', bbox_to_anchor=(1.01, 0.5))
    ax.add_artist(legend2)
    # add hatched legend
    circ1 = mpatches.Patch( facecolor='#808080',alpha=0.8,label='Mac')
    circ2 = mpatches.Patch( facecolor='#808080',alpha=0.8,hatch=r'\\\\',label='Pre-AGM_module_enriched')

# Hatch every second violin half.
for i, violin in enumerate(ax.findobj(mpl.collections.PolyCollection)):
    if i % 2:
        violin.set_hatch("//")

# red_line = Line2D([0], [0], color='red', lw=3, label='K Selected',linestyle='--')
#legend1 = plt.legend(handles=[tuple(handles[::2]), tuple(handles[1::2]),red_line], labels=data["LVL5"].cat.categories.to_list(),title="Pre-AGM mac module enriched", handlelength=4, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},bbox_to_anchor=(1.21, 1))

legend3 = plt.legend(handles=[circ1,circ2], numpoints=1,loc='center left', bbox_to_anchor=(1.01, 0.3))
#ax.add_artist(legend1)
ax.add_artist(legend3)

# Dotted reference line at a score of 0.
plt.axhline(y = 0, color = 'r', linestyle = ':')#np.mean(adata_macs.obs['scaled_pre_agm_mac_score'])#plt.axhline(y = np.mean(adata_macs.obs['scaled_pre_agm_mac_score']), color = 'r', linestyle = ':')
plt.title("Organ-wise Pre-AGM YS Mac module distribution",fontsize = 20,y=1.05,fontweight='bold')
plt.ylabel("variance-scaled TLF+ module score",fontsize = 20,fontweight='bold')
plt.xlabel("Organs",fontsize = 20,fontweight='bold')
plt.savefig((save_name),dpi=300,bbox_inches='tight')
plt.show()

In [None]:
# Replace `obs` with the table loaded from this CSV (first column -> index).
obs = pd.read_csv(
    '/nfs/team205/ig7/work_backups/backup_210306/projects/YS/YS_data/YS_panf_gonads_brain_eliv_combined_060922/A1_Vx_pan_organ_integrations/OBS_A1_V10_raw_scvi_YS_updated_panf_gonads_brain_build_donor_organ_corrected_031022.csv',
    index_col=0,
)

In [None]:
# LVL4 labels that contain 'ELP'.
obs.loc[obs['LVL4'].str.contains('ELP'), 'LVL4']

In [None]:
# Display the LVL4 column of `obs`.
obs['LVL4']

In [None]:
# Load a second annotation table (first CSV column -> index).
obs2 = pd.read_csv(
    '/nfs/team205/ig7/work_backups/backup_210306/projects/Pan_fetal/IG_anno_lvl_2_final_clean_051121.csv',
    index_col=0,
)

In [None]:
# Rows of `obs2` whose annotation contains 'ELP'.
is_elp = obs2['anno_lvl_2_final_clean'].str.contains('ELP')
obs2.loc[is_elp]

In [None]:
# Rows of `obs2` whose annotation contains 'ETP'.
is_etp = obs2['anno_lvl_2_final_clean'].str.contains('ETP')
obs2.loc[is_etp]