In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as pl
#from matplotlib import rcParams
import scanpy as sc
import os
import anndata
#from sklearn.mixture import GaussianMixture
import matplotlib
# use font type 42 for both the PDF and the PostScript backends of saved figures
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

In [None]:
# Configure scanpy logging and the default figure appearance
sc.settings.verbosity = 0  # 0 = errors, 1 = warnings, 2 = info, 3 = hints
sc.settings.set_figure_params(figsize=(4, 4), frameon=False, dpi=150)

In [None]:
# Read in the sample information table; the sample ID column is parsed as str
sampleInfoPath = os.path.join('/Users/tan/Ionctura-collab/data/',
                              'Sample info cut all limit 10 timepoints.csv')
sampleInfo = pd.read_csv(sampleInfoPath, dtype={'Facility barcode/Sample ID': str})

In [None]:
# Display the column names of the sample information table; the cells below
# index it by these exact names (note that one of them ends with a trailing space).
sampleInfo.columns

In [None]:
# Merge all samples listed in the info table into one scaled AnnData object.
# NOTE: selectInfo is bound to the same DataFrame as sampleInfo (no copy), so the
# visit-code clean-up below is also visible through sampleInfo.
selectInfo = sampleInfo
selectInfo['Protocol Visit Code/Time points '] = selectInfo['Protocol Visit Code/Time points '].str.replace('*','',regex=False)
dataDir = '/Users/tan/Ionctura-collab/data/preprocessed'
labelDir = '/Users/tan/Ionctura-collab/data/preprocessed/labels'
# channels removed from every sample before analysis
# (previously dropped instead: 'EQBeads', 'DNA-Ir191', 'DNA-Ir193')
dropChannels = ['102Pd', '104Pd', '105Pd', '106Pd', '108Pd',
                '116Cd', '131Xe', '133Cs',
                '191Ir', '193Ir']
# per-cell annotation column -> sample-info column it is filled from (cast to str)
obsFromInfo = {'subject': 'Patient ID',
               'cohort': 'Cohort and dose',
               'batch': 'Batch #',
               'tumor': 'Tumor type',
               'sampleID': 'Facility barcode/Sample ID'}
# read in and merge the files according to "selectInfo"
labelList = []
adataList = []
for i in range(len(selectInfo)):
    sampleID = str(selectInfo['Facility barcode/Sample ID'].iloc[i])
    # label: one row per cell; a scalar assignment broadcasts to every row
    labelTmp = pd.read_csv(os.path.join(labelDir, sampleID + '.csv'))
    labelTmp['visit'] = selectInfo['Protocol Visit Code/Time points '].iloc[i]
    for obsCol, infoCol in obsFromInfo.items():
        labelTmp[obsCol] = str(selectInfo[infoCol].iloc[i])
    # data
    adataTmp = pd.read_csv(os.path.join(dataDir, sampleID + '.csv'))
    # labels and measurements are matched row by row, so fail early and name the
    # offending sample instead of failing later when obs is attached
    if len(adataTmp) != len(labelTmp):
        raise ValueError('sample ' + sampleID + ': ' + str(len(adataTmp)) +
                         ' data rows but ' + str(len(labelTmp)) + ' label rows')
    # filter the cells without a level1 tag (one mask shared by data and labels)
    hasLevel1 = labelTmp['level1'] != ' '
    # drop() returns a new frame, so the filtered slice is never modified in place
    adataTmp = adataTmp[hasLevel1].drop(columns=dropChannels)
    labelTmp = labelTmp[hasLevel1]
    # prepare the list
    labelList.append(labelTmp)
    adataList.append(adataTmp)
# create anndata object from the list
adata = anndata.AnnData(pd.concat(adataList, ignore_index=True))
label = pd.concat(labelList, ignore_index=True)
adata.obs = label
# scale
sc.pp.scale(adata)

In [None]:
# Number of cells (rows of adata.obs) contributed by each sample ID
adata.obs['sampleID'].value_counts()

In [None]:
# Preview the subset of cells whose level3 label is one of the Treg populations
tregLevel3 = ['CD39 Memory Tregs', 'Memory Tregs', 'Naive Tregs']
isTreg = adata.obs['level3'].isin(tregLevel3)
adata[isTreg]

In [None]:
# For each sub-population: PCA -> neighbours -> leiden -> PAGA -> force-directed
# layout, then density plots per tumor type (overall and within each cohort).
for subName in ['Treg', 'NK-cells','Monocytes', 'Eosinophils', 
                'B-cells', 'CD4 T', 'CD8 T']: 
    # save figures to a sub dir
    figpath=os.path.join('./figures_pdf', subName)
    os.makedirs(figpath, exist_ok=True)
    sc.settings.figdir=figpath
    
    # choose subpopulation: look the name up in level1, then level2; 'Treg' is
    # defined through its level3 labels
    if subName in adata.obs['level1'].unique().tolist():
        subMask = adata.obs['level1']==subName
    elif subName in adata.obs['level2'].unique().tolist():
        subMask = adata.obs['level2']==subName
    elif subName == 'Treg':
        subMask = adata.obs['level3'].isin(['CD39 Memory Tregs', 'Memory Tregs', 'Naive Tregs'])
    else:
        raise Exception("unknown sub pop")
    # explicit copy: the tools below write their results into adataSub, which
    # must therefore be an independent object and not a view of adata
    adataSub = adata[subMask, :].copy()

    # one component fewer than the smallest dimension (capped at 20)
    n_comps = min([adataSub.n_obs, adataSub.n_vars, 21])-1
    if n_comps < 1:
        # nothing to decompose (e.g. no cell carries this label): skip instead
        # of failing inside the PCA
        print('skipping ' + subName + ': only ' + str(adataSub.n_obs) + ' cell(s)')
        continue
    sc.tl.pca(adataSub, svd_solver='arpack', n_comps=n_comps)
    sc.pp.neighbors(adataSub, n_neighbors=10, n_pcs=n_comps)

    # umap visualization
    #sc.tl.umap(adataSub)
    #sc.pl.umap(adataSub, color='time point')
    
    # paga process
    sc.tl.leiden(adataSub, resolution=0.5)
    sc.tl.paga(adataSub, groups='leiden')
    sc.pl.paga(adataSub, color=['leiden'], threshold=0.1, show=False, 
                save='_' + subName + '.pdf')
    # calculate fa layout with paga as initial position
    sc.tl.draw_graph(adataSub, init_pos='paga', layout='fa')
    sc.pl.draw_graph(adataSub, color=['visit', 'leiden', 'cohort', 'subject','batch'], show=False,
                    save='_' + subName + '.pdf')
    # marker expression
    sc.pl.draw_graph(adataSub, color=adataSub.var.index.values, show=False,
                    save='_' + subName + '_marker_expressions.pdf')
    # make a real copy for cohort-wise density embedding later, taken before the
    # overall density results are written into adataSub
    adataSubCohort = adataSub.copy()
    
    # overall density
    #sc.tl.embedding_density(adataSub, basis='draw_graph_fa', groupby='visit')
    #sc.pl.embedding_density(adataSub, basis='draw_graph_fa', key='draw_graph_fa_density_visit', 
    #                        group=sorted(adataSub.obs['visit'].unique().tolist()),
    #                        save='_' + subName + '_density_visit.pdf')
    #sc.tl.embedding_density(adataSub, basis='draw_graph_fa', groupby='cohort')
    #sc.pl.embedding_density(adataSub, basis='draw_graph_fa', key='draw_graph_fa_density_cohort', 
    #                        group=adataSub.obs['cohort'].unique().tolist(),
    #                        save='_' + subName + '_density_cohort.pdf') 
    sc.tl.embedding_density(adataSub, basis='draw_graph_fa', groupby='tumor')
    sc.pl.embedding_density(adataSub, basis='draw_graph_fa', key='draw_graph_fa_density_tumor', 
                            group=adataSub.obs['tumor'].unique().tolist(),
                            save='_' + subName + '_density_tumor.pdf') 
    # cohort-wise density
    for cohortName in adataSub.obs['cohort'].unique().tolist():
        # copy the cohort subset so the density is written into its own object
        tmp = adataSubCohort[adataSubCohort.obs['cohort']==cohortName].copy()
    #    sc.tl.embedding_density(tmp, basis='draw_graph_fa', groupby='visit')
    #    sc.pl.embedding_density(tmp, basis='draw_graph_fa', key='draw_graph_fa_density_visit',
    #                            bg_dotsize=0, group=sorted(tmp.obs['visit'].unique().tolist()),
    #                            save='_' + subName + '_density_cohort_' + cohortName + '_visit.pdf')
        sc.tl.embedding_density(tmp, basis='draw_graph_fa', groupby='tumor')
        sc.pl.embedding_density(tmp, basis='draw_graph_fa', key='draw_graph_fa_density_tumor',
                                bg_dotsize=0, group=sorted(tmp.obs['tumor'].unique().tolist()),
                                save='_' + subName + '_density_cohort_' + cohortName + '_tumor.pdf')
    

In [None]:
# subject-wise analysis: for each patient, rebuild the AnnData object from that
# patient's samples only and run the PAGA / force-directed layout / visit-density
# workflow per sub-population.
# NOTE: this cell rebinds the notebook-level names selectInfo, label and adata.
dataDir = '/Users/tan/Ionctura-collab/data/preprocessed'
labelDir = '/Users/tan/Ionctura-collab/data/preprocessed/labels'
# channels removed from every sample before analysis
# (previously dropped instead: 'EQBeads', 'DNA-Ir191', 'DNA-Ir193')
dropChannels = ['102Pd', '104Pd', '105Pd', '106Pd', '108Pd',
                '116Cd', '131Xe', '133Cs',
                '191Ir', '193Ir']
for patientID in ['3901-04', '3901-05']:
    # .copy() so the column assignment below acts on an independent frame and
    # not on a filtered slice of sampleInfo
    selectInfo = sampleInfo[sampleInfo['Patient ID'].isin([patientID])].copy()
    selectInfo['Protocol Visit Code/Time points '] = selectInfo['Protocol Visit Code/Time points '].str.replace('*','',regex=False)
    # read in and merge the files according to "selectInfo" as an anndata object
    labelList = []
    adataList = []
    for i in range(len(selectInfo)):
        sampleID = str(selectInfo['Facility barcode/Sample ID'].iloc[i])
        # label: one row per cell; a scalar assignment broadcasts to every row
        labelTmp = pd.read_csv(os.path.join(labelDir, sampleID + '.csv'))
        labelTmp['visit'] = selectInfo['Protocol Visit Code/Time points '].iloc[i]
        labelTmp['subject'] = str(selectInfo['Patient ID'].iloc[i])
        # data
        adataTmp = pd.read_csv(os.path.join(dataDir, sampleID + '.csv'))
        # labels and measurements are matched row by row, so fail early and
        # name the offending sample
        if len(adataTmp) != len(labelTmp):
            raise ValueError('sample ' + sampleID + ': ' + str(len(adataTmp)) +
                             ' data rows but ' + str(len(labelTmp)) + ' label rows')
        # filter the cells without a level1 tag (one mask shared by data and labels)
        hasLevel1 = labelTmp['level1'] != ' '
        # drop() returns a new frame, so the filtered slice is never modified in place
        adataTmp = adataTmp[hasLevel1].drop(columns=dropChannels)
        labelTmp = labelTmp[hasLevel1]
        # prepare the list
        labelList.append(labelTmp)
        adataList.append(adataTmp)
    # create anndata object from the list
    adata = anndata.AnnData(pd.concat(adataList, ignore_index=True))
    label = pd.concat(labelList, ignore_index=True)
    adata.obs = label
    # scale
    sc.pp.scale(adata)

    for subName in ['Neutrophils', 'Monocytes', 'abT-cells', 'Eosinophils', 
                    'NK-cells', 'B-cells', 'CD4 T', 'CD8 T']: 
        # save figures to a sub dir
        figpath=os.path.join('./figures_pdf', patientID, subName)
        os.makedirs(figpath, exist_ok=True)
        sc.settings.figdir=figpath

        # choose subpopulation and select features
        if subName in ['Neutrophils', 'Monocytes', 'abT-cells', 'Eosinophils', 'NK-cells', 'B-cells']:
            subMask = adata.obs['level1']==subName
        elif subName in ['CD4 T', 'CD8 T']:
            subMask = adata.obs['level2']==subName
        else:
            raise Exception("unknown sub pop")
        # explicit copy: the tools below write their results into adataSub,
        # which must therefore be an independent object and not a view of adata
        adataSub = adata[subMask, :].copy()

        # one component fewer than the smallest dimension (capped at 20)
        n_comps = min([adataSub.n_obs, adataSub.n_vars, 21])-1
        if n_comps < 1:
            # this patient has no (or a single) cell with this label: skip
            # instead of failing inside the PCA
            print('skipping ' + patientID + ' / ' + subName + ': only ' +
                  str(adataSub.n_obs) + ' cell(s)')
            continue
        sc.tl.pca(adataSub, svd_solver='arpack', n_comps=n_comps)
        sc.pp.neighbors(adataSub, n_neighbors=10, n_pcs=n_comps)

        # umap visualization
        #sc.tl.umap(adataSub)
        #sc.pl.umap(adataSub, color='time point')

        # paga process
        sc.tl.leiden(adataSub, resolution=0.5)

        sc.tl.paga(adataSub, groups='leiden')
        sc.pl.paga(adataSub, color=['leiden'], threshold=0.1, show=False, 
                  save='_' + subName + '.pdf')

        # layout named explicitly because the density calls below hard-code
        # the 'draw_graph_fa' basis
        sc.tl.draw_graph(adataSub, init_pos='paga', layout='fa')
        #sc.pl.draw_graph(adataSub, color=['time point', 'leiden', 'family'], show=False,
        #                save='_' + subName + '.png')
        # for PID analysis
        sc.pl.draw_graph(adataSub, color=['visit', 'leiden'], show=False,
                        save='_' + subName + '.pdf')

        sc.pl.draw_graph(adataSub, color=adataSub.var.index.values, show=False,
                        save='_' + subName + '_markers.pdf')

        sc.tl.embedding_density(adataSub, basis='draw_graph_fa', groupby='visit')
        # take the groups from adataSub, not adata: a visit with no cells in
        # this sub-population has no density to plot
        sc.pl.embedding_density(adataSub, basis='draw_graph_fa', key='draw_graph_fa_density_visit', 
                                group=adataSub.obs['visit'].unique().tolist(),
                                save='_' + subName + '_density.pdf')