In [None]:
#load libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import copy
import seaborn as sns
import importlib
from matplotlib import cm
import matplotlib as mpl
# Matplotlib defaults for the whole notebook, applied in a single call:
#  - figure.max_open_warning = 0 silences the "too many open figures" warning
#  - fonttype 42 embeds TrueType fonts so text in PDF/PS output stays editable
#  - math text is rendered with Arial through the 'custom' font set
mpl.rcParams.update({
    'figure.max_open_warning': 0,
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'mathtext.fontset': 'custom',
    'mathtext.it': 'Arial:italic',
    'mathtext.rm': 'Arial',
})
# directory the notebook was started in; data paths below are built from it
codedir = os.getcwd()

In [None]:
#load cmif libraries
#os.chdir('/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF')
from mplex_image import visualize as viz, process, preprocess, gating

In [None]:
# return to the directory the notebook was started in
# (only has an effect if the commented-out chdir in the library-import cell is re-enabled)
os.chdir(codedir)

## Notes

Use CD45 to gate immune cells (CD3 staining has more artifacts).

Update 20200402: added SMT-Bx2-5 and HTA-33; simplified the gating.

## Load Data

In [None]:
#set location of files
#load data
# all inputs and outputs of this notebook are addressed relative to <codedir>/paper_data
rootdir = f'{codedir}/paper_data'
# go to location of files
# NOTE: this changes the process working directory; every relative path used below resolves against rootdir
os.chdir(rootdir)
# presumably creates the GatingPlots folder if it is missing (figures are saved to ./GatingPlots later)
# TODO confirm against mplex_image.preprocess.cmif_mkdir
preprocess.cmif_mkdir(['GatingPlots'])
#os.listdir()

In [None]:
# 3 define samples to work with/ image combos
# each entry is a file prefix: '<prefix>_ManualPositive.csv' is read for every sample listed here
ls_sample = ['20210402_SMT']

In [None]:
# Load the manual-positive table of every sample and stack them row-wise.
# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
# every call), so the per-sample frames are collected and concatenated once.
ls_df_sample = []
for s_sample in ls_sample:
    ls_df_sample.append(pd.read_csv(f'{s_sample}_ManualPositive.csv',index_col=0))
# pd.concat raises on an empty list; keep the original result (an empty frame) in that case
df_data = pd.concat(ls_df_sample) if ls_df_sample else pd.DataFrame()

In [None]:
# display only: column names of the loaded table
df_data.columns

In [None]:
# Map plain marker names onto the compartment-suffixed column names.
# HER2 and ER are deliberately left under their original names.
d_rename = {
    'CD4': 'CD4_Ring',
    'CD8': 'CD8_Ring',
    #'HER2':'HER2_Ring','ER':'ER_Nuclei'
}
# only column labels are renamed; names missing from the table are ignored
df_data = df_data.rename(columns=d_rename)

## Specify Gating Strategy

In [None]:
#parameters
# Marker lists that drive the gating calls below. Names are columns of df_data.

# cell types
# passed to gating.main_celltypes to call the main cell types
ls_endothelial = ['CD31']
ls_immune = ['CD45','CD68'] 
ls_tumor = ['CK7','CK19','Ecad'] 
# passed to gating.proliferation
ls_prolif = ['Ki67']

#tcell/myeloid
# passed to gating.immune_types; the T-cell marker is set to CD45 rather than CD3
s_tcell = 'CD45' 
s_bcell = 'CD20'
s_myeloid = 'CD68'
# only referenced by the skipped custom-gating cell
ls_immune_functional = ['PD1','CD44','prolif'] # not in dataset: 'FoxP3_Nuclei','GRNZB_Nuclei',

#luminal/basal/mesenchymal
# ls_luminal is also used to separate epithelial from tumor cells after the main gating
ls_luminal = ['CK19','CK7'] # not in dataset 'CK8_Ring'
# the remaining lists are only referenced by the skipped custom-gating cell
ls_basal = ['CK5','CK14'] 
ls_mes = ['CD44', 'Vim'] 
ls_tumor_plus = ['Ecad'] + ['Lum']
ls_stromal_function =  ['Vim','aSMA','PDPN']
ls_tumor_prolif = ['PCNA','Ki67','pHH3']  

#index of cell line samples (i.e. 100% tumor)
# empty: no cell-line samples are declared for this dataset
ls_cellline_index = []


In [None]:
#custom gating
# Call the main cell types from the marker lists defined above.
# Later cells read the 'celltype' and 'tumor' columns, so this call presumably adds them
# TODO confirm against mplex_image.gating.main_celltypes
df_data = gating.main_celltypes(df_data,ls_endothelial,ls_immune,ls_tumor,ls_cellline_index)

In [None]:
# display only: column names after the main cell-type gating
df_data.columns

In [None]:
#add normal liver
# Cells gated as tumor that are Ecad-positive but carry none of the luminal
# markers are relabelled 'epithelial' and no longer counted as tumor.
b_no_luminal = ~df_data.loc[:,ls_luminal].any(axis=1)
b_relabel = b_no_luminal & df_data.loc[:,'Ecad'] & df_data.loc[:,'tumor']
df_data.loc[b_relabel,'celltype'] = 'epithelial'
# from here on work from the label itself so the flags always agree with 'celltype'
b_epithelial = df_data.celltype == 'epithelial'
df_data.loc[b_epithelial,'tumor'] = False
# boolean flag column mirroring the label (True for epithelial, False for everything else)
df_data['epithelial'] = b_epithelial.astype('bool')

In [None]:
# development aid: re-import the gating module so edits to its source take effect
# without restarting the kernel; has no purpose on a clean top-to-bottom run
importlib.reload(gating)

## Perform Gating

In [None]:
%%capture
#simple gating
# Each helper receives the frame and returns it; the result is rebound to df_data.
# Later cells read the 'CellProlif' and 'ImmuneType' columns, which are presumably
# produced by these calls -- TODO confirm against mplex_image.gating
df_data = gating.proliferation(df_data,ls_prolif)
df_data = gating.immune_types(df_data,s_myeloid,s_bcell,s_tcell)
df_data = gating.cell_prolif(df_data)

In [None]:
%%capture
#custom gating (skip)
# The calls below are wrapped in a string literal, so nothing in this cell is executed.
'''
df_data = gating.immune_functional(df_data,ls_immune_functional)
df_data = gating.diff_hr_state(df_data,ls_luminal,ls_basal,ls_mes)
df_data = gating.celltype_gates(df_data,ls_tumor_prolif,s_new_name='TumorProlif',s_celltype='tumor')
#df_data = gating.celltype_gates(df_data,ls_tumor_plus,s_new_name='TumorDiffPlus',s_celltype='tumor')
df_data = gating.celltype_gates(df_data,ls_stromal_function,s_new_name='StromalType',s_celltype='stromal')
'''

In [None]:
# presumably adds the 'NonTumor' annotation referenced by the skipped stromal cell
# TODO confirm against mplex_image.gating.non_tumor
df_data = gating.non_tumor(df_data)

## Output Gating Data

In [None]:
#check
# columns left out of the exported table
ls_drop = ['ColI', 'ColIV', 'CD20', 'CD3', 'CD44', 'CK14',
       'CK5', 'ER', 'HER2', 'LamAC', 'PCNA', 'PD1', 'pHH3']
# display only: the object-dtype columns that remain once ls_drop is removed
# (drop raises KeyError if a name in ls_drop is not among the object-dtype columns)
df_data.loc[:,df_data.dtypes=='object'].drop(ls_drop,axis=1)

In [None]:
#drop extra columns
# keep every non-boolean column except those listed in ls_drop; df_gate is the table written to CSV
df_gate = df_data.loc[:,df_data.dtypes!='bool'].drop(ls_drop,axis=1)

In [None]:
#handcrafted stromal populations (skip)
# The code below is wrapped in a string literal, so nothing in this cell is executed.
'''
d_rename_stroma = {'stromal_Vim_aSMA':'myofibroblast', 'stromal_aSMA':'myofibroblast', 'stromal___':'stromal', 'stromal_Vim':'fibroblast',
       'stromal_PDPN_Vim_aSMA':'myofibroblast', 'stromal_PDPN_Vim':'fibroblast', 'stromal_PDPN':'lymphatic',
       'stromal_PDPN_aSMA':'myofibroblast'}
df_gate.NonTumor = df_gate.NonTumor.replace(d_rename_stroma)
df_gate['FinalCell'] = df_gate.NonTumor.fillna(df_gate.CellProlif).fillna(df_gate.celltype)
df_gate.FinalCell = df_gate.FinalCell.replace({'tumor_nonprolif':'tumor','liver_nonprolif':'liver','liver_prolif':'liver'})
'''

In [None]:
# display only: first rows of the table that will be exported
df_gate.head()

In [None]:
# Export the gated table once. An existing CSV is never overwritten, so the
# file has to be removed by hand if the gating above changes.
s_out = '20210402_SMT'
s_gated_csv = f'{s_out}_GatedPositiveCellNames.csv'
b_already_saved = os.path.exists(s_gated_csv)
if not b_already_saved:
    print('saving new csv')
    df_gate.to_csv(s_gated_csv)

## Plot


In [None]:
#importlib.reload(viz)
# output prefix; has to match the prefix used when the gated CSV was written
s_out = '20210402_SMT'
# display only: name of the gated CSV
f'{s_out}_GatedPositiveCellNames.csv'

In [None]:
# NOTE: rebinds df_data to the contents of the saved CSV, replacing the in-memory gating frame.
# Because the CSV is only written when absent, this may load results from an earlier run.
df_data = pd.read_csv(f'{s_out}_GatedPositiveCellNames.csv',index_col=0)

In [None]:
#df_data['Stromal'] = df_data.StromalType.replace(d_rename_stroma)
#df_data['NonTumor'] = df_data.NonTumor.replace(d_rename_stroma)
#df_data['NonTumorFunc'] = df_data.NonTumorFunc.replace(d_rename_stroma)
#handcrafted stromal populations
#d_rename_stroma = {'stromal_Vim_aSMA':'myofibroblast', 'stromal_aSMA':'myofibroblast', 'stromal___':'stromal', 'stromal_Vim':'fibroblast',
#       'stromal_PDPN_Vim_aSMA':'myofibroblast', 'stromal_PDPN_Vim':'fibroblast', 'stromal_PDPN':'lymphatic',
#       'stromal_PDPN_aSMA':'myofibroblast'}

In [None]:
# display only: True if the loaded table already has a 'FinalCell' column
(df_data.columns == 'FinalCell').any()

In [None]:
#combined cell type (run once)
# Build 'FinalCell' only when the loaded table does not already carry it:
#   tumor cells   -> their proliferation label (CellProlif)
#   other cells   -> their main cell type (celltype)
#   immune cells  -> their immune subtype (ImmuneType), overriding the line above
if 'FinalCell' not in df_data.columns:
    b_tumor = df_data.celltype == 'tumor'
    b_immune = df_data.celltype == 'immune'
    df_data.loc[b_tumor,'FinalCell'] = df_data.loc[b_tumor,'CellProlif']
    df_data.loc[~b_tumor,'FinalCell'] = df_data.loc[~b_tumor,'celltype']
    df_data.loc[b_immune,'FinalCell'] = df_data.loc[b_immune,'ImmuneType']

#df_data.FinalCell.unique()
#df_data.to_csv(f'{s_out}_GatedPositiveCellNames.csv')

In [None]:
# index labels of the rows to remove: cells whose index contains 'HTA' and whose FinalCell is 'epithelial'
# NOTE: reuses the name ls_drop, which earlier in the notebook held column names
ls_drop = df_data.loc[((df_data.index.str.contains('HTA')) & (df_data.FinalCell=='epithelial'))].index

In [None]:
# get rid epithelial
# except HTAN
# every 'epithelial' label becomes 'stromal' ...
df_data['FinalCell'] = df_data.FinalCell.replace({'epithelial':'stromal'})
# ... and the rows collected in ls_drop (HTA cells that were labelled epithelial) are removed entirely.
# NOTE: re-running this cell raises KeyError when ls_drop is non-empty, because the rows are already gone
df_data = df_data.drop(ls_drop)
# constant True column; not read anywhere else in this notebook -- presumably a counting helper, TODO confirm
df_data['countme'] = True


In [None]:
%matplotlib inline
# column that defines the groups compared in the plots
s_grouper='slide_scene'

#calculate proportions
# candidate annotations: every object-dtype column except the grouping column
for s_cell in df_data.columns[(df_data.dtypes=='object') & ~(df_data.columns.isin([s_grouper]))].tolist():
    # presumably the per-group fraction of cells in each category of s_cell -- TODO confirm against mplex_image.visualize
    df_prop = viz.prop_positive(df_data,s_cell=s_cell,s_grouper=s_grouper)
    # make annotations
    # one annotation row per group, labelled with the group's own ID
    df_annot=pd.DataFrame(data={'ID': df_prop.index.tolist()},index=df_prop.index)
    # ID -> color lookup; zip stops at the shorter input, so only the first 10 sorted IDs receive a tab10 color
    lut = dict(zip(sorted(df_annot.ID.unique()),cm.tab10.colors))
    g, df_plot_less = viz.prop_clustermap(df_prop,df_annot,i_thresh =.01,lut=lut)
    g.savefig(f'./GatingPlots/{s_cell}_clustermap.png',dpi=150)
    # closes the current pyplot figure
    plt.close()
    fig = viz.prop_barplot(df_plot_less,s_cell,colormap="Spectral")
    fig.savefig(f'./GatingPlots/{s_cell}_bar.png',dpi=200)
    # NOTE(review): the loop stops after the first column, so only one annotation is plotted -- confirm this is intended
    break

In [None]:
#group by tissue
# Scenes left out of the per-tissue summary.
ls_exclude_scene = ['HTA-33_scene001','SMTBx1-16_scene001']
# The exclusion mask is built from the cell index instead of the slide_scene column:
# slide_scene is collapsed to the tissue name just below, after which a full scene ID
# such as 'HTA-33_scene001' can never match it (the original filter excluded nothing).
# Matching on the index also keeps this cell safe to re-run.
# Assumes the index labels contain the scene ID, as the crop cell further down relies on -- TODO confirm
b_exclude = np.zeros(len(df_data), dtype=bool)
for s_scene in ls_exclude_scene:
    b_exclude |= np.asarray(df_data.index.str.contains(s_scene, regex=False), dtype=bool)
# keep only the part before the first underscore (the slide / tissue name)
df_data['slide_scene'] = [item.split('_')[0] for item in df_data.slide_scene]
df_data_select = df_data.loc[~b_exclude,:]

In [None]:
#by tissue
import warnings
# silence all warnings for the rest of the session
warnings.filterwarnings('ignore')
s_grouper='slide_scene'
# keep text editable in exported PDF/PS files
mpl.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

#calculate proportions
# candidate annotations: every object-dtype column except the grouping column
b_annotation_col = (df_data.dtypes=='object') & ~(df_data.columns.isin([s_grouper]))
ls_annotation = df_data.columns[b_annotation_col].tolist()
for s_cell in ls_annotation:
    df_prop = viz.prop_positive(df_data_select,s_cell=s_cell,s_grouper=s_grouper)
    # keep the proportions table next to the figures
    df_prop.to_csv(f'ManualGating_SMT_proportions_{s_cell}.csv')
    # make annotations: one row per group, colored from the tab10 palette
    ls_group = df_prop.index.tolist()
    df_annot = pd.DataFrame(data={'ID': ls_group},index=df_prop.index)
    lut = dict(zip(sorted(df_annot.ID.unique()),cm.tab10.colors))
    g, df_plot_less = viz.prop_clustermap(df_prop,df_annot,i_thresh =.001,lut=lut)
    g.savefig(f'./GatingPlots/{s_cell}_clustermap_tissue.pdf',dpi=150)
    plt.close()
    # pick a palette with enough distinct colors for the number of plotted categories
    i_category = df_plot_less.shape[1]
    cmap = "Spectral" if i_category < 8 else ("Paired" if i_category < 11 else "tab20")
    fig = viz.prop_barplot(df_plot_less,s_cell,colormap=cmap)
    fig.savefig(f'./GatingPlots/{s_cell}_bar_tissue.pdf',dpi=200)
    # only the first annotation column is processed
    break

In [None]:
s_date = '20210402'
# Lower (x, y) bound of the square region kept for each scene, expressed in
# the same units as the DAPI_X / DAPI_Y columns.
d_crop = {'SMTBx2-5_scene001': (2000,9000),
    'SMTBx3_scene004': (20000,16000),
    'HTA-33_scene002': (3271, 607),
    'SMTBx1-16_scene003': (2440,220),
        }
# side length of the square crop window
i_crop_size = 2500
# DataFrame.append was removed in pandas 2.0; collect the per-scene selections
# and concatenate them once after the loop.
ls_df_crop = []
for s_tissue, tu_crop in d_crop.items():
    # scene IDs are matched as literal substrings of the cell index (not as regular expressions)
    df_scene = df_data.loc[df_data.index.str.contains(s_tissue, regex=False)]
    # strict inequalities: cells exactly on the window border are excluded
    b_in_x = (df_scene.DAPI_X > tu_crop[0]) & (df_scene.DAPI_X < tu_crop[0]+i_crop_size)
    b_in_y = (df_scene.DAPI_Y > tu_crop[1]) & (df_scene.DAPI_Y < tu_crop[1]+i_crop_size)
    ls_index = df_scene.loc[b_in_x & b_in_y].index
    ls_df_crop.append(df_data.loc[ls_index])
# pd.concat raises on an empty list; fall back to an empty frame as the original did
df_result = pd.concat(ls_df_crop) if ls_df_crop else pd.DataFrame()


In [None]:
#by tissue
# Same proportion plots as before, computed on the cropped regions in df_result.
import warnings
# silence all warnings for the rest of the session
warnings.filterwarnings('ignore')
s_grouper='slide_scene'
# keep text editable in exported PDF/PS files
mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
# display names for the bar plot rows
# NOTE: rebinds d_rename, which earlier held the marker-column renames
d_rename = {'HTA-33':'Bx4', 'SMTBx1-16':'Bx1', 'SMTBx2-5':'Bx2', 'SMTBx3':'Bx3'}

#calculate proportions
# candidate annotations: every object-dtype column except the grouping column.
# This loop has no break, so one pair of figures is written per annotation column.
for s_cell in df_data.columns[(df_data.dtypes=='object') & ~(df_data.columns.isin([s_grouper]))].tolist():
    df_prop = viz.prop_positive(df_result,s_cell=s_cell,s_grouper=s_grouper)
    # make annotations
    #df_prop.to_csv(f'ManualGating_SMT101_proportions_{s_cell}.csv')
    df_annot=pd.DataFrame(data={'ID': df_prop.index.tolist()},index=df_prop.index)
    # ID -> color lookup from the 10-color tab10 palette
    lut = dict(zip(sorted(df_annot.ID.unique()),cm.tab10.colors))
    g, df_plot_less = viz.prop_clustermap(df_prop,df_annot,i_thresh =.001,lut=lut)
    g.savefig(f'./GatingPlots/{s_cell}_clustermap_tissue3.pdf',dpi=150)
    # closes the current pyplot figure
    plt.close()
    # pick a palette with enough distinct colors for the number of plotted categories
    if df_plot_less.shape[1] < 8:
        cmap = "Spectral"
    elif df_plot_less.shape[1] < 11:
        cmap = "Paired"
    else:
        cmap = "tab20"
    # rename() without an axis argument relabels the index, i.e. the rows of df_plot_less
    fig = viz.prop_barplot(df_plot_less.rename(d_rename),s_cell,colormap=cmap)
    # compact size for the final figure; the first axes' own labels and title are replaced by a single suptitle
    fig.set_size_inches(4.5, 2.3)
    ax_list = fig.axes
    ax_list[0].set_ylabel('')
    ax_list[0].set_xlabel('Fraction of Cells')
    ax_list[0].set_title('')
    fig.suptitle('Gating Composition: Biopsies',x=0.5,y=0.9,fontsize=14)
    # acts on the current pyplot figure -- presumably the bar plot just created, TODO confirm
    plt.tight_layout()
    fig.savefig(f'./GatingPlots/{s_cell}_bar_tissue3.png',dpi=200)
    #fig.savefig(f'./{s_date}/{s_cell}_bar_tissue3.pdf',dpi=200)

In [None]:
# display only: date tag set alongside the crop settings (otherwise used only in a commented-out save path)
s_date