## Cluster Macsima 

**Samples:** 
- Breast cancer tissue from Miltenyi 

**Method:** Leiden clustering

In [None]:
#load libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import seaborn as sns
import scipy
import scanpy as sc
import matplotlib as mpl
# a value of 0 turns off matplotlib's "too many open figures" warning
mpl.rc('figure', max_open_warning = 0)
# NOTE(review): the working directory is switched before the next import, presumably so that
# the local mplex_image package can be found -- confirm
os.chdir('/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cmIF')
from mplex_image import visualize as viz, process, preprocess, normalize, mics, mpimage
# fix the NumPy random seed
np.random.seed(121)

In [None]:
#change to correct directory
# NOTE: the second s_dataset assignment overrides the first, so only the ValIII run is used
s_dataset = 'MICS_2020-12-14_19-22-05_OHSU_ValII/run_20201214_jenny'
s_dataset = 'MICS_2021-03-04_15-18-22_OHSU_ValIII/run_20210331_jenny'
rootdir = f'/home/groups/graylab_share/Chin_Lab/ChinData/Cyclic_Workflow/{s_dataset}'
# s_date names the dated sub-folder that plot_histograms writes its figures into
s_date = '20200121'
# all relative paths below (e.g. ./figures) resolve against rootdir from here on
os.chdir(rootdir)
%matplotlib inline

In [None]:
# read the per-cell filtered mean intensity table (first csv column is the index)
df = pd.read_csv(
    f'{rootdir}/Segmentation/features_R-1_W-A-1_G-1_FilteredMeanIntensity_.csv',
    index_col=0,
)
# keep only the text before the first underscore of every column name
df = df.rename(columns=lambda s_col: s_col.split('_')[0])
# number of rows loaded
len(df)

In [None]:
# read the centroid/shape table and name each column by its second '_'-separated token
df_xy = pd.read_csv(
    f'{rootdir}/Segmentation/features_R-1_W-A-1_G-1_CentroidXY.csv',
    index_col=0,
)
df_xy.columns = [s_col.split('_')[1] for s_col in df_xy.columns]
# attach area and eccentricity to the intensity table by index; area is cast to float64
# so that it is picked up by the float64 column selections used below
df_shape = df_xy.loc[:, ['area', 'eccentricity']]
df = df.merge(df_shape, left_index=True, right_index=True).astype({'area': 'float64'})

In [None]:
# every float64 column of df becomes a variable of the AnnData object
b_float = df.dtypes == 'float64'
marker_genes = list(df.columns[b_float])
adata = sc.AnnData(df.loc[:, b_float])
# keep a reference to the untransformed values in .raw
adata.raw = adata
# reduce dimensionality
sc.tl.pca(adata, svd_solver='auto')

In [None]:
# scanpy "highest expressed" plot for the top 48 variables, drawn on a tall narrow axis
# and saved by scanpy under the name given to save=
fig, ax = plt.subplots(figsize=(3.5, 9))
sc.pl.highest_expr_genes(adata, n_top=48, ax=ax, save='Expression.png')
plt.tight_layout()

In [None]:
# variance ratio of the principal components computed above, on a log axis
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
# rebuild the AnnData object from the float64 columns of df (same steps as the PCA cell)
b_float = df.dtypes == 'float64'
marker_genes = list(df.columns[b_float])
adata = sc.AnnData(df.loc[:, b_float])
adata.raw = adata
# reduce dimensionality
sc.tl.pca(adata, svd_solver='auto')
# neighbor graph on the first 31 principal components, 10 neighbors per cell
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=31)
# 2-D embedding used by the sc.pl.umap plots below
sc.tl.umap(adata)

In [None]:
# names of all variables in adata; used as the color list for the full UMAP panel
ls_marker = adata.var.index.tolist()

In [None]:
# UMAP colored by every entry of ls_marker; scanpy saves it under figname
figname = 'Umap.png'
sc.pl.umap(adata, color=ls_marker, wspace=.25, save=figname)

In [None]:
# hand-picked subset of variables (markers plus the two shape features) for a smaller UMAP panel
ls_marker = ['Vimentin','Cytokeratin-7','eccentricity','area','CD45RO','CD3','HLA-DR-DP-DQ',
 'Actin-SmoothM']

In [None]:
# UMAP of the selected markers, 3 panels per row; the color scale of each panel is
# clipped to its 1.5th-98.5th percentile via the 'p1.5'/'p98.5' limits
figname = 'Umap_select.png'
sc.pl.umap(
    adata,
    color=ls_marker,
    ncols=3,
    wspace=.25,
    vmin='p1.5',
    vmax='p98.5',
    save=figname,
)

In [None]:
# Leiden clustering at resolution 0.6; the labels are read back from adata.obs.leiden below
sc.tl.leiden(adata, resolution=0.6)
# UMAP colored by cluster label
figname = 'leiden.png'
fig, ax = plt.subplots(figsize=(3, 3), dpi=200)
sc.pl.umap(adata, color='leiden', ax=ax, save=figname)

In [None]:
# drop cluster '14' before building the dendrogram
# .copy() turns the subset into a standalone AnnData object instead of a view of adata,
# so sc.tl.dendrogram can store its result in it without an implicit view-to-copy conversion
adata_select = adata[adata.obs.leiden!='14',:].copy()
sc.tl.dendrogram(adata_select,groupby='leiden')

In [None]:
# annotation table; its Expr column assigns a category to each row
df_a = pd.read_csv(f'{rootdir}/Segmentation/macsima.csv',index_col=0)
# the palette has to exist before it is zipped into the lookup table
palette = sns.color_palette()
# one color per distinct Expr value; unique() keeps first-appearance order so the color
# assignment is the same on every run (set() order of strings changes between sessions)
# NOTE: zip stops at the shorter input, so categories beyond the palette length get no color
lut = dict(zip(df_a.Expr.unique(),palette))
# per-row colors, passed as col_colors to the clustermap below
col_colors = df_a.Expr.map(lut)
sns.palplot(palette)

In [None]:
# display the category -> color lookup table
lut

In [None]:
# per-cell table of the raw values plus the cluster label
df_p = pd.DataFrame(
    data=adata_select.raw.X,
    index=adata_select.obs.index,
    columns=adata_select.var.index,
)
df_p['leiden'] = adata_select.obs['leiden']
# mean of every variable within each cluster (computed once, reused below)
df_mean = df_p.groupby('leiden').mean()
# clustered heatmap, z-scored per column and clipped to +/-1.5
g = sns.clustermap(
    df_mean,
    z_score=1,
    figsize=(14, 6),
    cmap='viridis',
    vmin=-1.5,
    vmax=1.5,
    col_colors=col_colors,
)  # cbar_kws={'center':0}
g.savefig(f'{rootdir}/figures/clustermap.png')
# variable and cluster order taken from the clustermap dendrograms, reused by the matrixplot
marker_genes = df_mean.columns[g.dendrogram_col.reordered_ind].tolist()
categories_order = df_mean.index[g.dendrogram_row.reordered_ind].tolist()

In [None]:
# matrix plot of the clusters using the variable/cluster order derived from the clustermap
figname = 'Matrixplot_leiden.png'
# hand-set bracket positions and labels for groups of variables
# NOTE(review): the positions are hard-coded indices into marker_genes and presumably need
# re-checking whenever the clustermap order changes -- confirm
ls_group_pos = [(3,23),(24,31),(32,42),(43,51)]
ls_group_label = ['tumor','T-cell','muscle\n +AF','immune\n+stroma']
fig, ax = plt.subplots(figsize=(9, 4), dpi=200)
sc.pl.matrixplot(
    adata_select,
    var_names=marker_genes,
    groupby='leiden',
    title='',
    categories_order=categories_order,
    ax=ax,
    save=figname,
    standard_scale='var',
    colorbar_title='Relative\nintensity',
    var_group_positions=ls_group_pos,
    var_group_labels=ls_group_label,
    var_group_rotation=0,
)

In [None]:

# scene id is the third '_'-separated token of each cell's index label
df_p['scene'] = [s_cell.split('_')[2] for s_cell in df_p.index]
# non-missing CD3 entries serve as the cell count: per (cluster, scene) over per scene
se_cluster_scene = df_p.groupby(['leiden', 'scene']).CD3.count()
se_scene = df_p.groupby(['scene']).CD3.count()
df_prop = (se_cluster_scene / se_scene).unstack().fillna(value=0).T
# stacked bar: one bar per scene, one segment per cluster
fig, ax = plt.subplots(figsize=(3, 3), dpi=200)
df_prop.plot.bar(stacked=True, ax=ax, legend=True, cmap='tab20', width=.8)
ax.set_ylabel('Fraction Positive')
ax.legend(bbox_to_anchor=(1.02, .9), ncol=2, fontsize=10)
plt.tight_layout()
ax.set_title('Cluster Composition')
fig.savefig('./figures/StackedBar_Leiden.png')

In [None]:
#df_p.to_csv(f'{rootdir}/Segmentation/R-1_W-A-1_G-1_Leiden.csv')

In [None]:
# reload the saved per-cell table with cluster labels
df = pd.read_csv(f'{rootdir}/Segmentation/R-1_W-A-1_G-1_Leiden.csv', index_col=0)
df['slide_scene'] = 'R-1_W-A-1_' + df['scene']
# duplicate coordinate/scene columns under the names passed on to viz.plot_clusters
# NOTE(review): presumably these are the column names plot_clusters expects -- confirm
for s_new, s_old in (('slide_scene', 'scene'), ('DAPI_Y', 'Y'), ('DAPI_X', 'X')):
    df_xy[s_new] = df_xy[s_old]

In [None]:
# spatial scatter of the clusters; the result is keyed by slide_scene
# (the original code indexed it as ls_fig['R-1_W-A-1_G-1'])
ls_fig = viz.plot_clusters(df,df_xy)
# save every returned figure under its own key, instead of re-saving one figure to the
# same file on each pass of the loop
for s_slide_scene, fig in ls_fig.items():
    fig.savefig(f'{rootdir}/QC/SpatialPlots_{s_slide_scene}_Leiden.png')
plt.show()

In [None]:
# mics.parse_org() is called without a path, so move into the registered-image folder first
# NOTE(review): presumably it parses the image files in the current directory -- confirm
os.chdir(f'{rootdir}/RegisteredImages/R-1_W-A-1_G-1')
# df_img is used below through its marker and channel columns
df_img = mics.parse_org()

In [None]:
# earlier annotation, kept for reference (clustering at resolution 0.4):
#d_annot = {0:'tumor',1:'stroma',2:'tumor',3:'tumor',4:'tumor',5:'stroma'
# 6:'stroma',7:'muscle',8:'Tcell'} #resolution = 0.4
# tissue compartment -> list of Leiden cluster ids; matched against df.leiden below
# NOTE(review): presumably tied to the resolution=0.6 clustering above -- re-check after re-clustering
d_annot= {'tumor':[1,2,4,5,6,8,10,12],
         'stroma':[0,3,7,9,13],
         'muscle':[11]}

In [None]:
# float64 columns of df minus the two DAPI rounds and the two shape features
ls_marker = list(df.columns[df.dtypes == 'float64'])
for s_drop in ('DAPI0', 'DAPI24', 'area', 'eccentricity'):
    # list.remove raises ValueError if the column is absent
    ls_marker.remove(s_drop)

In [None]:
# One histogram figure per marker, overlaying the tissue compartments defined in d_annot.
# The channel lookup in df_img and the 'autofluorescence-<channel>' median were only needed
# for the dashed reference lines, which are switched off in these "no_lines" figures. The
# lookup was removed: its result was unused, and a failed lookup left s_channel stale or
# undefined because the IndexError was swallowed.
for s_marker in df.columns[df.dtypes=='float64'].tolist():
    # skip the shape features and every DAPI round
    if s_marker in ('eccentricity','area') or 'DAPI' in s_marker:
        continue
    fig,ax = plt.subplots(figsize=(3,2))
    for idx,(s_part,ls_ids) in enumerate(d_annot.items()):
        # intensities of the cells whose cluster belongs to this compartment
        se_mi = df.loc[df.leiden.isin(ls_ids),s_marker]
        ax.hist(se_mi,bins=50,alpha=0.4, color=f'C{idx}',label=s_part)
    ax.set_yscale('log')
    ax.legend()
    ax.set_title(s_marker)
    plt.tight_layout()
    fig.savefig(f'{rootdir}/figures/no_lines_histogram_{s_marker}.png')

In [None]:
#1
# first page of per-marker histograms: the first 24 markers on a 4x6 grid
# (the unused df_img channel lookup, whose IndexError was silently swallowed, was removed)
ls_page = ls_marker[:24]
fig,ax = plt.subplots(4,6,figsize=(12,7))
ax=ax.ravel()
for idxs,s_marker in enumerate(ls_page):
    for idx,(s_part,ls_ids) in enumerate(d_annot.items()):
        # intensities of the cells whose cluster belongs to this compartment
        se_mi = df.loc[df.leiden.isin(ls_ids),s_marker]
        ax[idxs].hist(se_mi,bins=50,alpha=0.4, color=f'C{idx}',label=s_part)
    if idxs == len(ls_page) - 1:
        # one shared legend, on the last populated panel (panel 23 for a full page)
        ax[idxs].legend()
    else:
        # empty, frameless legend on all other panels
        ax[idxs].legend('',frameon=False)
    ax[idxs].set_yscale('log')
    ax[idxs].set_title(s_marker)
plt.tight_layout()
fig.savefig(f'{rootdir}/figures/histogramI.png')

In [None]:
#2
# second page of per-marker histograms: markers from position 24 on, on a 4x6 grid
# (the unused df_img channel lookup, whose IndexError was silently swallowed, was removed)
# NOTE: the grid holds 24 panels, so more than 48 markers in ls_marker would overflow it
ls_page = ls_marker[24:]
fig,ax = plt.subplots(4,6,figsize=(12,7))
ax=ax.ravel()
for idxs,s_marker in enumerate(ls_page):
    for idx,(s_part,ls_ids) in enumerate(d_annot.items()):
        # intensities of the cells whose cluster belongs to this compartment
        se_mi = df.loc[df.leiden.isin(ls_ids),s_marker]
        ax[idxs].hist(se_mi,bins=50,alpha=0.4, color=f'C{idx}',label=s_part)
    if idxs == len(ls_page) - 1:
        # one shared legend, on the last populated panel (panel 23 for a full page)
        ax[idxs].legend()
    else:
        # empty, frameless legend on all other panels
        ax[idxs].legend('',frameon=False)
    ax[idxs].set_yscale('log')
    ax[idxs].set_title(s_marker)
plt.tight_layout()
fig.savefig(f'{rootdir}/figures/histogramII.png')

# normalize

In [None]:
def plot_histograms(df_norm,df,s_train,s_tissue):
    """Save one raw-intensity histogram per marker, with one color per batch.

    Relies on the module-level names rootdir and s_date for the output location
    ({rootdir}/figures/{s_date}/), which is created if it does not exist yet.

    Parameters:
        df_norm: normalized table; supplies the float64 marker columns to loop over and
            the batch labels (text before the first '_' of each index label). Its values
            are not plotted -- only used to skip batches with no data for a marker.
            Side effect: a 'batch' column is added to it in place.
        df: raw table whose values are plotted; rows are assigned to a batch when the
            batch label occurs as a literal substring of the index label.
        s_train: label inserted into the output file name.
        s_tissue: label inserted into the output file name.

    Returns:
        None; figures are written as png files.
    """
    df_norm['batch'] = [item.split('_')[0] for item in df_norm.index]
    bins=50
    # make sure the dated output folder exists before the first savefig
    s_outdir = f'{rootdir}/figures/{s_date}'
    os.makedirs(s_outdir, exist_ok=True)
    for s_marker in df_norm.columns[df_norm.dtypes=='float64']:
        fig,ax=plt.subplots(figsize = (3,2))
        for idxs, s_batch in enumerate(sorted(set(df_norm.batch))):
            df_batch = df_norm.loc[df_norm.batch==s_batch,s_marker]
            # skip batches in which this marker has no values
            if df_batch.dropna().empty:
                continue
            # regex=False: the batch label is matched literally, not as a regex pattern
            b_batch = df.index.str.contains(s_batch, regex=False)
            ax.hist(df.loc[b_batch,s_marker],bins=bins,alpha=0.4, color=f'C{idxs}')
            ax.set_yscale('log')
            ax.set_title(f'{s_marker.split("_")[0]}: Raw Data')
        plt.tight_layout()
        fig.savefig(f'{s_outdir}/Different_Scaling_combat_training_{s_train}_{s_marker}_{s_tissue}.png')
# ComBat fit and transform on all rows at once
# NOTE(review): needs a 'slide' column in df; the cells above only add 'scene' and
# 'slide_scene' -- confirm which table is meant to be in df here
b_float = df.dtypes == 'float64'
# combat_fit/combat_transform receive the table transposed (variables as rows)
data = df.loc[:, b_float].T
batch = df['slide']
gamma_star, delta_star = normalize.combat_fit(data, batch)
bayesdata = normalize.combat_transform(data, batch, gamma_star, delta_star)
# back to one row per cell
df_norm = bayesdata.T
s_train = 'all'
s_tissue = 'all'
plot_histograms(df_norm, df, s_train, s_tissue)

# visualize

In [None]:
#change to correct directory
# NOTE: same settings as the configuration cell at the top of the notebook; the second
# s_dataset assignment overrides the first, so only the ValIII run is used
s_dataset = 'MICS_2020-12-14_19-22-05_OHSU_ValII/run_20201214_jenny'
s_dataset = 'MICS_2021-03-04_15-18-22_OHSU_ValIII/run_20210331_jenny'
rootdir = f'/home/groups/graylab_share/Chin_Lab/ChinData/Cyclic_Workflow/{s_dataset}'
s_date = '20200121'
os.chdir(rootdir)
%matplotlib inline

In [None]:
#visualization
%matplotlib inline
import warnings;
warnings.filterwarnings('ignore')
os.chdir(f'{rootdir}/RegisteredImages/R-1_W-A-1_G-1/')
#antibody order: double application (K157), original (K154), optimized (K175)
d_crop ={#'G-1':(3024,1350,1800,900),
         #'G-1':(3300,1500,1200,800), #valII
         'G-1':(2800,1300,1200,800), #valIII
  }
os.chdir(f'{rootdir}/RegisteredImages/R-1_W-A-1_G-1/')
df_img = mics.parse_org()
df_img[' '] = df_img.channel
tu_array=(1,1)
tu_fig=(6,4.3)
#make arrays
for s_index in df_img.index: 
    s_marker = df_img.loc[s_index,'marker']
    s_scene = df_img.loc[s_index,'scene']
    s_round = df_img.loc[s_index,'rounds']
    if s_marker == 'DAPI':
        continue
    df_dapi = df_img[(df_img.scene==s_scene) & (df_img.rounds=='R24') & (df_img.color == 'c1')]
    df_et = df_img[df_img.index==s_index]
    #print(f'{s_marker}  {len(df_et)}')
    tu_crop = d_crop[s_scene]
    if s_marker == 'CD44':
        tu_crop = (tu_crop[0] + 2400, tu_crop[1], tu_crop[2], tu_crop[3]) #1500 valII
    elif s_marker == 'CD8':
        tu_crop = (tu_crop[0], tu_crop[1] - 800, tu_crop[2], tu_crop[3]) #1500 valII
    #visualize IF roi
    tu_rescale = (0,0)
    if s_marker == 'CD8':
        tu_rescale=(0,2500)
    fig, ax = mpimage.array_roi_if(df_et,df_dapi,s_label='rounds',s_title=s_marker,tu_crop=tu_crop,
            tu_array=tu_array,tu_fig=tu_fig,tu_rescale=tu_rescale,i_expnorm=0)
    fig.savefig(f'{rootdir}/QC/{s_round}_{s_scene}_{s_marker}_pixelunits.png')
    #break

## data analysis

In [None]:
# the cells below read from ./Data and write to ./Figures relative to this folder
os.chdir('/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cycIF_ValidationStudies/cycIF_Validation')

In [None]:
# stack the mean-intensity tables of both validation slides, tagged with the slide name
# DataFrame.append was removed in pandas 2.0: collect the tables and concatenate once
ls_df = []
for s_val in ['ValII','ValIII']:
    df = pd.read_csv(f'Data/features_{s_val}_FilteredMeanIntensity_.csv',index_col=0)
    df['slide'] = s_val
    ls_df.append(df)
df_mi = pd.concat(ls_df)
# keep only the columns without any missing value in the stacked table
df_mi = df_mi.loc[:,df_mi.isna().sum()==0]

In [None]:
# stack the marker/round tables of both validation slides, tagged with the slide name
# DataFrame.append was removed in pandas 2.0: collect the tables and concatenate once
ls_df = []
for s_val in ['ValII','ValIII']:
    df = pd.read_csv(f'Data/Marker_Round_Table_{s_val}.csv',index_col=0)
    df['slide'] = s_val
    ls_df.append(df)
df_img = pd.concat(ls_df)

In [None]:
# drop every round-table entry belonging to round R24
df_img = df_img[df_img.rounds!='R24']

In [None]:
# One KDE figure per marker comparing the two validation slides; each curve is labelled
# with the round the marker was stained in on that slide.
ls_val = ['ValII','ValIII']
for s_marker in df_mi.columns[df_mi.dtypes=='float64']:
    s_name = s_marker.split('_')[0]
    # round-table rows of this marker for each slide (the original guard read an s_val
    # left over from an earlier loop, so only one slide was ever checked)
    d_round = {s_val: df_img[(df_img.marker==s_name)&(df_img.slide==s_val)] for s_val in ls_val}
    # a curve cannot be labelled without a round entry: skip markers missing on any slide
    if any(len(df_round)==0 for df_round in d_round.values()):
        continue
    # shared upper limit: 99.8th percentile over both slides
    i_max = np.quantile(df_mi.loc[:,s_marker],.998)
    fig, ax = plt.subplots(figsize=(3,2.5),dpi=200)
    for s_val in ls_val:
        # .iloc[0] takes the first matching row whatever the index labels are
        s_label = d_round[s_val].rounds.iloc[0]
        sns.kdeplot(df_mi.loc[df_mi.slide==s_val,s_marker],ax=ax,shade=True,label=s_label,alpha=0.4,clip=(0,i_max))
    ax.legend()
    ax.set_title(f'{s_name}')
    ax.set_yticklabels('')
    ax.set_xlabel('Intensity')
    ax.set_xlim(0,i_max)
    plt.tight_layout()
    fig.savefig(f'Figures/Macsima_{s_marker}_kde.png')

In [None]:
#CD8 BG from napari
df_result = pd.DataFrame()
for s_val in ['ValII','ValIII']:
    for s_gr in ['bg','fg']:
        df = pd.read_csv(f'Data/result_{s_val}_CD8_{s_gr}.csv',index_col=0)
        df['panel'] = s_val
        df['region'] = s_gr
        df_result = df_result.append(df)

In [None]:
# NOTE(review): the factor 256 presumably converts 8-bit values to a 16-bit scale -- confirm
# NOTE: re-running this cell multiplies the column again
df_result['mean_intensity'] = df_result.mean_intensity*256

In [None]:
# box plot of CD8 mean intensity per panel, split by region (bg/fg), outlier markers hidden
fig, ax = plt.subplots(figsize=(3, 2.5), dpi=150)
sns.boxplot(
    data=df_result,
    x='panel',
    y='mean_intensity',
    hue='region',
    fliersize=0,
    ax=ax,
)
ax.set_title('CD8 Signal-to-Background')
# relabel the two panels on the x axis
ax.set_xticklabels(['R6', 'R17'])
ax.legend(bbox_to_anchor=(.79, .35))  # loc=4
plt.tight_layout()
fig.savefig('./Figures/MICS_CD8_SBR.png')