# Single Cell Analysis of Quenched Slides

**Question:** How does autofluorescence change over repeated quenching reactions?

**Samples:** Three **normal pancreas** tissue sections were processed with the cyclic IF (cycIF) protocol, then repeatedly quenched for 15 minutes and imaged. No antibody was applied. The quenching solutions contained 3%, 4.5%, or 6% H2O2, but no difference in quenching speed or completeness was observed between the conditions.

Three adjacent sections from **Biomax FDA808l-2** (multiple tumor (24 organs) and normal (6 organs) tissue array, with grade and TNM/stage, 72 cases/72 cores, core size 1.5 mm, replacing FDA808k-2) were quenched and imaged repeatedly (10 rounds).

HER2A TMA: **Biomax Her2a**: Her-2 IHC biomarker control TMA, with breast invasive ductal carcinoma, including TNM, clinical stage and pathology grade, 10 cases/10 cores, replaced by Her2b. Scene quality: #1, 4, 5, 8, 9 are good until R10; #6, 7, 10 are bad after R6; #2, 3, 11 are missing after R6.

**Method**: Slides were imaged as described above, registered and segmented based on DAPI stain. Nuclei were inflated by 5 pixels to roughly capture cytoplasmic signal.


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
import seaborn as sns
import matplotlib
import seaborn as sns
matplotlib.rcParams.update({'font.size': 15})
import umap
from sklearn.preprocessing import scale
import scanpy as sc

In [None]:
#set location of files
#os.chdir('/home/groups/graylab_share/OMERO.rdsStore/engje/Data/cycIF_ValidationStudies/cycIF_Validation')
codedir = os.getcwd()

In [None]:
pwd

# Import and Filter Data

## Normal Tissue TMA

In [None]:
%matplotlib inline
ls_sample = ['808L2-053','808L2-054','808L2-055'] #
ls_sample = ['HER2A-60','HER2A-61','HER2A-62']

In [None]:
# Load, for every sample in ls_sample, the per-cell mean-intensity/shape table
# and the centroid (XY) table, and stack each kind into a single DataFrame.
# DataFrame.append was removed in pandas 2.0, so the per-sample frames are
# collected in lists and concatenated once.
ls_df_mi = []
ls_df_xy = []
for s_sample in ls_sample:
    ls_df_mi.append(pd.read_csv(f'{codedir}/Data/Quench/features_{s_sample}_patched_MeanIntensity_Shape__registeredimages.csv',index_col=0))
    ls_df_xy.append(pd.read_csv(f'{codedir}/Data/Quench/features_{s_sample}_CentroidXY.csv',index_col=0))
    #break
# pd.concat raises on an empty list; fall back to an empty frame as before
df_mi = pd.concat(ls_df_mi) if ls_df_mi else pd.DataFrame()
df_xy = pd.concat(ls_df_xy) if ls_df_xy else pd.DataFrame()

In [None]:
#filtering out edge cells from adjacent TMA cores
ls_drop = ['808L2-053_scene066',#wrong name
          ##'808L2-055_scene023', '808L2-055_scene052','808L2-055_scene056', '808L2-055_scene066', #missing mean int
           #'808L2-055_scene071' #missing df_xy
          ] 

#### BOTTOM ############
d_filter = {'808L2-053_scene006':(df_xy.DAPI_Y > 6000),'808L2-053_scene007':(df_xy.DAPI_Y > 7300),
            '808L2-053_scene019':(df_xy.DAPI_Y > 7000),'808L2-053_scene028':(df_xy.DAPI_Y > 8000),
           '808L2-053_scene020':(df_xy.DAPI_Y > 6000),'808L2-053_scene021':(df_xy.DAPI_Y > 6000),
            '808L2-053_scene027':(df_xy.DAPI_Y > 6000),'808L2-053_scene029':(df_xy.DAPI_Y > 8000),
           '808L2-053_scene030':(df_xy.DAPI_Y > 8000),'808L2-053_scene036':(df_xy.DAPI_Y > 8000),
           '808L2-053_scene037':(df_xy.DAPI_Y > 8000),'808L2-053_scene038':(df_xy.DAPI_Y > 8000),
           '808L2-053_scene046':(df_xy.DAPI_Y > 8000),'808L2-053_scene052':(df_xy.DAPI_Y > 6000),
           '808L2-053_scene053':(df_xy.DAPI_Y > 8000),'808L2-053_scene058':(df_xy.DAPI_Y > 6000),
           '808L2-053_scene059':(df_xy.DAPI_Y > 8000),
           #54
           '808L2-054_scene012':(df_xy.DAPI_Y > 8000),'808L2-054_scene013':(df_xy.DAPI_Y > 8000),
           '808L2-054_scene019':(df_xy.DAPI_Y > 8000),'808L2-054_scene026':(df_xy.DAPI_Y > 6000),
           '808L2-054_scene027':(df_xy.DAPI_Y > 6000),'808L2-054_scene033':(df_xy.DAPI_Y > 8000),
           '808L2-054_scene035':(df_xy.DAPI_Y > 8000),'808L2-054_scene036':(df_xy.DAPI_Y > 8000),
           '808L2-054_scene041':(df_xy.DAPI_Y > 8000),'808L2-054_scene043':(df_xy.DAPI_Y > 8000),
           '808L2-054_scene049':(df_xy.DAPI_Y > 7000),'808L2-054_scene050':(df_xy.DAPI_Y > 8000),
           '808L2-054_scene052':(df_xy.DAPI_Y > 8000),'808L2-054_scene056':(df_xy.DAPI_Y > 8000),
           '808L2-054_scene058':(df_xy.DAPI_Y > 8000),'808L2-054_scene034':(df_xy.DAPI_Y > 8000),
            '808L2-054_scene044':(df_xy.DAPI_Y > 8000),'808L2-054_scene057':(df_xy.DAPI_Y > 8000),
           #55
           '808L2-055_scene012':(df_xy.DAPI_Y > 8000),'808L2-055_scene019':(df_xy.DAPI_Y > 8000),
           '808L2-055_scene021':(df_xy.DAPI_Y > 8000),'808L2-055_scene025':(df_xy.DAPI_Y > 8000),
           '808L2-055_scene030':(df_xy.DAPI_Y > 8000),'808L2-055_scene031':(df_xy.DAPI_Y > 8000),
           '808L2-055_scene035':(df_xy.DAPI_Y > 8000),'808L2-055_scene040':(df_xy.DAPI_Y > 8000),
           '808L2-055_scene041':(df_xy.DAPI_Y > 7000),'808L2-055_scene044':(df_xy.DAPI_Y > 8000),
           '808L2-055_scene046':(df_xy.DAPI_Y > 8000),'808L2-055_scene054':(df_xy.DAPI_Y > 8000),
           '808L2-055_scene055':(df_xy.DAPI_Y > 8000),'808L2-055_scene059':(df_xy.DAPI_Y > 8000),
            '808L2-055_scene037':(df_xy.DAPI_Y > 6000),
           }
# BOTTOM edge: start the exclusion list with the index labels of cells that
# belong to a listed scene and satisfy that scene's DAPI_Y condition
ls_filter_all = []
for s_scene, filtercon in d_filter.items():
    b_scene = df_xy.slide_scene == s_scene
    ls_filter = df_xy.loc[b_scene & filtercon].index.tolist()
    ls_filter_all += ls_filter
##### LEFT ############   
d_filter2 = {'808L2-053_scene011':(df_xy.DAPI_X < 1500),'808L2-053_scene019':(df_xy.DAPI_X < 1500),
            '808L2-053_scene020':(df_xy.DAPI_X < 1500),
            '808L2-053_scene028':(df_xy.DAPI_X < 1500),'808L2-053_scene029':(df_xy.DAPI_X < 1500),
            '808L2-053_scene030':(df_xy.DAPI_X < 1500),'808L2-053_scene037':(df_xy.DAPI_X < 1500),
            '808L2-053_scene038':(df_xy.DAPI_X < 1500),'808L2-053_scene046':(df_xy.DAPI_X < 1500),
            '808L2-053_scene047':(df_xy.DAPI_X < 1500),'808L2-053_scene048':(df_xy.DAPI_X < 1500),
            '808L2-053_scene053':(df_xy.DAPI_X < 1500),'808L2-053_scene057':(df_xy.DAPI_X < 1500),
            '808L2-053_scene064':(df_xy.DAPI_X < 1500),'808L2-053_scene065':(df_xy.DAPI_X < 1500),
            '808L2-053_scene072':(df_xy.DAPI_X < 1500),'808L2-053_scene056':(df_xy.DAPI_X < 1500),
             '808L2-053_scene069':(df_xy.DAPI_X < 1500),
            #54
             '808L2-054_scene007':(df_xy.DAPI_X < 1500),'808L2-054_scene020':(df_xy.DAPI_X < 1500),
            '808L2-054_scene024':(df_xy.DAPI_X < 1500),'808L2-054_scene025':(df_xy.DAPI_X < 1500),
            '808L2-054_scene027':(df_xy.DAPI_X < 1500),'808L2-054_scene029':(df_xy.DAPI_X < 1500),
            '808L2-054_scene030':(df_xy.DAPI_X < 900),'808L2-054_scene038':(df_xy.DAPI_X < 1500),
            '808L2-054_scene047':(df_xy.DAPI_X < 1500),'808L2-054_scene048':(df_xy.DAPI_X < 1500),
            '808L2-054_scene049':(df_xy.DAPI_X < 1500),'808L2-054_scene058':(df_xy.DAPI_X < 1500),
            '808L2-054_scene068':(df_xy.DAPI_X < 1500),'808L2-054_scene069':(df_xy.DAPI_X < 1500),
            #55
            '808L2-055_scene003':(df_xy.DAPI_X < 1500),'808L2-055_scene004':(df_xy.DAPI_X < 1500),
            '808L2-055_scene020':(df_xy.DAPI_X < 1000),'808L2-055_scene024':(df_xy.DAPI_X < 1000),
            '808L2-055_scene026':(df_xy.DAPI_X < 1000),'808L2-055_scene030':(df_xy.DAPI_X < 1000),
            '808L2-055_scene031':(df_xy.DAPI_X < 1000),'808L2-055_scene032':(df_xy.DAPI_X < 1000),
            '808L2-055_scene053':(df_xy.DAPI_X < 1000),'808L2-055_scene054':(df_xy.DAPI_X < 1000),
            '808L2-055_scene060':(df_xy.DAPI_X < 1000),'808L2-055_scene068':(df_xy.DAPI_X < 1000),
            '808L2-055_scene069':(df_xy.DAPI_X < 1000),'808L2-055_scene070':(df_xy.DAPI_X < 1000),
             '808L2-055_scene047':(df_xy.DAPI_X < 1000),'808L2-055_scene061':(df_xy.DAPI_X < 1000),
            }
# LEFT edge: add cells of each listed scene that satisfy its DAPI_X condition
for s_scene, filtercon in d_filter2.items():
    b_scene = df_xy.slide_scene == s_scene
    ls_filter = df_xy.loc[b_scene & filtercon].index.tolist()
    ls_filter_all = [*ls_filter_all, *ls_filter]
##### RIGHT ############
d_filter3 = {'808L2-053_scene001':(df_xy.DAPI_X > 8000),'808L2-053_scene002':(df_xy.DAPI_X > 8000),
             '808L2-053_scene019':(df_xy.DAPI_X > 8000),'808L2-053_scene027':(df_xy.DAPI_X > 6500),
            '808L2-053_scene028':(df_xy.DAPI_X > 8000),'808L2-053_scene029':(df_xy.DAPI_X > 8000),
            '808L2-053_scene032':(df_xy.DAPI_X > 8000),'808L2-053_scene037':(df_xy.DAPI_X > 8000),
            '808L2-053_scene038':(df_xy.DAPI_X > 8000),'808L2-053_scene046':(df_xy.DAPI_X > 8000),
            '808L2-053_scene047':(df_xy.DAPI_X > 8000),'808L2-053_scene057':(df_xy.DAPI_X > 8000),
            '808L2-053_scene064':(df_xy.DAPI_X > 8500),'808L2-053_scene067':(df_xy.DAPI_X > 8000),
            '808L2-053_scene071':(df_xy.DAPI_X > 6000),'808L2-053_scene052':(df_xy.DAPI_X > 7000),
            '808L2-053_scene054':(df_xy.DAPI_X > 8000),'808L2-053_scene045':(df_xy.DAPI_X > 6000),
             '808L2-053_scene056':(df_xy.DAPI_X > 8000),'808L2-053_scene063':(df_xy.DAPI_X > 8000),
             '808L2-053_scene066':(df_xy.DAPI_X > 6000),
            #54
            '808L2-054_scene007':(df_xy.DAPI_X > 8000),'808L2-054_scene010':(df_xy.DAPI_X > 6000),
            '808L2-054_scene019':(df_xy.DAPI_X > 8000),'808L2-054_scene024':(df_xy.DAPI_X > 8000),
            '808L2-054_scene028':(df_xy.DAPI_X > 8000),'808L2-054_scene029':(df_xy.DAPI_X > 8000),
            '808L2-054_scene030':(df_xy.DAPI_X > 8000),'808L2-054_scene033':(df_xy.DAPI_X > 8000),
            '808L2-054_scene038':(df_xy.DAPI_X > 8000),'808L2-054_scene047':(df_xy.DAPI_X > 8000),
            '808L2-054_scene048':(df_xy.DAPI_X > 8000),'808L2-054_scene049':(df_xy.DAPI_X > 8000),
            '808L2-054_scene058':(df_xy.DAPI_X > 8000),'808L2-054_scene059':(df_xy.DAPI_X > 8000),
            '808L2-054_scene060':(df_xy.DAPI_X > 8000),'808L2-054_scene065':(df_xy.DAPI_X > 7000),
            #55
            '808L2-055_scene003':(df_xy.DAPI_X > 8000),'808L2-055_scene019':(df_xy.DAPI_X > 8000),
            '808L2-055_scene024':(df_xy.DAPI_X > 8000),'808L2-055_scene025':(df_xy.DAPI_X > 8000),
            '808L2-055_scene026':(df_xy.DAPI_X > 8000),'808L2-055_scene031':(df_xy.DAPI_X > 8000),
            '808L2-055_scene038':(df_xy.DAPI_X > 7000),'808L2-055_scene046':(df_xy.DAPI_X > 8000),
            '808L2-055_scene053':(df_xy.DAPI_X > 8000),'808L2-055_scene067':(df_xy.DAPI_X > 7000),
            '808L2-055_scene068':(df_xy.DAPI_X > 8000),'808L2-055_scene033':(df_xy.DAPI_X > 8000),
             '808L2-055_scene047':(df_xy.DAPI_X > 8000),
            }
# RIGHT edge: add cells of each listed scene that satisfy its DAPI_X condition
for s_scene, filtercon in d_filter3.items():
    b_scene = df_xy.slide_scene == s_scene
    ls_filter = df_xy.loc[b_scene & filtercon].index.tolist()
    ls_filter_all = [*ls_filter_all, *ls_filter]
    
### TOP ############
d_filter4 = {'808L2-053_scene014':(df_xy.DAPI_Y < 2000), '808L2-053_scene028':(df_xy.DAPI_Y < 1500),
             '808L2-053_scene029':(df_xy.DAPI_Y < 1500),
            '808L2-053_scene036':(df_xy.DAPI_Y < 1500),'808L2-053_scene037':(df_xy.DAPI_Y < 1500),
            '808L2-053_scene038':(df_xy.DAPI_Y < 1500),'808L2-053_scene053':(df_xy.DAPI_Y < 1500),
            '808L2-053_scene060':(df_xy.DAPI_Y < 1500),'808L2-053_scene061':(df_xy.DAPI_Y < 1500),
            '808L2-053_scene062':(df_xy.DAPI_Y < 1200),'808L2-053_scene067':(df_xy.DAPI_Y < 1500),
            '808L2-053_scene068':(df_xy.DAPI_Y < 1500),'808L2-053_scene066':(df_xy.DAPI_Y < 1500),
            #54
            '808L2-054_scene011':(df_xy.DAPI_Y < 1500),'808L2-054_scene012':(df_xy.DAPI_Y < 1500),
            '808L2-054_scene033':(df_xy.DAPI_Y < 1500),'808L2-054_scene035':(df_xy.DAPI_Y < 1500),
            '808L2-054_scene036':(df_xy.DAPI_Y < 1500),'808L2-054_scene041':(df_xy.DAPI_Y < 1500),
            '808L2-054_scene042':(df_xy.DAPI_Y < 1500),'808L2-054_scene043':(df_xy.DAPI_Y < 1500),
            '808L2-054_scene045':(df_xy.DAPI_Y < 1500),'808L2-054_scene050':(df_xy.DAPI_Y < 1500),
            '808L2-054_scene052':(df_xy.DAPI_Y < 1500),'808L2-054_scene056':(df_xy.DAPI_Y < 1500),
            '808L2-054_scene058':(df_xy.DAPI_Y < 1500),'808L2-054_scene065':(df_xy.DAPI_Y < 1500),
            '808L2-054_scene066':(df_xy.DAPI_Y < 1500),'808L2-054_scene034':(df_xy.DAPI_Y < 1500),
             '808L2-054_scene044':(df_xy.DAPI_Y < 1500),'808L2-054_scene057':(df_xy.DAPI_Y < 1500),
            #55
            '808L2-055_scene010':(df_xy.DAPI_Y < 1500),'808L2-055_scene011':(df_xy.DAPI_Y < 1500),
            '808L2-055_scene012':(df_xy.DAPI_Y < 2000),'808L2-055_scene015':(df_xy.DAPI_Y < 2000),
            '808L2-055_scene031':(df_xy.DAPI_Y < 2000),'808L2-055_scene035':(df_xy.DAPI_Y < 1500),
            '808L2-055_scene036':(df_xy.DAPI_Y < 1500),'808L2-055_scene044':(df_xy.DAPI_Y < 1500),
            '808L2-055_scene046':(df_xy.DAPI_Y < 1500),'808L2-055_scene049':(df_xy.DAPI_Y < 1500),
            '808L2-055_scene050':(df_xy.DAPI_Y < 1500),'808L2-055_scene054':(df_xy.DAPI_Y < 1500),
            '808L2-055_scene055':(df_xy.DAPI_Y < 1500),'808L2-055_scene059':(df_xy.DAPI_Y < 1500),
            '808L2-055_scene060':(df_xy.DAPI_Y < 1500),'808L2-055_scene063':(df_xy.DAPI_Y < 1500),
            '808L2-055_scene064':(df_xy.DAPI_Y < 1500),'808L2-055_scene065':(df_xy.DAPI_Y < 1500),
            '808L2-055_scene068':(df_xy.DAPI_Y < 1500),'808L2-055_scene069':(df_xy.DAPI_Y < 2000),
            }
# TOP edge: add cells of each listed scene that satisfy its DAPI_Y condition
for s_scene, filtercon in d_filter4.items():
    b_scene = df_xy.slide_scene == s_scene
    ls_filter = df_xy.loc[b_scene & filtercon].index.tolist()
    ls_filter_all = [*ls_filter_all, *ls_filter]
    
# cells that belong to scenes dropped wholesale (the scenes named in ls_drop)
b_dropped_scene = df_xy.slide_scene.isin(ls_drop)
ls_drop_filter = df_xy.loc[b_dropped_scene].index.tolist()
# exclusion list = edge cells collected so far + cells of dropped scenes
ls_filter_all = ls_filter_all + ls_drop_filter
#filter edge (disabled)
#ls_filter_all = ls_filter_all + df_edge.index.tolist()
# keep the mean-intensity rows whose index label is not on the exclusion list
b_keep = ~df_mi.index.isin(ls_filter_all)
df_filter_mi = df_mi[b_keep]

In [None]:
#for s_scene in ls_drop:
    #print(f'sbatch registration_matlab_{s_scene.replace("-","").replace("scene0","Scene")}.sbatch')
    #print(f'sbatch segmentation_nuccell_{s_scene.replace("-","")}.sbatch')
    #print(f'sbatch segmentation_match_{s_scene.replace("-","")}.sbatch')
ls_drop

In [None]:
#plot spatially
# Visual check of the edge filter for ONE scene: the loop variable is
# overwritten with a hard-coded scene name and the loop exits after its first
# pass, so exactly one figure is produced (none if df_xy has no scenes).

for s_scene in sorted(set(df_xy.slide_scene)):
    #s_scene = '808L2-053_scene066'
    # hard-coded scene to inspect; replaces the loop value
    s_scene = 'HER2A-60_scene011'
    if len(df_xy[df_xy.slide_scene==s_scene])>=1:
        fig,ax = plt.subplots(dpi=100)
        # background: every cell of the scene in df_xy, drawn in silver
        ax.scatter(data=df_xy[df_xy.slide_scene==s_scene],x='DAPI_X',y='DAPI_Y',color='silver',s=0.3,label=f'')
        if len(df_filter_mi[(df_filter_mi.slide_scene==s_scene)])>=1:
                # foreground: cells still present in df_filter_mi (kept by the filter), in dark blue
                ls_pos = df_filter_mi[(df_filter_mi.slide_scene==s_scene)].index
                ax.scatter(data=df_xy.loc[ls_pos],x='DAPI_X',y='DAPI_Y',s=0.3,color='darkblue') #label=f'{s_color}',
        ax.set_title(f'{s_scene}')
        ax.axis('equal')
        # reverse the y-limits so the y axis is flipped
        ax.set_ylim(ax.get_ylim()[::-1])
        #ax[idx].set_xticklabels('')
        #ax[idx].set_yticklabels('')
    # stop after the first pass: only the hard-coded scene is plotted
    break

In [None]:
plt.close(fig)

### rename scenes

Match scene names across TMAs.

In [None]:
ls_scene = sorted(set([item.split('_')[1] for item in df_xy.slide_scene]))

In [None]:
#non matching scenes
# Build an old-name -> new-name mapping from the sorted scene list ls_scene:
#  - slide 808L2-053: the names at positions 17..64 are mapped to the names at
#    positions 18..65 (each scene takes the next name in the list)
#  - slide 808L2-055: the names at positions 66..72 are mapped to the names at
#    positions 65..71 (each scene takes the previous name in the list)
# zip() stops at the shorter list, so a short ls_scene silently yields fewer pairs.
d_replace = dict(zip(['808L2-053_' + item for item in ls_scene[17:65]],['808L2-053_' + item for item in ls_scene[18:66]]))
d_replace.update(dict(zip(['808L2-055_' + item for item in ls_scene[66:73]],['808L2-055_' + item for item in ls_scene[65:72]])))
# display the mapping as the cell output
d_replace

In [None]:
### drop the scenes listed in ls_drop from df_xy before renaming
# .copy() makes the filtered frame independent of the original, so adding a
# column below does not raise pandas' SettingWithCopyWarning.
df_xy = df_xy[~df_xy.slide_scene.isin(ls_drop)].copy()
# translate scene names through d_replace; names without an entry are unchanged
df_xy['slide_scene_new'] = df_xy.slide_scene.replace(d_replace)
df_mi['slide_scene_new'] = df_mi.slide_scene.replace(d_replace)
# Attach the new names to the filtered table by index label. Any existing
# 'slide_scene_new' column is removed first so that re-running this cell does
# not produce duplicated slide_scene_new_x / slide_scene_new_y columns.
df_filter_mi = df_filter_mi.drop(columns='slide_scene_new', errors='ignore').merge(
    df_mi.loc[:,'slide_scene_new'],left_index=True,right_index=True)

In [None]:
#do scenes match
# Visual check that a renamed scene shows matching tissue on every slide:
# one row of three panels, one panel per entry of ls_sample. The scene is
# hard-coded inside the loop and the outer loop exits after its first pass,
# so a single figure is produced.
for s_scene in sorted(set([item.split('_')[1] for item in df_xy.slide_scene])):
    fig,ax = plt.subplots(1,3,dpi=100, figsize=(12,4))
    for idx, s_slide in enumerate(ls_sample):
        # hard-coded scene to inspect; replaces the outer loop value
        s_scene ='scene011'
        # draw the cells whose renamed scene label is <slide>_<scene>; the panel stays empty if there are none
        if len(df_xy[df_xy.slide_scene_new==f'{s_slide}_{s_scene}'])>=1:
            ax[idx].scatter(data=df_xy[df_xy.slide_scene_new==f'{s_slide}_{s_scene}'],x='DAPI_X',y='DAPI_Y',color='darkblue',s=0.3,label=f'')
        ax[idx].set_title(f'{s_slide}_{s_scene}')
        ax[idx].axis('equal')
        # reverse the y-limits so the y axis is flipped
        ax[idx].set_ylim(ax[idx].get_ylim()[::-1])
        ax[idx].set_xticklabels('')
        ax[idx].set_yticklabels('')
    # stop after the first pass: only the hard-coded scene is plotted
    break

In [None]:
plt.close(fig)


In [None]:
#fix index
# Rebuild every cell label as <renamed slide_scene>_<cell id>, where the cell id
# is the third '_'-separated field of the current label. Pairing the column
# with the index positionally (zip) avoids one .loc label lookup per row and
# stays correct even if an index label occurs more than once.
df_filter_mi.index = [s_new + '_' + s_old.split('_')[2]
                      for s_new, s_old in zip(df_filter_mi['slide_scene_new'], df_filter_mi.index)]
df_xy.index = [s_new + '_' + s_old.split('_')[2]
               for s_new, s_old in zip(df_xy['slide_scene_new'], df_xy.index)]
# scene = second '_'-separated field of the rebuilt label
df_filter_mi['scene'] = [item.split('_')[1] for item in df_filter_mi.index]

In [None]:
#df_filter_mi.loc[:,((df_filter_mi.dtypes!='float64') | (df_filter_mi.columns.isin(['nuclei_eccentricity'])))]
df_filter_mi['scene'] = [item.split('_')[1] for item in df_filter_mi.index]


In [None]:
#save tissue loss
s_slides = 'HER2A' # alternative: '808L2'
s_out = f'{codedir}/Data/Quench/features_{s_slides}_TissueLoss.csv'
# written once; an existing file is never overwritten
if not os.path.exists(s_out):
    print('saving csv')
    # keep every non-float64 column plus nuclei_eccentricity
    b_not_float = df_filter_mi.dtypes != 'float64'
    b_eccentricity = df_filter_mi.columns.isin(['nuclei_eccentricity'])
    df_filter_mi.loc[:, (b_not_float | b_eccentricity)].to_csv(s_out)

In [None]:
# Keep only the cells flagged in the DAPI threshold column of the last usable
# round: R10 for 808L2, R6 for HER2A. Missing flags count as False.
d_dapi_col = {'808L2': 'DAPI10_nuclei_thresh', 'HER2A': 'DAPI6_nuclei_thresh'}
if s_slides in d_dapi_col:
    b_dapi_pos = df_filter_mi[d_dapi_col[s_slides]].fillna(False)
    df_filter_mi = df_filter_mi[b_dapi_pos]

In [None]:
#save for clustering
s_slides = 'HER2A' # alternative: '808L2'
s_col = 'c2_perinuc5' # alternative: 'c3_nuclei'

# per slide set: scenes to leave out and round columns to discard
if s_slides == '808L2':
    #scene6 lost in 55, 72 mis registered in 53, others few cells
    ls_drop_scene = ['scene006','scene012','scene013','scene016', 'scene072']
    ls_drop_cols = []
if s_slides == 'HER2A':
    ls_drop_scene = ['scene011'] #HER2A
    #HER2A good until R6
    ls_drop_cols = ['R7c2_perinuc5', 'R8c2_perinuc5', 'R9c2_perinuc5','R10c2_perinuc5']

s_out = f'{codedir}/Data/Quench/features_{s_slides}_{s_col}_MeanIntensity.csv'
# written once; an existing file is never overwritten
if not os.path.exists(s_out):
    print('saving csv')
    b_keep_rows = ~df_filter_mi.scene.isin(ls_drop_scene)
    b_feature_cols = df_filter_mi.columns.str.contains(s_col)
    # selected features, minus the discarded rounds, complete rows only
    df_filter_mi.loc[b_keep_rows, b_feature_cols].drop(ls_drop_cols, axis=1).dropna().to_csv(s_out)

In [None]:
#save corrected xy (written once; an existing file is never overwritten)
s_xy_out = f'{codedir}/Data/Quench/features_{s_slides}_CentroidXY.csv'
if not os.path.exists(s_xy_out):
    print('saving csv')
    df_xy.to_csv(s_xy_out)

## Normal Pancreas

In [None]:
#import single cell intensity data
# One filtered mean-intensity table and one centroid table per pancreas
# section; each intensity table is tagged with its sample name before stacking.
# DataFrame.append was removed in pandas 2.0, so the per-sample frames are
# collected in lists and concatenated once.
ls_sample = ['4165NPanc-74','4165NPanc-76','4165NPanc-78']
ls_df_data = []
ls_df_xy = []
for s_sample in ls_sample:
    print(s_sample)
    df = pd.read_csv(f'{codedir}/Data/Quench/features_{s_sample}_FilteredMeanIntensity_DAPI6_DAPI1.csv',index_col=0)
    df['sample'] = s_sample
    ls_df_data.append(df)
    ls_df_xy.append(pd.read_csv(f'{codedir}/Data/Quench/features_{s_sample}_CentroidXY.csv',index_col=0))
df_data = pd.concat(ls_df_data)
df_xy = pd.concat(ls_df_xy)


In [None]:
#select nucleus and 5 pixels around nucleus (perinuc5) mean intensity features
# perinuc5 columns, leaving out anything with 'DAPI' or 'R6' in its name
b_perinuc = df_data.columns.str.contains('perinuc5')
b_dapi = df_data.columns.str.contains('DAPI')
b_r6 = df_data.columns.str.contains('R6')
ls_cyto = sorted(df_data.columns[b_perinuc & (~b_dapi) & (~b_r6)])
# matching nuclear column: same prefix before the first '_' with the '_nuclei' suffix
ls_nuc = [f"{item.split('_')[0]}_nuclei" for item in ls_cyto]
ls_cyto = ls_cyto + ['sample']
# complete rows only, for each feature set
df_cyto = df_data[ls_cyto].dropna()
df_nuc = df_data[ls_nuc].dropna()
df_both = df_data[ls_cyto + ls_nuc].dropna()

In [None]:
# analyze only channel 2 (c2), plus the sample label; c3 is left out (shading artifact)
b_c2 = df_both.columns.str.contains('c2')
b_sample = df_both.columns.str.contains('sample')
df_af = df_both.loc[:, (b_c2 | b_sample)]

#sort column names
df_af = df_af[sorted(df_af.columns)]

In [None]:
# save as csv for scanpy
# shorten the location suffixes (_nuclei -> _nuc, _perinuc5 -> _cyt) and write
# the table without the sample label column
df_af.columns = [item.replace('_nuclei','_nuc').replace('_perinuc5','_cyt') for item in df_af.columns]
s_scanpy_csv = f'{codedir}/Data/Quench/features_4165NPanc_AF_scanpy.csv'
df_af.drop('sample',axis=1).to_csv(s_scanpy_csv)

# Scanpy

single cell analysis

In [None]:
# Read one feature table into scanpy, log-transform it, batch-correct by
# tissue and run PCA.
# Dataset selector: only the LAST assignment takes effect; the earlier lines
# are alternatives kept for switching datasets by hand.
s_sample = 'features_4165NPanc_AF_scanpy'
s_sample = '808L2_c2_nuclei_MeanIntensity'
s_sample = 'HER2A_c3_nuclei_MeanIntensity'
# slide-set name = text before the first '_'
# NOTE(review): for 'features_4165NPanc_AF_scanpy' this yields 'features', so the
# '4165NPanc' branch below would not match and the path below would start with
# 'features_features_' -- confirm how that dataset is meant to be selected
s_slide = s_sample.split('_')[0]
filename = f'{codedir}/Data/Quench/features_{s_sample}.csv'
adata = sc.read_csv(filename)
fig,ax=plt.subplots()
# top 12 variables by scanpy's highest_expr_genes ranking, saved as a figure
sc.pl.highest_expr_genes(adata, n_top=12,ax=ax)
fig.savefig(f'{codedir}/Figures/{s_slide}_RankedExpression.png',dpi=200)

#adata.obs
# tissue label = text before '_scene' in the cell name; for 4165NPanc the part
# after '4165' is used
if s_slide == '4165NPanc':
    adata.obs['tissue'] = [item.split('_scene')[0].split('4165')[1] for item in adata.obs.index]
else:
    adata.obs['tissue'] = [item.split('_scene')[0] for item in adata.obs.index]
#log transform, batch correct, reduce dimensionality (PCA)
sc.pp.log1p(adata)
# .raw is set after log1p and before combat
adata.raw = adata
#sc.pp.scale(adata, max_value=50) don't scale since all are same exposure

#remove batch effect, using the tissue label as the batch key
sc.pp.combat(adata,key='tissue')

#reduce dimensionality
sc.tl.pca(adata, svd_solver='auto')
fig, ax = plt.subplots()
sc.pl.pca(adata,ax=ax)
sc.pl.pca_variance_ratio(adata, log=True)

In [None]:
#umap run_scanpy.py
'''
sc.pp.neighbors(adata, n_neighbors=25) #10 for 1 tissue (4165), 25 for TMA
sc.tl.umap(adata)
results_file = f'{codedir}/Data/{s_slide}_AF_norm.h5ad'  # the file that will store the analysis results
#adata.write(results_file)
#results_file = f'{codedir}/Data/{s_slide}_AF_raw.h5ad'  # the file that will store the analysis results
#rdata.write(results_file)
'''

# load umap

In [None]:
# Dataset selector: only the last active assignment takes effect
s_sample = 'features_4165NPanc_AF_scanpy'
s_sample = '808L2_c3_nuclei_MeanIntensity'
#s_sample = 'HER2A_c3_nuclei_MeanIntensity'
s_slide = s_sample.split('_')[0]
s_sample = s_sample.split('_MeanIntensity')[0]
n_neighbors= 25 #10 #25 for 808 #20 for HER2A
# 808L2 results computed with a neighbor count other than 25 are stored under
# the '<slide>_AF' prefix; every other case uses the sample name as prefix
if s_slide == '808L2' and n_neighbors != 25:
    s_prefix = f'{s_slide}_AF'
else:
    s_prefix = s_sample
filename = f'{codedir}/Data/{s_prefix}_norm_{n_neighbors}-5s.h5ad'
print(filename)
rawfilename = filename
adata = sc.read_h5ad(filename)

In [None]:
#umap plot, colored by tissue
figname = f'{codedir}/Figures/{s_sample}_UmapBatch_{n_neighbors}.png'
fig, ax = plt.subplots(dpi=150)
sc.pl.umap(adata, color='tissue',ax=ax,title='Normalized')
# Lay out and save through the Figure object itself: pyplot's "current figure"
# may no longer be this one once a figure has been shown.
fig.tight_layout()
fig.savefig(figname,bbox_inches='tight', dpi=150)
# plt.show() takes no figure argument (its only parameter is `block`)
plt.show()

In [None]:
# umap plus expression: one UMAP per variable, colored by that variable's
# value on a fixed 6..8 color scale
sc.set_figure_params(fontsize=16)
for s_color in sorted(adata.var.index):
    #s_color = 'R0c2_nuclei'#'R2c2_nuc'
    figname = f'{codedir}/Figures/{s_sample}_{n_neighbors}_{s_color}_Umap.png'
    fig, ax = plt.subplots(figsize = (5,4),dpi=150)
    sc.pl.umap(adata, color=s_color, wspace=0.2,title=s_color,ax=ax,vmin=6,vmax=8) #size = 1.5, vmin='p1.5',vmax='p98.5'
    # Lay out and save through the Figure object itself: pyplot's "current
    # figure" may no longer be this one once a figure has been shown.
    fig.tight_layout()
    fig.savefig(figname,bbox_inches='tight')
    # plt.show() takes no figure argument (its only parameter is `block`)
    plt.show()
    #break

In [None]:
#load annotation
# Read the TMA spec sheet and normalize the columns used for coloring UMAPs.
df_a = pd.read_csv(f'{codedir}/Data/FDA808l-2 specs.csv')
#grade: '-' is treated as missing
df_a['Grade'] = df_a.Grade.replace('-',np.nan)
#stage: collapse sub-stages (and entries carrying a grade suffix or stray
# spaces) to the main stage I/II/III/IV; '-' is treated as missing
d_replace = {'-':np.nan, 'IA':'I', 'III':"III", 'IIA':"II", 'IIB':'II', 'IIIA':"III", 'IB':"I",
       ' IIB G2':"II", 'IIIB':"III", 'IVB':'IV', 'IIA G3':"II", 'IIB G3 ':"II", 'IIB G2':"II"}
df_a['Stage'] = df_a.Stage.replace(d_replace)
# shorter alias for the 'Pathology diagnosis' column
df_a.loc[:,'Pathology'] = df_a.loc[:,'Pathology diagnosis']
# age binned into four quantile groups labelled Q1..Q4
df_a.loc[:,'Age_Q'] = pd.qcut(df_a.Age,4,labels=["Q1", "Q2", "Q3","Q4"])
# add scene: second '_'-separated field of each cell name
adata.obs['scene'] = [item.split('_')[1] for item in adata.obs.index]

In [None]:
# One UMAP per annotation column: the annotation is looked up by scene
# (df_a.Scene -> value) and stored in adata.obs before plotting.
for s_type in ['Type','Grade','Stage','Sex','Organ','Pathology','Age_Q']: #
    figname = f'{codedir}/Figures/{s_sample}_{n_neighbors}_{s_type}_Umap.png'
    fig, ax = plt.subplots(figsize = (5,4),dpi=150)
    adata.obs[s_type] = adata.obs['scene'].map(dict(zip(df_a.Scene,df_a.loc[:,s_type])))
    sc.pl.umap(adata, color=s_type, wspace=0.2,title=s_type,ax=ax)
    # Lay out and save through the Figure object itself: pyplot's "current
    # figure" may no longer be this one once a figure has been shown.
    fig.tight_layout()
    fig.savefig(figname,bbox_inches='tight')
    # plt.show() takes no figure argument (its only parameter is `block`)
    plt.show()
    #break

In [None]:
# umap plus annotation
'''
s_color = 'R10c2_nuclei'#'R5c2_nuclei'#'R5c2_nuc'
sc.set_figure_params(fontsize=16)
figname = f'{codedir}/Figures/{s_slide}_{s_color}_Umap{n_neighbors}.png'
fig, ax = plt.subplots(figsize = (5,4),dpi=150)
sc.pl.umap(adata, color=s_color, wspace=0.2,title='AF488: 60 min',ax=ax,vmin=5,vmax=9)
plt.tight_layout()
plt.show(fig)
fig.savefig(figname,bbox_inches='tight')
'''

In [None]:
#cluster
if s_slide == '4165NPanc':
    resolution = 0.3
else:
    resolution = 0.2
print(resolution)
# run Leiden only when the loaded object does not already carry cluster labels
if 'leiden' not in adata.obs.columns:
    sc.tl.leiden(adata, resolution = resolution)
# umap plus clustering result
sc.set_figure_params(fontsize=16)
figname = f'{codedir}/Figures/{s_sample}_UmapLeiden_neigh{n_neighbors}.png'
fig, ax = plt.subplots(figsize = (4,4),dpi=150)
sc.pl.umap(adata, color='leiden',wspace=0.2,title='Clustering w/ Leiden algorithm',ax=ax) #size=1,
# Lay out and save through the Figure object itself: pyplot's "current figure"
# may no longer be this one once a figure has been shown.
fig.tight_layout()
fig.savefig(figname,bbox_inches='tight')
# plt.show() takes no figure argument (its only parameter is `block`)
plt.show()
#save results
results_file = f'{codedir}/Data/{s_sample}_norm_{n_neighbors}_leiden{resolution}.h5ad'  # the file that will store the analysis results
adata.write(results_file)
#save as csv: the data matrix joined with the per-cell annotations on cell name
df_data = pd.DataFrame(adata.X, columns = adata.var.index, index = adata.obs.index)
df_data.merge(adata.obs,left_index=True,right_index=True).to_csv(f'{codedir}/Data/{s_sample}_norm_{n_neighbors}_leiden{resolution}.csv')
#df_raw.merge(adata.obs,left_index=True,right_index=True).to_csv(f'{codedir}/Data/{s_slide}_AF_raw_{n_neighbors}_leiden{resolution}.csv')


In [None]:
s_sample = 'HER2A_c2_nuclei'

In [None]:
#raw umap
# Export the un-normalized intensities together with the cluster labels:
# load the clustered object, load the raw feature table, and write the raw
# values joined with the clustered object's per-cell annotations.
resolution=0.2
results_file = f'{codedir}/Data/{s_sample}_norm_{n_neighbors}_leiden{resolution}.h5ad'  # the file that will store the analysis results
print(results_file)
#load leiden
adata = sc.read_h5ad(results_file)
rawfilename = f'{codedir}/Data/Quench/features_{s_sample}_MeanIntensity.csv'
print(rawfilename)
#load raw
rdata = sc.read_csv(rawfilename)
if s_slide == '4165NPanc':
    rdata.obs['tissue'] = [item.split('_scene')[0].split('4165')[1] for item in rdata.obs.index]
else:
    rdata.obs['tissue'] = [item.split('_scene')[0] for item in rdata.obs.index]
# .raw is set BEFORE log1p
rdata.raw = rdata
#log transform, reduce dimensionality (PCA)
sc.pp.log1p(rdata)

#don't remove batch effect
#sc.pp.combat(adata,key='tissue')
#reduce dimensionality
sc.tl.pca(rdata, svd_solver='auto')
'''
sc.pp.neighbors(rdata, n_neighbors=10)
#sc.tl.umap(rdata)
#umap plot
figname = f'{codedir}/Figures/{s_slide}_UmapRaw.png'
#umap plot
fig, ax = plt.subplots(dpi=150)
sc.pl.umap(rdata, color='tissue',ax=ax,title='Raw')
plt.show(fig)
plt.tight_layout()
fig.savefig(figname, dpi=150)
'''
#save
# Label the raw matrix with rdata's OWN cell and variable names. Borrowing the
# labels from adata is only right when both objects hold exactly the same
# cells and variables in the same order; with rdata's labels the index merge
# below pairs each raw row with the matching cluster annotation by cell name.
df_raw = pd.DataFrame(rdata.raw.X, columns = rdata.raw.var.index, index = rdata.obs.index)
df_raw.merge(adata.obs,left_index=True,right_index=True).to_csv(f'{codedir}/Data/{s_sample}_raw_{n_neighbors}_leiden{resolution}.csv')


# Plot Clustering Results

In [None]:
#load leiden clusters
resolution = 0.2
n_neighbors = 25
s_sample = '808L2_c2_nuclei'
s_slide = '808L2'#'HER2A'#'4165NPanc'
# the pancreas result has its own file name; every other slide set uses the
# sample / neighbor count / resolution naming
s_h5ad = (f'{codedir}/Data/{s_slide}_AF_norm_leiden.h5ad'
          if s_slide == '4165NPanc'
          else f'{codedir}/Data/{s_sample}_norm_{n_neighbors}_leiden{resolution}.h5ad')
adata = sc.read_h5ad(s_h5ad)
# scene = second '_'-separated field of each cell name
adata.obs['scene'] = [s_cell.split('_')[1] for s_cell in adata.obs.index]

In [None]:
adata.var.index

In [None]:
# inspect clusters: matrix plot of each variable, grouped by Leiden cluster
if s_slide == '4165NPanc':
    # nuclear and cytoplasmic variables for rounds R1..R5, side by side
    marker_genes = [f'R{i_round}c2_nuc' for i_round in range(1, 6)]
    marker_genes_cyt = [f'R{i_round}c2_cyt' for i_round in range(1, 6)]
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,6))
    ax1_dict = sc.pl.matrixplot(adata, marker_genes, groupby='leiden', ax=ax1,show=False,dendrogram=False, cmap='viridis', use_raw=True, colorbar_title='Nuclear \nintensity (log)')
    ax2_dict = sc.pl.matrixplot(adata, marker_genes_cyt, groupby='leiden',ax=ax2,show=False, dendrogram=False, cmap='viridis', use_raw=True, colorbar_title='Cytoplasm \nintensity (log)')
    # shared axis decoration for both panels; only the title differs
    for d_axes, s_title in ((ax1_dict, 'Nuclear Autofluorescence'),
                            (ax2_dict, 'Cytoplasmic Autofluorescence')):
        ax_main = d_axes['mainplot_ax']
        ax_main.set_xlabel('Rounds of Quenching')
        ax_main.set_xticklabels(['0','1','2','3','4'])
        ax_main.set_title(s_title)
    # the y label is set on the left panel only
    ax1_dict['mainplot_ax'].set_ylabel('Cluster ID')
    fig.suptitle('Mean Intensity of Each Cluster, by round, location')
    plt.tight_layout()
    fig.savefig(f'{codedir}/Figures/{s_slide}_MatrixPlot_Leiden.png',dpi=200)
else:
    # last round included per slide set; variables are R00, then R0..last
    d_last_round = {'HER2A': 6, '808L2': 10}
    if s_slide in d_last_round:
        marker_genes = ['R00c2_nuclei'] + [f'R{i_round}c2_nuclei' for i_round in range(d_last_round[s_slide] + 1)]
    fig, ax1 = plt.subplots(figsize=(6,6))
    ax1_dict = sc.pl.matrixplot(adata, marker_genes, groupby='leiden', ax=ax1,show=False,dendrogram=False,
                                cmap='viridis', colorbar_title='Nuclear \nintensity (log)',
                               vmin=5,vmax=9)
    ax_main = ax1_dict['mainplot_ax']
    ax_main.set_xlabel('Rounds of Quenching')
    ax_main.set_title('Nuclear Autofluorescence')
    ax_main.set_ylabel('Cluster ID')


In [None]:
# Matrix plot restricted to a subset of clusters (and, for HER2A, of scenes).
# Other slide sets get only the empty figure and no adata_new.
fig,ax =plt.subplots(figsize=(6,5))
if s_slide in ('4165NPanc', 'HER2A'):
    if s_slide == '4165NPanc':
        ls_drop = ['8','9','10']
        b_keep = ~adata.obs['leiden'].isin(ls_drop)
        ls_ticks = ['0','1','2','3','4']
    else:
        ls_drop = ['6','7','8','9','10']
        ls_scene = ['scene006','scene004','scene005','scene009']
        b_keep = ((~adata.obs['leiden'].isin(ls_drop)) & (adata.obs['scene'].isin(ls_scene)))
        ls_ticks = ['0','1','2','3','4','5','6']
    adata_new = adata[b_keep,:]
    axes_dict = sc.pl.matrixplot(adata_new, marker_genes, groupby='leiden', ax=ax,show=False,dendrogram=False, cmap='viridis', use_raw=True, colorbar_title='Nuclear \nintensity (log)')
    ax_main = axes_dict['mainplot_ax']
    ax_main.set_title('Cluster Autofluorescence')
    ax_main.set_xlabel('Rounds of Quenching')
    ax_main.set_ylabel('Cluster ID')
    ax_main.set_xticklabels(ls_ticks)
    # only the pancreas figure is written to disk
    if s_slide == '4165NPanc':
        fig.savefig(f'{codedir}/Figures/{s_slide}_MatrixPlot7_Leiden.png',dpi=200)

In [None]:
# data matrix of the selected cells plus their cluster, scene and tissue labels
df_out = adata_new.to_df()
for s_obs in ('leiden', 'scene', 'tissue'):
    df_out[s_obs] = adata_new.obs[s_obs]

In [None]:
def plot_clusters(df_cluster,df_xy):
    """Draw, for every scene, one figure with one spatial panel per cluster.

    The cluster column is the FIRST column of ``df_cluster`` whose dtype is
    int64; its name is printed. In each panel all cells of the scene are drawn
    in silver and the cells of that cluster are drawn on top in dark blue; the
    panel title gives the cluster and its cell count in the scene.

    Parameters
    ----------
    df_cluster : DataFrame indexed by cell label, with a ``slide_scene``
        column and at least one int64 column (IndexError otherwise).
    df_xy : DataFrame indexed by the same cell labels, with ``slide_scene``,
        ``DAPI_X`` and ``DAPI_Y`` columns.

    Returns
    -------
    list of matplotlib figures, one per scene, in sorted scene-name order.
    The figures are left open; closing them is up to the caller.
    """
    s_type = df_cluster.columns[df_cluster.dtypes=='int64'][0]
    print(s_type)
    ls_scene = sorted(set(df_cluster.slide_scene))
    ls_color = sorted(set(df_cluster.loc[:,s_type].dropna()))
    # cell labels of every cluster, computed once instead of once per scene
    d_pos_index = {s_color: df_cluster.index[(df_cluster.loc[:,s_type]==s_color).to_numpy()]
                   for s_color in ls_color}
    ls_fig = []
    for s_scene in ls_scene:
        #background cells = all cells of the scene in df_xy
        df_neg = df_xy[(df_xy.slide_scene==s_scene)]
        #plot: two panels per row, with one more row than strictly needed
        fig, ax = plt.subplots(len(ls_color)//2 +1, 2, figsize=(12,16),dpi=200)
        ax = ax.ravel()
        for ax_num, s_color in enumerate(ls_color):
            #cells of this cluster that lie in this scene
            df_color_pos = df_neg[df_neg.index.isin(d_pos_index[s_color])]
            if len(df_color_pos)>=1:
                #plot background cells
                ax[ax_num].scatter(data=df_neg,x='DAPI_X',y='DAPI_Y',color='silver',s=1)
                #plot cluster cells
                ax[ax_num].scatter(data=df_color_pos, x='DAPI_X',y='DAPI_Y',color='DarkBlue',s=.5)
                ax[ax_num].axis('equal')
                # reverse the y-limits so the y axis is flipped
                ax[ax_num].set_ylim(ax[ax_num].get_ylim()[::-1])
            ax[ax_num].set_xticklabels('')
            ax[ax_num].set_yticklabels('')
            # same title format for empty and non-empty clusters, e.g. "3 (0 cells)"
            ax[ax_num].set_title(f'{s_color} ({len(df_color_pos)} cells)')
        # the grid always has spare panels; hide them instead of showing empty frames
        for ax_spare in ax[len(ls_color):]:
            ax_spare.axis('off')
        fig.suptitle(s_scene)
        ls_fig.append(fig)
    return(ls_fig)

In [None]:
s_slide = 'HER2A' #'4165NPanc'
df_xy = pd.read_csv(f'{codedir}/Data/Quench/features_{s_slide}_CentroidXY.csv',index_col=0)

In [None]:
%matplotlib inline

In [None]:
#load
# Build the per-cell cluster table, draw one spatial figure per scene and save
# each figure under the name of the slide its scene belongs to.
if s_slide == '4165NPanc':
    df_lei = pd.read_csv(f'{codedir}/Data/{s_slide}_AF_norm_leiden.csv', index_col=0)
else:
    df_lei = adata.to_df()
    # integer cluster ids: plot_clusters picks the first int64 column as the cluster column
    df_lei['leiden'] = [int(item) for item in adata.obs.leiden]
    df_lei['tissue'] = adata.obs.tissue
df_lei['slide_scene'] = [item.split('_cell')[0] for item in df_lei.index]
# average number of scenes per tissue (no longer used for naming; kept in case later cells read it)
i_scene = len(df_lei.slide_scene.unique())/len(df_lei.tissue.unique())
ls_fig = plot_clusters(df_lei,df_xy)
# plot_clusters returns one figure per scene in sorted scene-name order, so the
# same sorted list pairs every figure with its scene. Naming the file after the
# scene's own slide prefix (text before '_scene') stays correct when slides
# have different numbers of scenes and does not depend on what ls_sample
# currently holds.
ls_scene_plotted = sorted(set(df_lei.slide_scene))
for idx, (s_scene, fig) in enumerate(zip(ls_scene_plotted, ls_fig)):
    print(idx)
    s_slide_name = s_scene.split('_scene')[0]
    fig.savefig(f'{codedir}/Figures/SpatialPlots_{s_slide_name}_{int(idx)}_Leiden.png')
    plt.close(fig)
    #break

In [None]:
#if not os.path.exists(f'{codedir}/Data/{s_slide}_AF_norm_leiden.csv'):
#    df_lei.to_csv(f'{codedir}/Data/{s_slide}_AF_norm_leiden.csv')

# Histograms

In [None]:
#histograms
# Per channel: one figure with one row per quenching round. In each row the
# single-cell mean intensities of every sample are overlaid as histograms,
# with a dashed vertical line at the sample mean. Round-to-round differences
# are collected per sample in d_result.
df_plot = df_cyto
# NOTE(review): d_result is keyed by sample only, so every channel pass
# overwrites the differences stored by the previous channel -- confirm intended
d_result = {}
ls_round = ['R1','R2','R3','R4','R5','R6']
for s_channel in ['c2','c3','c4','c5']:
    #plot single cell mean intensity
    df_channel = df_plot.loc[:,df_plot.columns.str.contains(f'{s_channel}_')]
    fig,ax = plt.subplots(len(ls_round),1,figsize=(6,14),dpi=200)
    ax=ax.ravel()
    for idxs, s_sample in enumerate(ls_sample):
        df_sample = df_channel[df_channel.index.str.contains(s_sample)]
        df_sample_diff = pd.DataFrame(index=df_sample.index)
        # per-sample state: previous plotted round, x-axis limit, last axis drawn on
        df_last = None
        s_last = None
        i_max = None
        i_last_ax = None
        for idx, s_round in enumerate(ls_round):
            df_round = df_sample.loc[:,df_sample.columns.str.contains(f'{s_round}c')]
            # a round with no column for this channel has nothing to plot
            if df_round.shape[1] == 0:
                continue
            # x-axis limit = 99th percentile of the first plotted round
            if i_max is None:
                i_max = np.quantile(df_round,.99)
            # NOTE(review): sns.distplot is deprecated in recent seaborn releases; consider migrating
            sns.distplot(df_round,bins=500,kde=False,ax=ax[idx],label=s_sample)
            i_mean = df_round.mean()
            # .iloc[0]: explicit positional access (i_mean is labelled by column name)
            ax[idx].axvline(i_mean.iloc[0],ymin=0,ymax=1,color=f'C{idxs}',linestyle='--')
            ax[idx].set_xlabel('')
            ax[idx].set_title(f'{s_round} {s_channel}')
            ax[idx].set_ylabel('No. of Cells')
            ax[idx].set_xlim(0,i_max)
            # difference to the previous plotted round (none for the first one)
            if df_last is not None:
                df_sample_diff[f'{s_round} - {s_last}'] = df_round.values - df_last.values
            df_last = df_round
            s_last = s_round
            i_last_ax = idx
        # legend and x label go on the last row that was drawn for this sample
        if i_last_ax is not None:
            ax[i_last_ax].legend()
            ax[i_last_ax].set_xlabel(f'Intensity')
        d_result.update({s_sample:df_sample_diff})
    plt.tight_layout()
    fig.savefig(f'{codedir}/Figures/4165NPanc/SingleCellDist_{s_channel}.png')
    # only the c2 figure stays open
    if s_channel != 'c2':
        plt.close(fig)
