# Measure CellPose ROIs to FCS

Likely can be used for other ROI sources, but need:  
1. folder and filename pattern containing component images. 
2. folder and filename pattern containing cell segmentation masks.
3. output folder and filename for csv and FCS file of measures.

Todo:  
1. Take tissue segmentation into account.
2. consider running pixie or CellSeg (Stanford pipeline) 
3. adapt for higher resolution images

What this does:  
1. Reads in masks and component image files
2. Normalizes the component images so that the intensities are on the same scale.
3. use scikit-image.measure to collect measures of interest at cell and nuclear level? My masks are at entire-cell level. 
4. Prepares files in ark-analysis folder & naming conventions to get total intensities, normalized


Depends on:  
Run Format_Polaris_for_ark notebook before this.

In [None]:
import numpy as np
import pandas as pd
import os
import tifffile as tif
import fnmatch
import re

from skimage import io, color, filters, exposure
import cv2

import matplotlib.pyplot as plt
# %matplotlib inline 

# ROI measures
from skimage.measure import label, regionprops, regionprops_table

import xarray as xr
import shutil

In [None]:
#!pip install flowkit

In [None]:
# include measurement code from ark-analysis
import marker_quantification

In [None]:
import flowkit as fk
from bokeh.io import output_notebook, show
#%bokeh inline 

In [None]:
# load files
# Set file paths
root_dir = '/Users/annmstrange/Documents/Projects/Tumor IF'

components_dir= os.path.join(root_dir, "Panel2/Export P28 21 full40x")
#components_dir = os.path.join(root_dir, "Panel2/Export P28 full40x")  
#components_dir = os.path.join(root_dir, "Panel3/Export P68 full40x")

#mask_img_path='Panel2/CellPose_moreCyto/Masks'
masks_dir = os.path.join(root_dir, "Panel2/CellPose40x_21/Masks")
#masks_dir = os.path.join(root_dir, "Panel2/CellPose40x_23/Masks")
#masks_dir = os.path.join(root_dir, "Panel3/CellPose40x_23/Masks")

tifs_dir=os.path.join(root_dir, 'Panel2/ark-analysis_21')
#tifs_dir=os.path.join(root_dir, 'Panel2/ark-analysis_23')
#tifs_dir=os.path.join(root_dir, 'Panel3/ark-analysis_23')


fcs_path=os.path.join(root_dir,'Panel3/FCS')
output_path_P28_21 = os.path.join(root_dir,'Panel2/output_21')
output_path_P28_23 = os.path.join(root_dir,'Panel2/output_23')
output_path_P68 = os.path.join(root_dir,'Panel3/output_23')

channels_p28 = ['CD3','pSTAT3 Y705', 'CD4', 'pSTAT5','pSTAT3 S727','pSTAT1', 'SOX10S100', 'CD8','DAPI','Autofluorescence']
# Opal 480, 520, 540, 570, 620, 650, 690, 780, DAPI
channels_p68 = ['pSTAT6','SOX10S100','pSTAT4', 'CD4','CD3',
                'pSTAT1 S727', 'pSTAT2', 'CD8','DAPI','Autofluorescence']

#channels = ['CD3','Ki67', 'CD4', 'SOX10S100', 'CD39','CD83','CD38', 'CD8','DAPI','Autofluorescence']
opals = ['Opal 480', 'Opal 520', 'Opal 540', 'Opal 570', 'Opal 620','Opal 650','Opal 690', 'Opal 780', 'DAPI', '']

output_path = output_path_P28_21
channels=channels_p28

In [None]:
def get_files_in_folder(src, pattern):
    '''
    Recursively collect the files under a folder whose names match a pattern.

    Args: src is the full path where to look recursively
    pattern: string like '*_pattern.tif' to use with fnmatch.filter
    Returns: sorted list of full filenames (empty list when nothing matches)
    '''
    # os.walk yields (folder, sub-folder names, file names); only the file
    # names are matched, and each hit is joined back onto its own folder.
    return sorted(
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(src)
        for filename in fnmatch.filter(filenames, pattern)
    )

 
fname_component = get_files_in_folder(components_dir, '*_component_data.tif') 

print('found {0} files matching the pattern'.format(len(fname_component)))
print(fname_component[0])


In [None]:
# for normalization
def get_max_intensities_by_channel (filename_list):
    '''
    Track the largest pixel value seen along the first image axis over a set of files.

    Arguments: filename_list is list of component files
    Returns: array of length 10 with the max intensity in each channel
             (all zeros when filename_list is empty)
    '''
    # assumes each image is laid out (channel, row, col) with 10 channels — TODO confirm
    running_max = np.zeros(10)

    for image_path in filename_list:
        pixels = io.imread(image_path)
        # reduce axes 1 and 2 together, leaving one value per index of axis 0
        per_channel = pixels.max(axis=(1, 2))
        # element-wise maximum of the running tally and this image's values
        running_max = np.array([running_max, per_channel]).max(axis=0)

    return running_max

print("checking {} images in list".format(len(fname_component)))
max_intensities = get_max_intensities_by_channel(fname_component)
print(max_intensities)

In [None]:
# get a component from our data
fname_masks = get_files_in_folder(masks_dir, 'MASK*_rgb.tif') 
mask_file1 = fname_masks[0]
mask_file1



In [None]:
# component files for intensity
# get a component from our data
fname_components = get_files_in_folder (components_dir, '*_component_data.tif') 
component_file1 = fname_components[0]
component_file1

In [None]:
# better to make sure the filenames exactly match up. 
# with a mask file, convert filename

comp_file = mask_file1.replace("MASK_", "").replace("_rgb", "_component_data").replace(masks_dir, components_dir)
print(comp_file)

os.path.exists(comp_file)

In [None]:
# For normalization of each channel's intensity, get max intensities

In [None]:
# Todo: Need X Position, Y Position from centroid tuple 0 and 1 (Y and X)
#.     column rename centroid-0 to Y Position. centroid-1 to X Position
# Do I want nuclear vs whole cell? labels to start with 'Entire Cell' to match InForm
# intensity_max, stdev? 

# columns phenoptr spatial probably wants _____

In [None]:
# test xarray is working

# data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]})
# data

In [None]:
masks_dir

In [None]:
# get same stats as ark-analysis:
# now extract the segmented imaging data to create normalized and transformed expression matrices
# note that if you're loading your own dataset, please make sure all the imaging data is in the same folder
# with each fov given its own folder and all fovs having the same channels

#fldr = 'Panel2/ark-analysis/091221 P9HuP28 #10 S16-003395 A1_[13463,50983]'
#tiff_dir = 'Panel2/ark-analysis'
sample1 = '091221 P9HuP28 #10 S16-003395 A1_[19347,49169]'
#mask_dir = os.path.join(root_dir,'Panel2/ark-analysis_21', sample1) # /091221 P9HuP28 #14 S15-014984 A4_[17537,46960]'
#mask_dir = os.path.join(masks_dir, sample1)
# expect mask fldr to have 2 files ending with _feature_0.tif and _feature_1.tif



cell_table_size_normalized, cell_table_arcsinh_transformed = \
    marker_quantification.generate_cell_table(segmentation_dir=os.path.join(tifs_dir, sample1),
                                              tiff_dir=tifs_dir,
                                              img_sub_folder="TIFs",
                                              is_mibitiff=False,
                                              # this subfolder is looked in for TIFs
                                              fovs=[sample1],
                                              batch_size=1
                                              )

                                              #nuclear_counts=nuclear_counts)

In [None]:
cell_table_size_normalized['label']

In [None]:
tifs_dir

In [None]:
# in function

import warnings
# NOTE(review): catch_warnings() restores the previous warning filters when
# the block exits, so the two "ignore" filters below are only active inside
# this block. Nothing else runs inside it, so they do not silence warnings
# raised by later code.
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    warnings.filterwarnings("ignore", category=FutureWarning)

    
#fldr = 'Panel2/ark-analysis/091221 P9HuP28 #10 S16-003395 A1_[13463,50983]'
#tiff_dir = os.path.join(root_dir,'Panel2/ark-analysis_21')
sample1 = '091221 P9HuP28 #10 S16-003395 A1_[19347,49169]'
#mask_dir = 'Panel2/ark-analysis_21/091221 P9HuP28 #10 S16-003395 A1_[19347,49169]'
#mask_dir = os.path.join(root_dir,'Panel2/ark-analysis_21', sample1)
# expect mask fldr to have 2 files ending with _feature_0.tif and _feature_1.tif


def get_total_intensities(mask_dir, component_dir, roi_name):
    '''
    Run marker_quantification.generate_cell_table for one ROI and return
    its size-normalized cell table.

    Arguments:
        mask_dir is the path passed on as segmentation_dir (where the mask
            for this ROI is looked up)
        component_dir is path to where a 10 channel component image is;
            passed on as tiff_dir, with channel TIFs read from the "TIFs"
            sub-folder
        roi_name is the single fov name to process
    Returns: the size-normalized cell table; the arcsinh-transformed table
        that generate_cell_table also returns is discarded
    '''
    print(mask_dir, component_dir)

    # The quantification call has to run INSIDE the catch_warnings block:
    # the filters installed here are removed again as soon as the block exits.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)
        # pandas: "... and will be removed from pandas in a future version.
        # Use pandas.concat instead."
        warnings.filterwarnings("ignore",
                                message="The frame.append method is deprecated")

        cell_table_size_normalized, _cell_table_arcsinh_transformed = \
            marker_quantification.generate_cell_table(segmentation_dir=mask_dir,
                                                      tiff_dir=component_dir,
                                                      # this subfolder is looked in for TIFs
                                                      img_sub_folder="TIFs",
                                                      is_mibitiff=False,
                                                      fovs=[roi_name],
                                                      batch_size=1
                                                      )

    return cell_table_size_normalized



cell_table_size_norm = get_total_intensities(os.path.join(tifs_dir, sample1),
                                             tifs_dir, sample1)
cell_table_size_norm.reset_index(inplace=True)
cell_table_size_norm.rename({'index': 'cell_id'}, axis="columns", inplace=True)
cell_table_size_norm.head()



In [None]:
# enumerate sample rois
df_samples = pd.DataFrame(enumerate (sorted(os.listdir(tifs_dir))))
df_samples.columns = ['index', 'sample_nm']
print(len(df_samples))
df_samples.head()

In [None]:
# get sample_nm index

#df_cells2 = cell_table_size_norm.join(df_samples, how="inner", on=['fov', 'sample_nm'])
#print(len(df_cells2))

sample_idx = df_samples[df_samples['sample_nm']=='091221 P9HuP28 #13 S15-18369 A2_[14965,56532]']
sample_idx = df_samples[df_samples['sample_nm']=='091221 P9HuP28 #10 S16-003395 A1_[19347,49169]']
sample_idx.iloc[0,0]

In [None]:
def make_unique_cell_id(idx, cell_id, width=5):
    '''
    Combine a sample index and a cell label into one float key "<idx>.<label>".

    The label is zero-padded to a fixed number of decimals. Without padding,
    labels such as 15 and 150 both produce 2.15 and the key is not unique.
    The default width matches the 5-digit padding used when the cell tables
    are built.

    Arguments:
        idx is the integer sample index (part before the decimal point)
        cell_id is the integer cell label within that sample
        width is the number of decimal digits reserved for the label
    Returns: float such as 2.00015 for idx=2, cell_id=15
    Raises: ValueError if cell_id is negative or does not fit in `width` digits
    '''
    label = int(cell_id)
    if label < 0 or label >= 10 ** width:
        raise ValueError(
            "cell_id {} does not fit in {} digits".format(cell_id, width))
    return float("{}.{}".format(idx, str(label).zfill(width)))

make_unique_cell_id(2, 15)

In [None]:
tifs_dir

## ark-analysis measurements

In [None]:
# loop subfolders of ark-analysis
#mask_dir = os.path.join(root_dir, 'Panel2/ark-analysis_21')
#tiff_dir = os.path.join(root_dir, 'Panel2/ark-analysis_21')


# del full_cell_table
#gc.collect()

# Collect one table per ROI and concatenate once after the loop; calling
# pd.concat inside the loop re-copies every earlier row on each pass.
ark_tables = []

for i, sample_nm in enumerate(sorted(os.listdir(tifs_dir))):
    print(sample_nm)
    sample_dir = os.path.join(tifs_dir, sample_nm)
    if not os.path.isdir(sample_dir):
        # plain files in tifs_dir are listed but are not ROI folders
        continue

    print("getting ark metrics for {}".format(sample_nm))
    print (i)
    cell_table_size_norm = get_total_intensities(sample_dir,
                                                 tifs_dir,
                                                 sample_nm)

    if (len(cell_table_size_norm) == 0):
        continue

    # helpful columns
    cell_table_size_norm.reset_index(inplace=True)
    cell_table_size_norm['label'] = cell_table_size_norm['label'].astype(int)
    # look the index up in df_samples so both measurement tables share one key
    sample_idx = df_samples[df_samples['sample_nm']==sample_nm].iloc[0,0]
    print("processing sample idx {}".format(sample_idx))
    cell_table_size_norm['sample_idx'] = sample_idx

    # unique_cell_id = "<sample_idx>.<label zero-padded to 5 digits>" as float.
    # Built on the whole column instead of row-wise apply(), which returns a
    # DataFrame (not a Series) for an empty table and depends on row dtypes.
    # Same format as before: only the last 5 digits of a label are kept, so
    # labels above 99999 would still collide.
    padded_label = cell_table_size_norm['label'].astype(str).str.zfill(5).str[-5:]
    cell_table_size_norm['unique_cell_id'] = \
        (str(sample_idx) + "." + padded_label).astype(float)

    ark_tables.append(cell_table_size_norm)

# stays defined (as an empty table) even when no ROI produced any rows
full_cell_table = pd.concat(ark_tables, axis=0) if ark_tables else pd.DataFrame()
            
               

In [None]:
# cell_table_size_norm

In [None]:
full_cell_table.to_csv(os.path.join(output_path, 'ark-measures.csv')) 

In [None]:
len(full_cell_table)

In [None]:
# summary by sample
full_cell_table.groupby('fov')['unique_cell_id'].count()

In [None]:
# any non-unique keys?
df_check_unique = full_cell_table.groupby('fov')['unique_cell_id'].value_counts()
df_check_unique[df_check_unique > 1]

In [None]:
# 0091221 P9HuP28 #13 S15-18369 A2_[14965,56532]
# idx 166
# ValueError: Cannot set a DataFrame with multiple columns to the single column unique_cell_id

# also 091221 P9HuP28 #13 S15-18369 A2_[5659,54652] 

In [None]:
print(len(full_cell_table))
full_cell_table.head()



In [None]:
colnames = full_cell_table.columns
colnames = [re.sub(r'centroid-0', r'Cell_Y_Position', a) for a in colnames]
colnames = [re.sub(r'centroid-1', r'Cell_X_Position', a) for a in colnames]

colnames = ['ark_' + col for col in colnames]
full_cell_table.columns = colnames

In [None]:
print(len(full_cell_table))
full_cell_table.head()

In [None]:
# export entire file or specific rois if we want

In [None]:
# save extracted data as csv for downstream analysis
# cell_table_size_normalized.to_csv(os.path.join(mask_dir, 'cell_table_size_normalized.csv'),
#                                  index=False)
# cell_table_arcsinh_transformed.to_csv(os.path.join(mask_dir, 'cell_table_arcsinh_transformed.csv'),
#                                      index=False)

In [None]:
def get_stats_for_file(mask_file, component_file, marker_list, roi_idx):
    '''
    Measure every labelled cell of one ROI: mean intensity per channel plus
    shape features, merged into one table keyed by cell_id.

    Arguments: mask_file as full filename of cell segmentation mask
      component_file as full filename of the multi-channel image used for
        intensity measurement; channel i is read as image[i, :, :]
      marker_list is ordered list of labels for each channel
      roi_idx is the iterator integer to keep cell ids unique; cell_id is
        built as "<roi_idx>-<label>"
    Returns: dataframe of cells with feature measurements
    Note: intensity column names also use the module-level `opals` list,
      indexed in step with marker_list.
    '''
    shape_properties = ['label', 'area', 'eccentricity', 'major_axis_length', 'minor_axis_length',
                        'perimeter', 'centroid', 'convex_area',
                        'equivalent_diameter']

    # open files
    mask = io.imread(mask_file)
    component = io.imread(component_file, plugin="tifffile")

    # sample name = component file name without its suffix;
    # sample id = the part of that name before the first underscore
    sample_name = os.path.basename(component_file).replace('_component_data.tif','')
    sample_id = sample_name.split("_")[0]
    print(sample_id)
    # this will be location of components_data.tif files
    path = os.path.dirname(component_file)
    print(sample_name)
    print(path)

    roi_tag = str(roi_idx)

    # one small table per channel, merged on cell_id as we go
    for i, marker in enumerate(marker_list):
        channel_means = pd.DataFrame(regionprops_table(mask,
                                                       intensity_image=component[i,:,:],
                                                       properties=['label', 'intensity_mean']))

        # colname like "Entire Cell CD8 (Opal 780) Mean"
        channel_means.columns = ['label', 'Entire Cell '+ marker +' (' + opals[i] +') Mean']
        channel_means['cell_id'] = roi_tag + "-" + channel_means['label'].astype(str)
        channel_means.drop('label', axis=1, inplace=True)

        if i == 0:
            # sample-level columns are attached once, on the first channel,
            # so later merges do not duplicate them
            channel_means['Sample_ID'] = sample_id
            channel_means['Sample_Name'] = sample_name
            channel_means['path'] = path
            merged = channel_means
        else:
            merged = merged.merge(channel_means, how='outer', left_on='cell_id', right_on='cell_id')

    # get the rest of the properties (no intensity image needed)
    shape_table = pd.DataFrame(regionprops_table(mask,
                                                 properties=shape_properties))
    shape_table['point'] = roi_tag
    shape_table['cell_id'] = roi_tag + "-" + shape_table['label'].astype(str)

    return merged.merge(shape_table, how='outer', left_on='cell_id', right_on='cell_id')




df= get_stats_for_file(mask_file1, component_file1, channels, 0)

# for FCS, keep only cell_id and Entire Cell measures
# to keep only the Intensity measures
colnames =  [col for col in df.columns if 'Entire Cell' in col]
colnames = ['cell_id', 'Sample_ID'] + colnames 
print(colnames)

# let's keep the numeric columns:
df = df.select_dtypes(exclude='object')
print(df.columns)

# narrow down for now (FCS purposes)
#df = df[colnames]

df.head()




In [None]:
def get_stats_for_mantis(mask_file, component_file, marker_list):
    '''
    Measure every labelled cell of one ROI and return the table in mantis format.

    Arguments: mask_file as full filename of cell segmentation mask
      component_file as full filename of the multi-channel image used for
        intensity measurement; channel i is read as image[i, :, :]
      marker_list is ordered list of labels for each channel
    Returns: dataframe of cells with feature measurements in mantis format
        note: cell_id remains untouched so it will match the mask
        and only numeric attributes allowed as features.
        A mask without any labelled cell gives an empty dataframe instead of
        failing (previously every channel was skipped and the final merge hit
        an unassigned variable).
    Note: intensity column names also use the module-level `opals` list,
      indexed in step with marker_list.
    '''
    shape_properties = ['label', 'area', 'major_axis_length', 'minor_axis_length',
                        'perimeter', 'centroid']

    # open files
    img_mask = io.imread(mask_file)
    img_component = io.imread(component_file, plugin="tifffile")
    sample_name = os.path.basename(component_file).replace('_component_data.tif','')
    sample_id = sample_name.split("_")[0]
    print(sample_id)
    # this will be location of components files but orig would be of im3 files
    path = os.path.dirname(component_file)

    print(sample_name)
    print(path)

    # None until the first channel table has been stored
    df_merge = None

    for i, marker in enumerate(marker_list):
        # mean intensity of channel i inside every labelled region
        current_props = pd.DataFrame(regionprops_table(img_mask,
                                                       intensity_image=img_component[i,:,:],
                                                       properties=['label', 'intensity_mean']))

        # colname like "Entire Cell CD8 (Opal 780) Mean"
        current_props.columns = ['cell id', 'Entire Cell '+ marker +' (' + opals[i] +') Mean']

        # Empty tables are merged like any other, so the result keeps one
        # column per channel even when the mask holds no cells.
        # assumes regionprops_table still returns both columns (with zero
        # rows) for such a mask — TODO confirm for the installed scikit-image
        if df_merge is None:
            df_merge = current_props
        else:
            df_merge = df_merge.merge(current_props, how='outer', left_on='cell id', right_on='cell id')

    # get the rest of the properties
    shape_table = pd.DataFrame(regionprops_table(img_mask,
                                                 properties=shape_properties))
    shape_table.rename({'label':'cell id'}, axis=1, inplace=True)

    if df_merge is None:
        # marker_list was empty: only the shape features are available
        return shape_table

    return df_merge.merge(shape_table, how='outer', left_on='cell id', right_on='cell id')



mask_file1
df= get_stats_for_mantis(mask_file1, component_file1, channels)
df.head()


In [None]:
# loop through all files, group by sample
# Per-ROI tables are collected in a list and concatenated once after the loop.
roi_tables = []
for i, mask_file in enumerate(fname_masks):

    # derive the matching component file name from the mask file name
    comp_file = mask_file.replace("MASK_", "").replace("_rgb", "_component_data") \
        .replace(masks_dir, components_dir)
    if not os.path.exists(comp_file):
        # the existence check used to be evaluated and thrown away; report
        # the unmatched mask and move on instead of failing inside imread
        print("no component file for mask {}; skipping".format(mask_file))
        continue

    sample_nm = os.path.basename(mask_file).replace("MASK_", "").replace("_rgb.tif", "")
    print("sample_nm {}".format(sample_nm))

    df= get_stats_for_file(mask_file, comp_file, channels, i)

    if (len(df) == 0):
        continue

    # adding
    df.rename({'cell_id': 'temp_cell_id'}, axis="columns", inplace=True)
    df.reset_index(inplace=True)  # already covered
    # Lookup sample index from same table as ark data, for good matching key
    sample_idx = df_samples[df_samples['sample_nm']==sample_nm].iloc[0,0]
    df['sample_idx'] = sample_idx

    # unique_cell_id = "<sample_idx>.<label zero-padded to 5 digits>" as float,
    # built on the whole column rather than row by row. Same format as the
    # ark table key: only the last 5 digits of a label are kept.
    padded_label = df['label'].astype(int).astype(str).str.zfill(5).str[-5:]
    df['unique_cell_id'] = (str(sample_idx) + "." + padded_label).astype(float)

    roi_tables.append(df)

# stays defined (as an empty table) even when no ROI produced any rows
df_full = pd.concat(roi_tables, axis=0) if roi_tables else pd.DataFrame()
        

In [None]:
print(len(df_full))
df_full.head()

In [None]:
# summary by sample
df_full.groupby('Sample_Name')['unique_cell_id'].count()

In [None]:
df_full.columns
df_full['label'] = np.arange(len(df_full))


In [None]:
df_full  # 170240 rows × 25 columns

### Combine df_full and full_cell_table

In [None]:
df_all = df_full.merge (full_cell_table, how="inner", left_on="unique_cell_id", right_on="ark_unique_cell_id" )

print(len(df_full), len(df_all))
# 292632 370369 means some unique cell ids are not unique

In [None]:
df_all.to_csv(os.path.join(output_path, 'panel2_cell_features_all_samples.csv'), header=True)

In [None]:
df_full.to_csv(os.path.join(output_path, 'panel2_mean-cell-measures.csv'), header=True)

In [None]:
df_samples.iloc[66,]

## Extra Play Stuff (ignore)

In [None]:
# Loop all for Mantis file structure

mantis_dir = os.path.join(root_dir, 'Panel2/mantis-viewer')
print('mantis dir exists: {}'.format(os.path.exists(mantis_dir)))


# only the first mask for now ([:1])
for i, mask_file in enumerate(fname_masks[:1]):

    # derive the matching component file name from the mask file name
    comp_file = mask_file.replace("MASK_", "").replace("_rgb", "_component_data") \
        .replace(masks_dir, components_dir)

    sample_name = os.path.basename(comp_file).replace('_component_data.tif','')
    sample_id = sample_name.split("_")[0]
    print(sample_name)

    # one folder per ROI; makedirs also creates mantis_dir itself when it is
    # missing, where os.mkdir would raise FileNotFoundError
    mantis_dir_roi = os.path.join(mantis_dir, sample_name)
    if not os.path.exists(mantis_dir_roi):
        print('create dir {}'.format(mantis_dir_roi))
        os.makedirs(mantis_dir_roi)

    # image and mask are copied next to the feature table
    shutil.copy(comp_file, mantis_dir_roi)
    shutil.copy(mask_file, mantis_dir_roi)

    df_roi = get_stats_for_mantis(mask_file, comp_file, channels)
    print(df_roi.columns)
    df_roi.to_csv(os.path.join(mantis_dir_roi, 'cell_features.csv'), header=True)
    



## Loop all for FCS

In [None]:
# chop up df_full by sample to create FCS files

# Create the export folder up front so the first export cannot fail on a
# missing directory.
# NOTE(review): fcs_path is defined under Panel3 while output_path points at
# Panel2 — confirm this is the intended destination.
fcs_dir = os.path.join(root_dir, fcs_path)
os.makedirs(fcs_dir, exist_ok=True)

# for each sample id, select from df_full
sample_id_list = pd.unique(df_full['Sample_ID'])
print(len(sample_id_list),sample_id_list[:2])

for i, sample_id in enumerate(sample_id_list):

    # get our sample (multiple rois)
    sample_df = df_full[df_full['Sample_ID']==sample_id]
    # drop the object (text) columns, leaving only the non-object ones for the FCS events
    sample_df = sample_df.select_dtypes(exclude='object')

    sample_from_df = fk.Sample(sample_df, sample_id=sample_id)
    # e.g. 091221 P9HuP28 #14 S15-014984 A4
    fcs_filename = sample_id + '.fcs'
    sample_from_df.export(os.path.join(fcs_dir, fcs_filename),
                     source='raw')
    print('Export {} rows to {}'.format(sample_from_df.event_count, fcs_filename))
    

In [None]:
print(df_full.columns)
# to csv to match Consolidated_data.txt

df_full.to_csv(os.path.join(root_dir, 'Panel2','cellpose_metrics.csv'))




In [None]:
# Add some columns we need for phenoptr:

df_full["Tissue Category"] = "Total"
df_full["Region ID"] = 0
# Raw string literals keep every backslash of the UNC path as written. In a
# normal literal the leading "\\" collapses to one backslash and "\0" (in
# "\091021") becomes a NUL character. The two adjacent literals are joined by
# Python, so splitting the line adds no whitespace to the path.
# assumes the folder is "VectraPolaris" with no trailing space — TODO confirm
df_full['path'] = (r'\\data.ucdenver.pvt\dept\SOM\HIMSR\Archive\All\Polaris\Data\VectraPolaris'
                   r'\Woods 2021\091021 P9HuP27 #01 S18-20937 F4\Scan1\MSI')



#Sample_Name = '091021 P9HuP27 #01 S18-20937 F4_[10177,41847].im3'
#Lab ID
Slide_ID  = '091021 P9HuP27 #01 S18-20937 F4'

# For Consolidated data format, we'll need Phenotypes e.g. CD8 as CD8+ or CD8- from gating output.

In [None]:

# list comprehension to make string changes e.g. cell_id to fil
colnames = df_full.columns
colnames = [re.sub(r'centroid-0', r'Cell Y Position', a) for a in colnames]
colnames = [re.sub(r'centroid-1', r'Cell X Position', a) for a in colnames]

# columns I want include
# tag
# Sample Name 
# e.g. 'Entire Cell CD3 (Opal 480) Mean'
# Tissue Category = 'Total'
# Region ID = 1

#df_full.columns = colnames
colnames

In [None]:
# ensure cell labels remain unique

# list comprehension to make string changes e.g. cell_id to fil
colnames = df_full.columns
colnames = [re.sub(r'cell_id', r'fil', a) for a in colnames]


In [None]:
df.head()
df.drop('cell_id', axis=1, inplace=True)
df.drop('Sample_ID', axis=1, inplace=True) # 091221 P9HuP28 #14 S15-014984 A4


In [None]:
#df = df.reset_index()
df.rename(columns={'label': 'fil'}, inplace=True)

In [None]:
df.head()

In [None]:
# to FCS with flowkit
# https://flowkit.readthedocs.io/en/latest/notebooks/flowkit-tutorial-part01-sample-class.html


sample_from_df = fk.Sample(df, sample_id='091221 P9HuP28 #14 S15-014984 A4')
sample_from_df

# sample1 = fk.Sample(
#     fcs_path_or_data = df_full,
#     id=None,
#     channel_labels=None,
#     compensation=None,
#     null_channel_list=None,
#     ignore_offset_error=False,
#     ignore_offset_discrepancy=False,
#     use_header_offsets=False,
#     cache_original_events=False
#     #subsample=10000
# )

In [None]:
# Q: do we load individual FCS files for each slide? (aka Sample), (up to 15 slides per patient)
#.  or treat as concatenated file with additional attributes? 

In [None]:
sample_from_df.channels

In [None]:
sample_from_df.get_metadata()

In [None]:
sample_from_df.channels

In [None]:
sample_from_df.pnn_labels


In [None]:
sample_from_df.pns_labels

In [None]:
help(sample_from_df.plot_histogram)

In [None]:
# plotting a histogram
p = sample_from_df.plot_histogram('Entire Cell CD3 (Opal 480) Mean', source='raw')
show(p)

In [None]:
# by default, plot_contour uses sub-sampled events for performance
x_min = y_min = 0
x_max = y_max = 250

f = sample_from_df.plot_contour('Entire Cell CD4 (Opal 540) Mean', 
                                'Entire Cell CD8 (Opal 780) Mean', 
                                source='raw')
                                #x_min=x_min, x_max=x_max, y_min=y_min, y_max=y_max)
print(type(f))
plt.show(f)

In [None]:
# plotting a matplotlib histogram
plt.hist(df["Entire Cell CD3 (Opal 480) Mean"])
plt.show()

In [None]:
p = sample_from_df.plot_scatter(
    'Entire Cell CD4 (Opal 540) Mean', 
    'Entire Cell CD8 (Opal 780) Mean', 
    source='raw',
    #y_min=0., y_max=130, x_min=0., x_max=280, 
    color_density=True
)
show(p)

In [None]:
xform = fk.transforms.LogicleTransform('my_logicle', param_t=1024, param_w=0.5, param_m=4.5, param_a=0)
sample_from_df.apply_transform(xform)
# source is 'raw' so not too useful for visualization
p = sample_from_df.plot_scatter('Entire Cell CD4 (Opal 540) Mean', 
                        'Entire Cell CD8 (Opal 780) Mean', 
                        source='raw')
show(p)



In [None]:

# 091221 P9HuP28 #14 S15-014984 A4
sample_from_df.export(os.path.join(root_dir, fcs_path,'091221 P9HuP28 #14 S15-014984 A4_test1.fcs'),
                     source='raw')