In [1]:
import sys
from pathlib import Path, PureWindowsPath

import matplotlib.pyplot as plt
import matplotlib.pylab as pl
from matplotlib.colors import ListedColormap
import numpy as np
import pandas as pd
import umap
from mpl_toolkits.mplot3d import Axes3D
from skimage import measure
from sklearn.decomposition import PCA

In [2]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before running each cell so edits to local source files take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Make the parent of the working directory (import path for config) and its
# "src" sub-folder (import path for src) importable, without adding duplicates.
project_root = Path.cwd().parents[0]
for module_path in (str(project_root), str(project_root / "src")):
    if module_path not in sys.path:
        sys.path.append(module_path)

In [4]:
# NOTE(review): the wildcard import hides where names come from. Later cells use
# `data_dir` and `data_ROI`, which are not defined in the visible cells and so
# presumably come from `config` — consider importing them explicitly.
from config import *
# Project I/O helpers; this notebook calls `io.read_img` and `io.read_cell_mask`.
# Note that the alias `io` is also the name of a standard-library module.
import my_io as io

# Import metadata information

In [5]:
# Load the metadata table and preview its first rows.
metadata_path = data_dir / 'metadata.csv'
df_metadata = pd.read_csv(metadata_path)
df_metadata.head()

Unnamed: 0,Pos,Grades,Stage,Directory
0,B1,2,I,Y:\coskun-lab\Thomas\Lung IMC\data\ROI\ROI001_B1
1,B3,2,I,Y:\coskun-lab\Thomas\Lung IMC\data\ROI\ROI005_B3
2,B5,2,II,Y:\coskun-lab\Thomas\Lung IMC\data\ROI\ROI006_B5
3,C6,AT,AT,Y:\coskun-lab\Thomas\Lung IMC\data\ROI\ROI007_C6
4,D7,3,IIIA,Y:\coskun-lab\Thomas\Lung IMC\data\ROI\ROI008_D7


In [6]:
# Reduce each ROI directory path to its final folder name (e.g. 'ROI001_B1').
# The metadata stores Windows-style paths, so they are parsed with
# PureWindowsPath: it behaves the same on any host OS, accepts both '\' and '/'
# separators and ignores a trailing separator, where a plain split('\\')[-1]
# would return ''.
datasets = [PureWindowsPath(directory).name for directory in df_metadata.Directory]

In [7]:
# List every available ROI name; one of them is selected by position afterwards.
print(datasets)

['ROI001_B1', 'ROI005_B3', 'ROI006_B5', 'ROI007_C6', 'ROI008_D7', 'ROI009_D9', 'ROI012_E1', 'ROI015_B6', 'ROI016_F3', 'ROI017_F2', 'ROI018_G1', 'ROI019_G5', 'ROI020_E5', 'ROI021_E7']


In [8]:
# Choose the ROI to process by its position in `datasets`.
DATASET_INDEX = 4
dataset = datasets[DATASET_INDEX]
print(dataset)

ROI008_D7


# Read images and cell mask

In [9]:
# Read the images and their marker names for the selected ROI, then build a
# lookup from marker name to its image.
imgs, markers = io.read_img(dataset, data_ROI)
marker2img = {marker: img for marker, img in zip(markers, imgs)}
print(f'Image stack shape of {imgs.shape}')

Image stack shape of (22, 800, 800)


In [10]:
# Read the cell mask for the selected ROI and report its shape and largest value.
# NOTE(review): the maximum equals the cell count only if labels run 1..N
# without gaps — confirm against the segmentation output.
mask = io.read_cell_mask(dataset, data_ROI)
mask_shape, max_label = mask.shape, mask.max()
print(f'Mask shape of {mask_shape}')
print(f'Number of cell is {max_label}')

Mask shape of (800, 800)
Number of cell is 3815


In [11]:
def get_cell_expression_level(marker2img_dict, mask):
    """Compute, per labelled region and per marker, area x mean intensity.

    Multiplying a region's area by its mean intensity gives the total signal of
    the marker inside that region.

    Parameters
    ----------
    marker2img_dict : dict
        Maps a marker name to its intensity image. Each image is passed,
        together with ``mask``, to ``skimage.measure.regionprops_table``.
    mask : ndarray
        Label image passed to ``regionprops_table``.

    Returns
    -------
    pandas.DataFrame
        One row per region label (index named ``label``) and one column per
        marker, in the iteration order of ``marker2img_dict``. If the mapping
        is empty, a frame with the label index and no columns is returned.
    """
    # Area of every labelled region, indexed by label; computed once and reused.
    area = pd.DataFrame(
        measure.regionprops_table(mask, properties=['label', 'area'])
    ).set_index('label')['area']

    # pd.concat raises on an empty list, so handle "no markers" explicitly.
    if not marker2img_dict:
        return pd.DataFrame(index=area.index)

    # Multiply each region's area by its mean intensity, one marker at a time.
    columns = []
    for marker, img in marker2img_dict.items():
        mean_intensity = pd.DataFrame(
            measure.regionprops_table(mask, img, properties=['label', 'mean_intensity'])
        ).set_index('label')['mean_intensity']
        # Label-indexed Series are matched by label, not by row position.
        columns.append((area * mean_intensity).rename(marker))

    return pd.concat(columns, axis=1)

df_exp = get_cell_expression_level(marker2img, mask)

In [12]:
# Turn the region labels into a 'cell_<label>' identifier column and export the
# expression table as a tab-separated file.
# The guard keeps this cell re-runnable: once the labels live in a `cell_ID`
# column, repeating the in-place steps would make reset_index raise because
# the column already exists.
if 'cell_ID' not in df_exp.columns:
    df_exp.index.names = ['cell_ID']
    df_exp.reset_index(inplace=True)
    df_exp['cell_ID'] = 'cell_' + df_exp['cell_ID'].astype(str)

# Name the file after the selected ROI ('ROI008_D7' -> 'D7_expression.txt') so
# that choosing another `dataset` cannot overwrite or mislabel the D7 export.
roi_tag = dataset.split('_')[-1]
df_exp.to_csv(f'{roi_tag}_expression.txt', sep='\t', index=False)
df_exp.head()

Unnamed: 0,cell_ID,SMA,Vimentin,PanKeratin,H3K9me3,PDL1,CD44,FoxP3,CD4,ECadherin,...,PD1,GranzymeB,Ki67,Col1,CD3,Histone3,CD45RO,MHCII,DNA1,DNA2
0,cell_1,1167.0,1171.0,104.0,1320.0,360.0,317.0,157.0,461.0,506.0,...,124.0,105.0,137.0,464.0,455.0,3051.0,962.0,2737.0,8027.0,9616.0
1,cell_2,914.0,1988.0,2013.0,5314.0,908.0,435.0,325.0,189.0,3058.0,...,188.0,614.0,254.0,310.0,346.0,8401.0,1504.0,12624.0,17860.0,22686.0
2,cell_3,921.0,1214.0,137.0,744.0,246.0,198.0,51.0,337.0,542.0,...,101.0,103.0,23.0,630.0,229.0,1804.0,1140.0,2265.0,9271.0,11707.0
3,cell_4,542.0,1590.0,3536.0,4278.0,683.0,368.0,325.0,464.0,3214.0,...,112.0,1434.0,244.0,478.0,507.0,7364.0,2222.0,7520.0,19102.0,23370.0
4,cell_5,579.0,1778.0,896.0,3768.0,809.0,414.0,218.0,119.0,1802.0,...,207.0,280.0,69.0,247.0,259.0,6559.0,847.0,7757.0,8050.0,10211.0


In [19]:
# Export the centroid of every labelled region as tab-separated X/Y coordinates.
df_coord = pd.DataFrame(
    measure.regionprops_table(mask, properties=['label', 'centroid'])
).set_index('label')
df_coord.index.names = ['cell_ID']
# The first centroid column is labelled Y and the second X.
df_coord.columns = ['Y', 'X']
df_coord = (
    df_coord[['X', 'Y']]
    # assign() works on a copy, so negating Y never writes into a column subset
    # of the previous frame (which can emit SettingWithCopyWarning on some
    # pandas versions).
    # NOTE(review): Y is negated presumably so that it increases upwards when
    # plotted — confirm with the downstream consumer of this file.
    .assign(Y=lambda frame: -frame['Y'])
    .reset_index()
    .assign(cell_ID=lambda frame: 'cell_' + frame['cell_ID'].astype(str))
)

# Name the file after the selected ROI ('ROI008_D7' -> 'D7_coord.txt') so that
# choosing another `dataset` cannot overwrite or mislabel the D7 export.
roi_tag = dataset.split('_')[-1]
df_coord.to_csv(f'{roi_tag}_coord.txt', sep='\t', index=False)
df_coord.head()

Unnamed: 0,cell_ID,X,Y
0,cell_1,149.347826,-5.086957
1,cell_2,297.873016,-4.626984
2,cell_3,767.955882,-2.602941
3,cell_4,108.853846,-6.061538
4,cell_5,553.796875,-6.4375


In [18]:
# Export the area of every labelled region as a tab-separated per-cell table.
df_meta = (
    pd.DataFrame(measure.regionprops_table(mask, properties=['label', 'area']))
    .set_index('label')
    .rename_axis('cell_ID')
    .rename(columns={'area': 'Area'})
    .reset_index()
    # Same 'cell_<label>' identifier format as the other exported tables.
    .assign(cell_ID=lambda frame: 'cell_' + frame['cell_ID'].astype(str))
)

# Name the file after the selected ROI ('ROI008_D7' -> 'D7_meta.txt') so that
# choosing another `dataset` cannot overwrite or mislabel the D7 export.
roi_tag = dataset.split('_')[-1]
df_meta.to_csv(f'{roi_tag}_meta.txt', sep='\t', index=False)
df_meta.head()

Unnamed: 0,cell_ID,Area
0,cell_1,46
1,cell_2,126
2,cell_3,68
3,cell_4,130
4,cell_5,64
