In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tifffile
import cv2
from PIL import Image
from PIL import ImageDraw
from skimage import data, color, io, img_as_float
from os import listdir
from os.path import isfile, join
import skimage.io
import skimage.util

Image.MAX_IMAGE_PIXELS = None

In [2]:
# Lookup tables between the short patient labels used in the prediction
# table (p1..p6) and the patient IDs used in sample/file names (P51..P58).
name_dirs = {
    'p1': 'P51',
    'p2': 'P52',
    'p3': 'P53',
    'p4': 'P56',
    'p5': 'P57',
    'p6': 'P58',
}
# Reverse direction (patient ID -> short label), derived from the table above.
toGegeID = {pt_id: short_label for short_label, pt_id in name_dirs.items()}

# Location of the cell type prediction table
pred_location = 'FromGege/celltype_v8-EP.csv'

In [3]:
# read in celltype predictions
df = pd.read_csv(pred_location)

# reformat columns: split the underscore-delimited identifier stored in
# 'Unnamed: 0' into separate columns and keep the prediction column 'x'
df3 = pd.concat([df['x'], df['Unnamed: 0'].str.split('_', expand=True)], axis=1)
df3 = df3.rename(columns={"x": "celltype_detail", 0: "PtID", 3: "cell_id"})
# FOV number = last two characters of the second identifier part
df3['FOV'] = df3[1].str[-2:]
# translate patient IDs to short labels; an unknown ID raises KeyError
df3['patients'] = df3['PtID'].apply(lambda x: toGegeID[x])
df3 = df3.drop(columns=[1,2])

# define types to remove before further analysis
types_to_remove = ['NoCellAssigned','RBC','SmallCell','Unknown']

# Drop the excluded cell types and select the analysis columns in a single
# .loc step. The explicit .copy() makes morph_predicted an independent frame,
# so columns can be added to it later without SettingWithCopy warnings.
keep_rows = ~df3['celltype_detail'].isin(types_to_remove)
morph_predicted = df3.loc[keep_rows, ['patients', 'celltype_detail', 'FOV', 'cell_id']].copy()
morph_predicted

Unnamed: 0,patients,celltype_detail,FOV,cell_id
1,p1,CD8TeffectorGZMK,01,1
2,p1,MonocytesCD16,01,2
3,p1,Mega,01,3
4,p1,Mega,01,4
5,p1,NK,01,5
...,...,...,...,...
625140,p6,ErythroidProgenitors,23,7962
625141,p6,MatureB,23,7963
625142,p6,ErythroidProgenitors,23,7966
625143,p6,Mega,23,7971


In [4]:
# load FOV metadata to add timepoint info
FOV_metadata = pd.read_csv('/data/Zhaolab/1_AMLCosMx/Final_scripts/4_CellTyping/FOV_metadata.csv', index_col=0)
FOV_metadata.loc['P57_FOV17'] = ['C', 'CR'] # add row for P57 FOV17

# Build the '<PtID>_FOV<nn>' sample key for every cell in one vectorized step
# instead of a Python loop over each row.
sample_keys = morph_predicted['patients'].map(name_dirs) + '_FOV' + morph_predicted['FOV']

# Fail loudly (as a per-row .loc lookup would) if any sample has no metadata row.
unknown_samples = sample_keys[~sample_keys.isin(FOV_metadata.index)].unique()
if len(unknown_samples) > 0:
    raise KeyError(f'samples missing from FOV_metadata: {list(unknown_samples)}')

# .assign returns a new frame, which avoids chained assignment
# (df[col].iloc[i] = ...) and keeps this cell safe to re-run.
morph_predicted = morph_predicted.assign(
    Timepoint=sample_keys.map(FOV_metadata['Timepoint'].to_dict())
)

morph_predicted

Unnamed: 0,patients,celltype_detail,FOV,cell_id,Timepoint
1,p1,CD8TeffectorGZMK,01,1,A
2,p1,MonocytesCD16,01,2,A
3,p1,Mega,01,3,A
4,p1,Mega,01,4,A
5,p1,NK,01,5,A
...,...,...,...,...,...
625140,p6,ErythroidProgenitors,23,7962,C
625141,p6,MatureB,23,7963,C
625142,p6,ErythroidProgenitors,23,7966,C
625143,p6,Mega,23,7971,C


In [1]:
# column that holds the cell type label of each cell
class_column = 'celltype_detail'
# unique cell type names, sorted
cluster_labels = sorted(set(morph_predicted[class_column].tolist()))

# Read the color table and transpose it so that 'blue', 'green' and 'red'
# become columns indexed by cell type name.
colors = pd.read_csv('color_v8.csv', index_col=0).T

# One [blue, green, red] triple per cell type, in that channel order.
cluster_colors = {
    celltype: [colors[channel].loc[celltype] for channel in ('blue', 'green', 'red')]
    for celltype in cluster_labels
}

NameError: name 'morph_predicted' is not defined

In [6]:
cluster_colors

{'B': [51, 170, 255],
 'CD4Tmemory': [205, 166, 108],
 'CD4Tnaive': [255, 239, 191],
 'CD8TeffectorGZMH': [205, 150, 205],
 'CD8TeffectorGZMK': [205, 104, 137],
 'CD8Tnaive': [255, 204, 204],
 'DC': [185, 218, 255],
 'ErythroidProgenitors': [114, 128, 250],
 'LeukemiaCell': [255, 0, 255],
 'MatureB': [0, 215, 255],
 'Mega': [203, 192, 255],
 'MonocytesCD14': [124, 205, 124],
 'MonocytesCD16': [84, 139, 84],
 'MonocytesProgenitor': [152, 251, 152],
 'NK': [204, 204, 204],
 'Plasma': [0, 173, 205],
 'ProgenitorB': [0, 255, 255]}

In [7]:
# Legend order starts out as the key order of cluster_colors.
clusters = list(cluster_colors)

In [8]:
clusters

['B',
 'CD4Tmemory',
 'CD4Tnaive',
 'CD8TeffectorGZMH',
 'CD8TeffectorGZMK',
 'CD8Tnaive',
 'DC',
 'ErythroidProgenitors',
 'LeukemiaCell',
 'MatureB',
 'Mega',
 'MonocytesCD14',
 'MonocytesCD16',
 'MonocytesProgenitor',
 'NK',
 'Plasma',
 'ProgenitorB']

In [10]:
# Reorder the legend entries in place.

# LeukemiaCell goes to the front of the list (and therefore first in the legend).
clusters.remove('LeukemiaCell')
clusters.insert(0, 'LeukemiaCell')

# insert(-1, ...) places an item just before whatever element is currently
# last, so 'B' and then 'MatureB' end up directly ahead of the final label
# rather than at the very end of the list.
for b_label in ('B', 'MatureB'):
    clusters.remove(b_label)
    clusters.insert(-1, b_label)

clusters

['LeukemiaCell',
 'CD4Tmemory',
 'CD4Tnaive',
 'CD8TeffectorGZMH',
 'CD8TeffectorGZMK',
 'CD8Tnaive',
 'DC',
 'ErythroidProgenitors',
 'Mega',
 'MonocytesCD14',
 'MonocytesCD16',
 'MonocytesProgenitor',
 'NK',
 'Plasma',
 'B',
 'MatureB',
 'ProgenitorB']

In [11]:
# short patient labels p1 .. p6, in processing order
patients = [f'p{number}' for number in range(1, 7)]

## Visualize cell types

In [12]:
# Build one montage per patient: one column per timepoint, each column a
# vertical stack of that timepoint's cell type images padded with blank tiles,
# plus a color legend, saved as a 4x downscaled PDF.

n_slots_per_column = 11               # image slots stacked per timepoint column
blank_tile_shape = (3648, 5472, 3)    # padding tile; np.vstack needs it to match the
                                      # FOV image size (presumably 3648x5472 BGR -- confirm)

for patient in patients:

    # subset df to one patient
    one_pt = morph_predicted[morph_predicted['patients'] == patient]
    alt_pt_name = name_dirs[patient]

    timepoints = sorted(set(one_pt['Timepoint'].tolist()))

    timepoint_stacks = []
    for time in timepoints:

        # subset only this timepoint
        subset = one_pt[one_pt['Timepoint'] == time]

        # sorted list of FOVs present at this timepoint
        fovs = sorted(set(subset['FOV'].tolist()))

        # read in each FOV in this sample, concatenate in column
        sample_imgs = []
        for i in range(n_slots_per_column):
            if i < len(fovs):
                label = time + '_FOV' + fovs[i]
                img_path = 'visualize_all_celltypes_nolabels/' + alt_pt_name + '_FOV' + fovs[i] + '.png'
                img = cv2.imread(img_path)
                # cv2.imread signals failure by returning None instead of raising
                if img is None:
                    raise FileNotFoundError('could not read image: ' + img_path)
                img = cv2.putText(img, label, (80, 350), cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=10, color=(255, 255, 255), thickness=15)
            else:
                # uint8 padding: an int8 tile would promote the whole stack to
                # int16 when combined with the uint8 images, doubling memory use
                img = np.zeros(blank_tile_shape, dtype=np.uint8)

            sample_imgs.append(img)

        timepoint_stacks.append(np.vstack(sample_imgs))
        del sample_imgs

    pt_montage = np.hstack(timepoint_stacks)
    del timepoint_stacks

    # add color key in bottom left corner
    # (the key is shifted 6000 px to the right for p4 -- reason not visible here)
    x_seed = 6000 if patient == 'p4' else 0

    y_seed = pt_montage.shape[0] - 10000
    for cluster_name in clusters:
        x_color = [int(v) for v in cluster_colors[cluster_name]] # convert color code from int64 to int
        pt_montage = cv2.putText(pt_montage, cluster_name, (x_seed + 400, y_seed), cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=10, color=(255,255,255), thickness=15)
        pt_montage = cv2.rectangle(pt_montage, (x_seed + 100, y_seed-200), (x_seed + 300, y_seed), x_color,
                                   thickness=-1)
        y_seed += 500

    # cv2.imwrite('Visualize_celltypes_per_patient/' + patient + '_all_fovs_celltypes.png', pt_montage)

    # save as pdf: reverse the channel axis (BGR -> RGB) into a contiguous
    # uint8 array, then downscale 4x in each dimension before writing
    pt_montage_rgb = np.ascontiguousarray(pt_montage[:, :, ::-1], dtype=np.uint8)
    del pt_montage
    im_1 = Image.fromarray(pt_montage_rgb)
    width, height = im_1.size
    im_1_smaller = im_1.resize((width // 4, height // 4))
    im_1_smaller.save(r'Visualize_celltypes_per_patient/' + patient + '_all_fovs_celltypes.pdf')

## Visualize normalized images

In [13]:
# Directory of normalized images for each patient ID. All directories share
# the same root and suffix and differ only in the patient ID and run/slide tag.
_normalized_root = '/data/Zhaolab/1_AMLCosMx/Final_scripts/1_Normalization/0_NormalizedImg/'
_run_slide_tags = {
    'P51': 'R1158_S1',
    'P52': 'R1149_S1',
    'P53': 'R1149_S2',
    'P56': 'R1158_S2',
    'P57': 'R1158_S3',
    'P58': 'R1149_S3',
}
img_dirs = {
    pt_id: _normalized_root + pt_id + '_' + tag + '_Normalized_DAPI_B2M_CD34/'
    for pt_id, tag in _run_slide_tags.items()
}

In [14]:
# Build one montage per patient from the normalized FOV images: one column per
# timepoint, each column a vertical stack of labelled, brightened images padded
# with blank tiles, saved as a 4x downscaled PDF.

n_slots_per_column = 11               # image slots stacked per timepoint column
blank_tile_shape = (3648, 5472, 3)    # padding tile; np.vstack needs it to match the
                                      # FOV image size (presumably 3648x5472 BGR -- confirm)
alpha = 1.5 # Contrast control
beta = 35 # Brightness control

for patient in patients:

    # subset df to one patient
    one_pt = morph_predicted[morph_predicted['patients'] == patient]
    alt_pt_name = name_dirs[patient]

    # list this patient's image directory once rather than once per FOV
    location = img_dirs[alt_pt_name]
    dirlist = sorted(listdir(location))

    timepoints = sorted(set(one_pt['Timepoint'].tolist()))

    timepoint_stacks = []
    for time in timepoints:

        # subset only this timepoint
        subset = one_pt[one_pt['Timepoint'] == time]

        # sorted list of FOVs present at this timepoint
        fovs = sorted(set(subset['FOV'].tolist()))

        # read in each FOV in this sample, concatenate in column
        sample_imgs = []
        for i in range(n_slots_per_column):
            if i < len(fovs):
                label = alt_pt_name + '_' + time + '_FOV' + fovs[i]

                # Load img: first file (in sorted order) whose name contains the FOV tag.
                # Raise when nothing matches; otherwise the image from the previous
                # iteration would silently be reused under the new label.
                fov_tag = 'F0' + fovs[i]
                item = next((name for name in dirlist if fov_tag in name), None)
                if item is None:
                    raise FileNotFoundError('no file containing ' + fov_tag + ' in ' + location)
                img = cv2.imread(join(location, item))
                # cv2.imread signals failure by returning None instead of raising
                if img is None:
                    raise FileNotFoundError('could not read image: ' + join(location, item))
                print(item)

                img = cv2.putText(img, label, (80, 350), cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=10, color=(255, 255, 255), thickness=15)

                # brighten image (the label drawn above is scaled along with it)
                img = cv2.convertScaleAbs(img, alpha=alpha, beta=beta)
            else:
                # uint8 padding: an int8 tile would promote the whole stack to
                # int16 when combined with the uint8 images, doubling memory use
                img = np.zeros(blank_tile_shape, dtype=np.uint8)

            sample_imgs.append(img)

        timepoint_stacks.append(np.vstack(sample_imgs))
        del sample_imgs

    pt_montage = np.hstack(timepoint_stacks)
    del timepoint_stacks

    # save as pdf: reverse the channel axis (BGR -> RGB) into a contiguous
    # uint8 array, then downscale 4x in each dimension before writing
    pt_montage_rgb = np.ascontiguousarray(pt_montage[:, :, ::-1], dtype=np.uint8)
    del pt_montage
    im_1 = Image.fromarray(pt_montage_rgb)
    width, height = im_1.size
    im_1_smaller = im_1.resize((width // 4, height // 4))
    im_1_smaller.save(r'Visualize_FOVs_per_patient/' + patient + '_all_fovs.pdf')

20220228_173209_S1_C902_P99_N99_F001_normalized.png
20220228_173209_S1_C902_P99_N99_F002_normalized.png
20220228_173209_S1_C902_P99_N99_F003_normalized.png
20220228_173209_S1_C902_P99_N99_F004_normalized.png
20220228_173209_S1_C902_P99_N99_F005_normalized.png
20220228_173209_S1_C902_P99_N99_F006_normalized.png
20220228_173209_S1_C902_P99_N99_F007_normalized.png
20220228_173209_S1_C902_P99_N99_F008_normalized.png
20220228_173209_S1_C902_P99_N99_F009_normalized.png
20220228_173209_S1_C902_P99_N99_F010_normalized.png
20220228_173209_S1_C902_P99_N99_F011_normalized.png
20220228_173209_S1_C902_P99_N99_F012_normalized.png
20220228_173209_S1_C902_P99_N99_F013_normalized.png
20220228_173209_S1_C902_P99_N99_F014_normalized.png
20220228_173209_S1_C902_P99_N99_F015_normalized.png
20220228_173209_S1_C902_P99_N99_F016_normalized.png
20220228_173209_S1_C902_P99_N99_F017_normalized.png
20220228_173209_S1_C902_P99_N99_F018_normalized.png
20220228_173209_S1_C902_P99_N99_F019_normalized.png
20220228_173