In [41]:
import math
import os
import shutil
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scanpy as sc
import seaborn as sns
import tifffile
import torchvision.transforms.functional as TF
import torch
from pathlib import Path
from collections import Counter

from einops import rearrange

In [42]:
# Load IPython's autoreload extension (configured by the `%autoreload` magic in the next cell).
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [43]:
# Autoreload mode 2: re-import modules before each cell runs, so edits to local
# packages are picked up without restarting the kernel.
%autoreload 2

In [44]:
from mip.utils import listfiles, extract_ome_tiff, R_CHANNEL_MAPPING, make_pseudo, display_region

In [45]:
# Make sure the output directory exists, creating any missing parent directories.
# `out_dir` is kept as a plain string.
out_dir = '/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/classification'
os.makedirs(out_dir, exist_ok=True)

In [46]:
# Collect the per-sample results files (*.pt) from the region-analysis directory, sorted by path.
# The dot is escaped so only a literal ".pt" suffix matches; an unescaped "." matches any
# character and would also accept names such as "foo.ppt".
# NOTE(review): assumes listfiles applies `regex` as a search over each path - confirm in mip.utils.
fps = sorted(listfiles('/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/region_analysis_v2',
                      regex=r'\.pt$'))
fps

['/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/region_analysis_v2/HT110B1_S1H4_results.pt',
 '/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/region_analysis_v2/HT171B1-S1H9A1-4_left_05122023_results.pt',
 '/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/region_analysis_v2/HT171B1-S1H9A1-4_right_05122023_results.pt',
 '/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/region_analysis_v2/HT206B1_H1_06252022_results.pt',
 '/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/region_analysis_v2/HT206B1_H1_results.pt',
 '/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/region_analysis_v2/HT206B1_S1H2L4_20221028_results.pt',
 '/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/region_analysis_v2/HT243B1-S1H4A4_04192023_results.pt',
 '/diskmnt/Projects/Users/estorrs/multiplex_data/analysis/brca_dcis_v2/region_analysis_v2/HT243B1-S1H4A4_left_05122023_resul

In [47]:
# Load every results file once and keep, per sample, its tables and the thumbnail
# stored under images -> thumbnail -> pseudo.
sample_to_tables, sample_to_thumbnail = {}, {}
for fp in fps:
    # Sample name = file name up to (not including) the first '_results'.
    sample = fp.rsplit('/', 1)[-1].partition('_results')[0]
    print(sample)

    # NOTE(review): torch.load unpickles the file - only load results from a trusted source.
    obj = torch.load(fp)
    sample_to_thumbnail[sample] = obj['images']['thumbnail']['pseudo']
    sample_to_tables[sample] = obj['tables']
sample_to_tables.keys()

HT110B1_S1H4
HT171B1-S1H9A1-4_left_05122023
HT171B1-S1H9A1-4_right_05122023
HT206B1_H1_06252022
HT206B1_H1
HT206B1_S1H2L4_20221028
HT243B1-S1H4A4_04192023
HT243B1-S1H4A4_left_05122023
HT243B1-S1H4A4_right_05122023
HT243B1_S1H4
HT271B1-S1H6A5_04192023
HT271B1-S1H6A5_left_05122023
HT271B1-S1H6A5_right_05122023
HT297B1_H1_08042022
HT305B1_S1H1
HT308B1-S1H5A4_04192023
HT308B1-S1H5A4_left_05122023
HT308B1-S1H5A4_right_05122023
HT323B1_H1A1
HT323B1_H1A4
HT323B1_H1_08042022
HT323B1_H3
HT339B1_H1A1
HT339B1_H2A1
HT339B1_H4A4
HT365B1_S1H1_02132023
HT397B1_H2A2
HT397B1_H3A1
HT397B1_S1H1A3U22_04122023
HT397B1_S1H1A3U31_04062023
HT397B1_U12_03172023
HT397B1_U2_03162023
HT480B1_S1H2_R001
HT480B1_S1H2_R002
HT565B1-H2_04262023


dict_keys(['HT110B1_S1H4', 'HT171B1-S1H9A1-4_left_05122023', 'HT171B1-S1H9A1-4_right_05122023', 'HT206B1_H1_06252022', 'HT206B1_H1', 'HT206B1_S1H2L4_20221028', 'HT243B1-S1H4A4_04192023', 'HT243B1-S1H4A4_left_05122023', 'HT243B1-S1H4A4_right_05122023', 'HT243B1_S1H4', 'HT271B1-S1H6A5_04192023', 'HT271B1-S1H6A5_left_05122023', 'HT271B1-S1H6A5_right_05122023', 'HT297B1_H1_08042022', 'HT305B1_S1H1', 'HT308B1-S1H5A4_04192023', 'HT308B1-S1H5A4_left_05122023', 'HT308B1-S1H5A4_right_05122023', 'HT323B1_H1A1', 'HT323B1_H1A4', 'HT323B1_H1_08042022', 'HT323B1_H3', 'HT339B1_H1A1', 'HT339B1_H2A1', 'HT339B1_H4A4', 'HT365B1_S1H1_02132023', 'HT397B1_H2A2', 'HT397B1_H3A1', 'HT397B1_S1H1A3U22_04122023', 'HT397B1_S1H1A3U31_04062023', 'HT397B1_U12_03172023', 'HT397B1_U2_03162023', 'HT480B1_S1H2_R001', 'HT480B1_S1H2_R002', 'HT565B1-H2_04262023'])

In [48]:
# Build one combined DataFrame per table type by stacking that table across all samples.
# Row labels are prefixed with the sample name and a 'sample' column records the origin.
#
# Fix: this cell now reads `sample_to_tables` (the dict filled by the loading cell). It
# previously referenced `sample_to_table`, which no visible cell defines, so it only ran
# when a stale kernel variable existed and raised NameError on a fresh kernel.
type_to_combined = {}
# Table types are taken from the first sample.
# NOTE(review): assumes every sample provides the same table keys - otherwise tables[k] raises KeyError.
for k in next(iter(sample_to_tables.values())).keys():
    combined = None
    for s, tables in sample_to_tables.items():
        df = tables[k].copy()  # copy so the loaded table is not modified
        df.index = [f'{s}_{x}' for x in df.index]
        df.index.name = 'label'
        df['sample'] = s

        # Put features on the index so the index-on-index outer merge below keeps the
        # union of features across samples; features a sample lacks become NaN.
        df = df.transpose()

        if combined is None:
            combined = df
        else:
            combined = pd.merge(combined, df, left_index=True, right_index=True, how='outer')
    # Flip back to one row per label, one column per feature.
    combined = combined.transpose()
    combined.index.name = 'label'
    type_to_combined[k] = combined

Unnamed: 0_level_0,area,c1,c2,cell_type_fraction_B cell,cell_type_fraction_CD4 T cell,cell_type_fraction_CD8 T cell,cell_type_fraction_Endothelial,cell_type_fraction_Epithelial,cell_type_fraction_Immune,cell_type_fraction_Macrophage - M1,...,marker_intensity_TFF1 (D),marker_intensity_Vimentin,marker_intensity_a-Amylase,marker_intensity_beta-integrin,marker_intensity_cKit,perimeter,r1,r2,row,sample
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HT110B1_S1H4_1,15584,4569,4678,0.0,,0.0,,1.0,0.0,0.0,...,,2.21952,,,1.924538,579.747258,13113,13332,13219.493134,HT110B1_S1H4
HT110B1_S1H4_2,124527,5316,5801,0.0,,0.007194,,0.971223,0.007194,0.0,...,,26.210075,,,8.830077,2105.349422,15628,16169,15907.752656,HT110B1_S1H4
HT110B1_S1H4_3,53376,6320,6584,0.0,,0.0,,0.988095,0.0,0.0,...,,10.42995,,,4.571661,1587.359523,15648,16080,15903.167697,HT110B1_S1H4
HT110B1_S1H4_4,27860,5989,6171,0.0,,0.0,,0.960784,0.0,0.0,...,,19.044903,,,3.973439,911.327994,15910,16143,16024.476741,HT110B1_S1H4
HT110B1_S1H4_5,11164,4968,5059,0.0,,0.0,,1.0,0.0,0.0,...,,7.892153,,,3.456467,622.232539,15929,16133,16035.594411,HT110B1_S1H4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
HT565B1-H2_04262023_104,93531,5207,5732,,0.0,0.011111,0.533333,0.355556,0.077778,,...,11.539308,124.236884,3.556094,,9.951129,1918.278354,14329,14800,14569.474516,HT565B1-H2_04262023
HT565B1-H2_04262023_105,17642,8658,8822,,0.1,0.4,0.133333,0.366667,0.0,,...,9.869856,38.46344,1.339191,,3.289196,569.487373,14337,14511,14421.758247,HT565B1-H2_04262023
HT565B1-H2_04262023_106,346698,3721,5114,,0.004525,0.013575,0.294118,0.597285,0.081448,,...,12.282214,111.742361,2.966143,,10.963692,4397.380951,14389,14800,14660.152256,HT565B1-H2_04262023
HT565B1-H2_04262023_107,221671,9054,10267,,0.0,0.0,0.0,1.0,0.0,,...,3.518999,68.631034,1.212657,,3.011224,3347.207286,14496,14800,14681.703633,HT565B1-H2_04262023


In [49]:
# Sample names in sorted order (iterating the dict yields its keys).
samples = sorted(sample_to_tables)
samples

['HT110B1_S1H4',
 'HT171B1-S1H9A1-4_left_05122023',
 'HT171B1-S1H9A1-4_right_05122023',
 'HT206B1_H1',
 'HT206B1_H1_06252022',
 'HT206B1_S1H2L4_20221028',
 'HT243B1-S1H4A4_04192023',
 'HT243B1-S1H4A4_left_05122023',
 'HT243B1-S1H4A4_right_05122023',
 'HT243B1_S1H4',
 'HT271B1-S1H6A5_04192023',
 'HT271B1-S1H6A5_left_05122023',
 'HT271B1-S1H6A5_right_05122023',
 'HT297B1_H1_08042022',
 'HT305B1_S1H1',
 'HT308B1-S1H5A4_04192023',
 'HT308B1-S1H5A4_left_05122023',
 'HT308B1-S1H5A4_right_05122023',
 'HT323B1_H1A1',
 'HT323B1_H1A4',
 'HT323B1_H1_08042022',
 'HT323B1_H3',
 'HT339B1_H1A1',
 'HT339B1_H2A1',
 'HT339B1_H4A4',
 'HT365B1_S1H1_02132023',
 'HT397B1_H2A2',
 'HT397B1_H3A1',
 'HT397B1_S1H1A3U22_04122023',
 'HT397B1_S1H1A3U31_04062023',
 'HT397B1_U12_03172023',
 'HT397B1_U2_03162023',
 'HT480B1_S1H2_R001',
 'HT480B1_S1H2_R002',
 'HT565B1-H2_04262023']

In [50]:
# Show the column names of the combined 'region' table in sorted order.
region_table = type_to_combined['region']
sorted(region_table.columns)

['area',
 'c1',
 'c2',
 'cell_type_fraction_B cell',
 'cell_type_fraction_CD4 T cell',
 'cell_type_fraction_CD8 T cell',
 'cell_type_fraction_Endothelial',
 'cell_type_fraction_Epithelial',
 'cell_type_fraction_Immune',
 'cell_type_fraction_Macrophage - M1',
 'cell_type_fraction_Stroma',
 'cell_type_fraction_Treg',
 'cell_type_fraction_Unlabeled',
 'col',
 'compactness',
 'marker_fraction_CD11b',
 'marker_fraction_CD163',
 'marker_fraction_CD20',
 'marker_fraction_CD31',
 'marker_fraction_CD3e',
 'marker_fraction_CD4',
 'marker_fraction_CD45',
 'marker_fraction_CD68',
 'marker_fraction_CD8',
 'marker_fraction_CK14',
 'marker_fraction_CK17',
 'marker_fraction_CK5',
 'marker_fraction_E-cadherin',
 'marker_fraction_FOXP3',
 'marker_fraction_Ki67',
 'marker_fraction_Pan-Cytokeratin',
 'marker_fraction_Podoplanin',
 'marker_fraction_SMA',
 'marker_intensity_BCA1',
 'marker_intensity_Bap1',
 'marker_intensity_CD11b',
 'marker_intensity_CD11c',
 'marker_intensity_CD138',
 'marker_intensity_CD

In [None]:
col = ''
sample_to_normal = {}
for sample in samples:
    