# Code for Liu and Bosse et al., Reproducible, high-dimensional imaging in archival human tissue by Multiplexed Ion Beam Imaging by Time-of-Flight (MIBI-TOF)
Code in this notebook normalizes the MIBI-TOF data, performs regression for mean pixel intensity (MPI) and percent pixel positive (PPP), and performs comparison with IHC data.

All data necessary to run this notebook are deposited on Zenodo: https://doi.org/10.5281/zenodo.5945388

In [None]:
import ast
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')

# Preprocess MIBI-TOF data

Load in MIBI-TOF data table (Ionpath data).

In [None]:
# relative path to the processed MIBI-TOF (Ionpath) data table; resolved against the working directory
ionpath_loc = 'ionpath_processed_data.csv'
# the whole table as one DataFrame; later cells read columns such as 'Run', 'FOVName', 'Target' and 'Point'
ionpath_data = pd.read_csv(ionpath_loc)

Calculate the mean raw and filtered intensities. The total counts are first divided by the number of pixels per image (1024 × 1024), then normalized by the corresponding percent pixel positive value, and finally scaled up by a factor of 100.

In [None]:
# every image is 1024 x 1024 pixels
num_pixels = 1024 * 1024

# total counts -> counts per pixel -> divided by the percent pixel positive value -> scaled by 100
# (the raw and the filtered counts go through exactly the same steps)
for mean_col, count_col in [('MeanRawIntensity', 'RawCounts'), ('MeanFilteredIntensity', 'FilteredCount')]:
    ionpath_data[mean_col] = (
        ionpath_data[count_col]
        .div(num_pixels)
        .div(ionpath_data['PercentPixelPositive'])
        .mul(100)
    )

Subset on the desired runs. Run names have the format `{date}_Slide{Slide#}Stain{Stain#}Run_{suffix}`, where `{date}` is a six-digit prefix (e.g. `201001`) and `{suffix}` is a tag such as `shuffled` or `restart`.

In [None]:
# define list of runs to use (names exactly as they appear in the 'Run' column)
good_runs = ['201001_Slide23Stain2Run_shuffled', '201001_Slide31Stain4Run_restart', '201003_Slide27Stain5Run_shuffled_run',
             '201005_Slide29Stain3Run_shuffled_4a', '201005_Slide29Stain3Run_shuffled_4b', '201007_Slide21Stain1Run_shuffled_4a',
             '201007_Slide21Stain1Run_shuffled_4b', '201008_Slide25Stain6Run_shuffled_6a', '201008_Slide25Stain6Run_shuffled_6b']

# subset on runs specified
# take an explicit copy so the 'Run' column below is rewritten on an independent table
# rather than on a slice of the original one
ionpath_data = ionpath_data[ionpath_data['Run'].isin(good_runs)].copy()

# remove any suffixes, such as _shuffled, _restart, ... by keeping only the first two '_'-separated tokens
# runs that share the same `..._Slide{Slide#}Stain{Stain#}Run` prefix (e.g. the _4a/_4b pairs) are thereby combined
ionpath_data['Run'] = ionpath_data['Run'].str.split('_').str[:2].str.join('_')

Subset on the FOVs to be included in the analysis.

In [None]:
# FOVs that take part in the analysis
good_fovs = ['R1C2', 'R1C3', 'R1C4', 'R1C5', 'R1C7', 'R1C9', 'R1C10',
             'R2C10', 'R2C11', 'R2C12', 'R3C2', 'R3C4', 'R6C7', 'R6C10',
             'R6C11', 'R7C6', 'R7C7', 'R7C10', 'R8C1', 'R8C10', 'R8C11']

# keep only the rows whose FOV is in the list above
fov_mask = ionpath_data['FOVName'].isin(good_fovs)
ionpath_data = ionpath_data[fov_mask]

# one point was problematic, so drop it: Point13 / R1C4 in run 201007_Slide21Stain1Run
bad_point_mask = (
    (ionpath_data['Point'] == 'Point13')
    & (ionpath_data['FOVName'] == 'R1C4')
    & (ionpath_data['Run'] == '201007_Slide21Stain1Run')
)
ionpath_data = ionpath_data[~bad_point_mask]

# unique runs and unique targets that remain after all the subsetting
all_runs = ionpath_data['Run'].unique()
markers = ionpath_data['Target'].unique()

# Get normalization coefficients for MIBI-TOF data

Load CSV file defining the acquisition order of each run as well as the intensities for each run.

In [None]:
# relative path to the calibration CSV; get_calibration_coefs reads it and indexes it by its 'FOV' column
acquisition_intensity_calib_file = 'calibration_data.csv'

# this dictionary will help us keep the runs we want and rename the columns so we can easily index into them
# keys: intensity column names as they appear in the calibration CSV
# values: run names in the same shortened form used in ionpath_data['Run'] (all contain 'Slide',
#         which is what get_calibration_coefs uses to select the columns to keep)
run_stain_dict = {
    'Intensity Run 1': '201001_Slide31Stain4Run',
    'Intensity Run 2': '201001_Slide23Stain2Run',
    'Intensity Run 3': '201003_Slide27Stain5Run',
    'Intensity Run 4': '201005_Slide29Stain3Run',
    'Intensity Run 5': '201007_Slide21Stain1Run',
    'Intensity Run 6': '201008_Slide25Stain6Run'
}

This function retrieves the calibration coefficients for each FOV across each run.

In [None]:
def get_calibration_coefs(run_stain_dict, file_name):
    """Build the table of calibration coefficients from the intensity calibration CSV.

    A coefficient is one FOV's intensity in one run divided by the mean of all
    non-NaN intensities over every retained run column.

    Args:
        run_stain_dict (dict): maps intensity column names in the CSV to run names
        file_name (str): path to the calibration CSV; it must contain a `FOV` column

    Returns:
        pandas.DataFrame: one row per run name (values of `run_stain_dict`) and one column per FOV
    """
    # load the file, rename the intensity columns to run names and index the rows by FOV
    calib = pd.read_csv(file_name).rename(run_stain_dict, axis=1).set_index('FOV')

    # keep only the columns that have Slide in their (renamed) names
    slide_cols = calib.columns[calib.columns.str.contains('Slide')].tolist()
    calib = calib[slide_cols]

    # grand mean over every remaining value, NaNs ignored
    grand_mean = np.nanmean(calib.values)

    # {fov: {run: intensity / grand mean}}; the outer keys become the columns of the result
    run_names = list(run_stain_dict.values())
    coefs_by_fov = {
        fov: {run_name: calib.loc[fov, run_name] / grand_mean for run_name in run_names}
        for fov in calib.index.values
    }

    return pd.DataFrame(coefs_by_fov)

Compute the calibration coefficients across each run.

In [None]:
# coefficient table: rows are runs, columns are FOVs; also written to disk for reference
norm_coefs = get_calibration_coefs(run_stain_dict=run_stain_dict, file_name=acquisition_intensity_calib_file)
norm_coefs.to_csv("norm_coefs.csv")

# Normalize MIBI-TOF Data

Use norm_coefs to calibrate the PercentPixelPositive, MeanRawIntensity, and MeanFilteredIntensity values. Each value is divided by the corresponding calibration coefficient for the particular run/FOV pair.

In [None]:
def calibrate_ionpath(agg_runs, markers, ionpath_data, norm_coefs):
    """Add calibrated copies of the percent-positive and mean-intensity columns.

    Each of PercentPixelPositive, MeanRawIntensity and MeanFilteredIntensity is divided by
    the calibration coefficient of the row's run/FOV pair and stored in a new column
    named `Calibrated<original name>`.

    Args:
        agg_runs: unused; kept so existing calls keep working
        markers: unused; kept so existing calls keep working
        ionpath_data (pandas.DataFrame): must contain `Run`, `FOVName` and the three columns above
        norm_coefs (pandas.DataFrame): coefficients indexed by run (rows) and FOV (columns)

    Returns:
        pandas.DataFrame: a copy of `ionpath_data` with the three calibrated columns added

    Raises:
        KeyError: if a run/FOV pair of `ionpath_data` is missing from `norm_coefs`
    """
    ionpath_calib_data = ionpath_data.copy()
    columns = ['PercentPixelPositive', 'MeanRawIntensity', 'MeanFilteredIntensity']

    # the coefficient depends only on the row's Run and FOV, so look it up once per row
    # instead of once per row per column; this also works when the table has no rows
    row_coefs = np.array(
        [norm_coefs.loc[run, fov]
         for run, fov in zip(ionpath_calib_data['Run'], ionpath_calib_data['FOVName'])],
        dtype=float
    )

    # calibrate PercentPixelPositive, MeanRawIntensity, and MeanFilteredIntensity
    for col in columns:
        ionpath_calib_data['Calibrated%s' % col] = ionpath_calib_data[col] / row_coefs

    return ionpath_calib_data

Compute CalibratedPercentPixelPositive, CalibratedMeanRawIntensity, and CalibratedMeanFilteredIntensity values.

In [None]:
# calibrated table (original columns plus the three Calibrated* columns); also written to disk
ionpath_calib_data = calibrate_ionpath(
    agg_runs=all_runs,
    markers=markers,
    ionpath_data=ionpath_data,
    norm_coefs=norm_coefs
)
ionpath_calib_data.to_csv("ionpath_norm_data.csv")

# Plot regression statistics from MIBI-TOF Data

Helper function to generate the regression slope, goodness of fit (R^2), and p-value for a particular set of x and y values.

In [None]:
def gen_reg_results(x_vals, y_vals, intercept=False):
    """Fit an ordinary least squares line of `y_vals` on `x_vals` and summarize it.

    Args:
        x_vals: values of the explanatory variable
        y_vals: values of the response variable
        intercept (bool): if True, a constant column is added so the model has an intercept term

    Returns:
        tuple: (slope, R^2, p-value of the slope, residuals of the fit)
    """
    # with an intercept the design matrix gains a constant column in front, so the intercept
    # sits at position 0 of params/pvalues and the slope at position 1; without an intercept
    # the slope is the only entry (position 0)
    design = sm.add_constant(x_vals) if intercept else x_vals
    slope_pos = 1 if intercept else 0

    # note: statsmodels reports the uncentered R^2 when the model has no constant term
    fit = sm.OLS(y_vals, design).fit()

    return fit.params[slope_pos], fit.rsquared, fit.pvalues[slope_pos], fit.resid

For each marker, find the slope and R^2 for each run. Each datapoint is the individual run/FOV data for a channel against the average across all runs for the corresponding FOV and channel.

For example, say we're generating the regression data for Run 1, beta-tubulin. For Run 1, beta-tubulin, FOV 1, let's say the value is 1. Let's also say the beta-tubulin, FOV 1 average across all runs is 2. Then the resulting point on the graph will be (2, 1). One such point is generated for every FOV, and a regression line (without an intercept term) is then fit to the resulting points.

In [None]:
def ionpath_avg_reg_driver(agg_runs, markers, ionpath_calib_data, to_regress):
    """Regress each run's per-FOV values on the across-run per-FOV averages, marker by marker.

    For every (marker, run) pair, one point is built per FOV: x is the mean of `to_regress`
    over all rows of that marker and FOV (all runs), y is the value of `to_regress` for that
    marker and FOV in the run. The points are fit with `gen_reg_results` (no intercept).

    Args:
        agg_runs: run names to evaluate; they become the columns of the result tables
        markers: marker (target) names to evaluate; they become the index of the result tables
        ionpath_calib_data (pandas.DataFrame): must contain `Run`, `Target`, `FOVName` and `to_regress`
        to_regress (str): name of the column to regress

    Returns:
        tuple: (m_stats, r2_stats), the slope and the R^2 of the fit for each marker/run pair

    Raises:
        AssertionError: if a FOV present in a run does not have exactly one row for a marker in that run
    """
    # for each marker, hold the respective m and r2 result for each run
    m_stats = pd.DataFrame(np.nan, index=markers, columns=agg_runs)
    r2_stats = pd.DataFrame(np.nan, index=markers, columns=agg_runs)

    # the across-run average of a marker/fov pair does not depend on the run being processed,
    # so compute it once per marker up front (kept positionally aligned with `markers`)
    marker_fov_avgs = []
    for marker in markers:
        ionpath_marker_data = ionpath_calib_data[ionpath_calib_data['Target'] == marker]
        marker_fov_avgs.append([
            (fov, ionpath_marker_data[ionpath_marker_data['FOVName'] == fov][to_regress].mean())
            for fov in ionpath_marker_data['FOVName'].unique()
        ])

    for run in agg_runs:
        # get the individual run data and the fovs that exist for this run
        ionpath_run_data = ionpath_calib_data[ionpath_calib_data['Run'] == run]
        run_fovs = set(ionpath_run_data['FOVName'].unique())

        for marker, fov_avgs in zip(markers, marker_fov_avgs):
            run_marker_data = ionpath_run_data[ionpath_run_data['Target'] == marker]
            indiv_fov_list = []
            avg_fov_list = []

            for fov, avg_all_runs in fov_avgs:
                # make sure we're not trying to get data for a fov that doesn't exist for the current run
                if fov not in run_fovs:
                    continue

                # the specific data for the fov and marker for the run we're on
                ionpath_indiv_data = run_marker_data[run_marker_data['FOVName'] == fov]
                assert ionpath_indiv_data.shape[0] == 1, (
                    "expected exactly one row for run %s, marker %s, fov %s but found %d"
                    % (run, marker, fov, ionpath_indiv_data.shape[0])
                )

                # one point: (avg fov/marker value across all runs, specific fov/marker value for this run)
                indiv_fov_list.append(ionpath_indiv_data[to_regress].values[0])
                avg_fov_list.append(avg_all_runs)

            # regression through the origin; the p-value and the residuals are not needed here
            m, r2, _, _ = gen_reg_results(avg_fov_list, indiv_fov_list)

            # update the m and r^2 stats for each marker-run pair
            m_stats.loc[marker, run] = m
            r2_stats.loc[marker, run] = r2

    return m_stats, r2_stats

Compute regression statistics for CalibratedPercentPixelPositive and CalibratedMeanFilteredIntensity.

In [None]:
# slope and R^2 tables (markers x runs) for percent pixel positive ...
m_stats_avg_ppp, r2_stats_avg_ppp = ionpath_avg_reg_driver(
    all_runs, markers, ionpath_calib_data, to_regress='CalibratedPercentPixelPositive'
)

# ... and for mean filtered intensity
m_stats_avg_mi, r2_stats_avg_mi = ionpath_avg_reg_driver(
    all_runs, markers, ionpath_calib_data, to_regress='CalibratedMeanFilteredIntensity'
)

Renaming and reordering for visualization:

* `marker_rename`: shorten the names of certain markers
* `run_sort`: define a custom sorting order for the runs provided
* `run_rename`: rename the runs in the format `Slide{n}_StainDay{StainDay#}_RunDay{RunDay#}`

In [None]:
# rename for better visualization
marker_rename = {'HLA class 1 A, B, and C, Na-K-ATPase alpha1': 'HLA1 + ATPase'}

# column order used for the heatmaps
run_sort = ['201007_Slide21Stain1Run', '201001_Slide23Stain2Run', '201008_Slide25Stain6Run',
            '201003_Slide27Stain5Run', '201005_Slide29Stain3Run', '201001_Slide31Stain4Run']

# display names of the runs
run_rename = {'201007_Slide21Stain1Run': 'Slide1_StainDay1_RunDay4',
              '201001_Slide23Stain2Run': 'Slide3_StainDay2_RunDay1',
              '201008_Slide25Stain6Run': 'Slide5_StainDay6_RunDay5',
              '201003_Slide27Stain5Run': 'Slide7_StainDay5_RunDay2',
              '201005_Slide29Stain3Run': 'Slide9_StainDay3_RunDay3',
              '201001_Slide31Stain4Run': 'Slide11_StainDay4_RunDay1'}


def _reorder_for_viz(stats):
    """Shorten marker names, put the run columns in `run_sort` order, then apply `run_rename`."""
    return stats.rename(marker_rename, axis=0)[run_sort].rename(run_rename, axis=1)


# the ordering now comes from run_sort (previously it was taken implicitly from the key order
# of run_rename, which lists the runs in the same order, so the resulting tables are unchanged)
m_stats_avg_ppp_reord = _reorder_for_viz(m_stats_avg_ppp)
r2_stats_avg_ppp_reord = _reorder_for_viz(r2_stats_avg_ppp)

m_stats_avg_mi_reord = _reorder_for_viz(m_stats_avg_mi)
r2_stats_avg_mi_reord = _reorder_for_viz(r2_stats_avg_mi)

Helper function to visualize the heatmap of MIBI-TOF data.

In [None]:
def ionpath_heatmap_viz_avg(ionpath_arr, stat, metric):
    """Draw an annotated heatmap of regression statistics (markers x runs) and save it to disk.

    The figure is written to `ionpath_<stat>_<metric>_scores` in the working directory
    (no extension is given, so matplotlib's default output format applies) and then closed.

    Args:
        ionpath_arr (pandas.DataFrame): statistics with markers as index and runs as columns;
            each x tick label is the part of the column name before the first `_`
        stat (str): 'CalibratedPercentPixelPositive', 'CalibratedMeanFilteredIntensity' or
            'CalibratedMeanFilteredIntensityPPPNorm'; selects the title and is part of the file name
        metric (str): 'm' (slope; diverging colors centered at 1 over [0, 2]) or
            'r2' (R^2; sequential colors over [0.5, 1])

    Raises:
        ValueError: if `metric` or `stat` is not one of the supported values
    """
    # per-metric settings: colorbar ticks and labels, title text and color scaling
    if metric == 'm':
        tick_locs = [0, 0.25, 0.5, 0.75, 1, 1.25, 1.5, 1.75, 2]
        tick_labels = [str(loc) for loc in tick_locs]
        metric_title = 'Slope of linear fit'
        color_kws = {'cmap': 'vlag', 'center': 1, 'vmin': 0, 'vmax': 2}
    elif metric == 'r2':
        tick_locs = np.linspace(0.5, 1, 5).tolist()
        tick_labels = [str(round(loc, 2)) for loc in tick_locs]
        metric_title = 'R\u00b2'
        color_kws = {'cmap': 'viridis', 'vmin': 0.5, 'vmax': 1}
    else:
        raise ValueError("metric must be 'm' or 'r2', got %r" % (metric,))

    # title text for the statistic; fail before any drawing happens if it is not recognized
    if stat == 'CalibratedPercentPixelPositive':
        stat_title = 'Percent Positive Pixels'
    elif stat in ('CalibratedMeanFilteredIntensity', 'CalibratedMeanFilteredIntensityPPPNorm'):
        stat_title = 'Mean Pixel Intensity'
    else:
        raise ValueError("unsupported stat: %r" % (stat,))

    fig = plt.figure(figsize=(10, 10), edgecolor='black')

    # use Seaborn to visualize a heatmap of the results (drawn on the figure created above)
    ax = sns.heatmap(ionpath_arr.values, annot=ionpath_arr.values, fmt='.2f',
                     xticklabels=[slide.split('_')[0] for slide in ionpath_arr.columns.values],
                     yticklabels=ionpath_arr.index.values,
                     square=True, linewidths=0.1, linecolor='black',
                     cbar_kws={'ticks': tick_locs}, **color_kws)

    # make the spine visible
    for spine in ax.spines.values():
        spine.set_visible(True)

    # label the colorbar ticks
    colorbar = ax.collections[0].colorbar
    colorbar.set_ticklabels(tick_labels)

    ax.set_title("%s: %s" % (metric_title, stat_title), fontweight='bold')

    # hide the tick marks on every side
    ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False)

    # set the axis labels
    ax.set_xlabel('Serial section order', fontweight='bold', labelpad=20)
    ax.set_ylabel('Antibody', fontweight='bold', labelpad=20)

    # make x-axis ticks vertical
    plt.setp(ax.get_xticklabels(), rotation='vertical')

    # save figure
    ax.figure.savefig('ionpath_%s_%s_scores' % (stat, metric), dpi=500, bbox_inches='tight')

    # close the figure so that repeated calls do not accumulate open figures
    plt.close(fig)

Generate heatmaps for the slope and R^2 values of CalibratedPercentPixelPositive and CalibratedMeanFilteredIntensity.

In [None]:
# one heatmap per (statistics table, statistic name, metric) combination, in this order
heatmap_jobs = [
    (m_stats_avg_ppp_reord, 'CalibratedPercentPixelPositive', 'm'),
    (r2_stats_avg_ppp_reord, 'CalibratedPercentPixelPositive', 'r2'),
    (m_stats_avg_mi_reord, 'CalibratedMeanFilteredIntensity', 'm'),
    (r2_stats_avg_mi_reord, 'CalibratedMeanFilteredIntensity', 'r2'),
]

for stats_table, stat_name, metric_name in heatmap_jobs:
    _ = ionpath_heatmap_viz_avg(stats_table, stat_name, metric_name)

# Compare IHC and MIBI-TOF Data

Load the IHC data table.

In [None]:
# relative path to the IHC data table; resolved against the working directory
ihc_loc = 'ihc_data.csv'
# later cells read the 'Target' and 'FOVName' columns and the IHC metric passed to the regression driver
ihc_data = pd.read_csv(ihc_loc)

Create 3 mappings:

* `slide_extract`: map the run name to the Ionpath slide extract name (not necessarily the same as found in the run name)
* `ihc_slides`: map each marker to its corresponding IHC slide
* `marker_to_slide`: map the marker for each IHC to the corresponding Ionpath run(s). Note that a marker's IHC slide number will determine which Ionpath slide extract numbers to map to. For example, if an IHC slide number is 2, then it should map to Ionpath slide extracts 1 and 3 (since 2 is 1 away from both 1 and 3).

In [None]:
# Ionpath run name -> Ionpath slide extract name (used in plot annotations and output names)
slide_extract = {
    '201007_Slide21Stain1Run': 'Slide1',
    '201001_Slide31Stain4Run': 'Slide11',
    '201005_Slide29Stain3Run': 'Slide9',
    '201001_Slide23Stain2Run': 'Slide3',
    '201008_Slide25Stain6Run': 'Slide5',
    '201003_Slide27Stain5Run': 'Slide7'
}

# IHC marker -> IHC slide name
ihc_slides = {
    'CD8': 'Slide2',
    'PanCK': 'Slide4',
    'PAX5': 'Slide8',
    'CD68': 'Slide10',
    'CD3': 'Slide12'
}

# IHC marker -> Ionpath run(s) whose slide extract number is one away from the marker's IHC slide number
# NOTE(review): PAX5 is on IHC Slide8, whose neighbors are Slide7 ('201003_Slide27Stain5Run') and Slide9,
# but only the Slide9 run is listed here -- confirm that leaving out the Slide7 run is intentional
marker_to_slide = {
    'CD8': ['201007_Slide21Stain1Run', '201001_Slide23Stain2Run'],
    'CD68': ['201001_Slide31Stain4Run', '201005_Slide29Stain3Run'],
    'CD3': ['201001_Slide31Stain4Run'],
    'PAX5': ['201005_Slide29Stain3Run'],
    'PanCK': ['201001_Slide23Stain2Run', '201008_Slide25Stain6Run']
}

Map each IHC marker to the corresponding Ionpath run name.

In [None]:
# each marker's list of Ionpath runs, stored as its string representation
# (the regression driver later parses that string back into a list)
run_lists_as_text = {marker: str(runs) for marker, runs in marker_to_slide.items()}

# markers without an entry in marker_to_slide keep their own name in the Run column
ihc_data['Run'] = ihc_data['Target'].map(lambda target: run_lists_as_text.get(target, target))

Subset IHC data on the list of FOVs provided (same as `good_fovs` specified above).

In [None]:
# keep only the IHC rows whose FOV is in good_fovs
ihc_data = ihc_data.loc[ihc_data['FOVName'].isin(good_fovs)]

Read in mapping of FOV to tissue type (needed for plotting).

In [None]:
# tissue annotation table, indexed by its 'Core' column
tissue_data_loc = 'tissue_data.csv'
tissue_data = pd.read_csv(tissue_data_loc).set_index('Core')

# Define colors for plotting (points in regression plot are colored by tissue type).
colors = [
    "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33",
    "#A65628", "#F781BF", "#999999", "#66C2A5", "#FC8D62", "#8DA0CB",
    "#E78AC3", "#A6D854", "#FFD92F", "#E5C494", "#B3B3B3",
]

Helper function to plot the regression results of the IHC data.

In [None]:
def plot_reg_results_ihc(marker, run_x, run_y, ihc_slide, mibi_slide, all_dat, to_regress_ihc, to_regress_ionpath, save_dir, r2, m=None, intercept=False):
    """Plot the IHC metric against the MIBI-TOF metric for one marker and save the figure as a PNG.

    Points are colored by tissue type (using the module-level `colors` list) and a
    regression line is drawn over all points. The file is written to
    `<save_dir>/marker_<marker>_<run_y>_<mibi_slide>_vs_<run_x>_<ihc_slide>.png`.

    Args:
        marker (str): marker name, written on the plot and used in the file name
        run_x (str): x-axis label
        run_y (str): y-axis label
        ihc_slide (str): IHC slide name, written on the plot and used in the file name
        mibi_slide (str): MIBI slide name, written on the plot and used in the file name
        all_dat (pandas.DataFrame): must contain `to_regress_ihc`, `to_regress_ionpath` and `Tissue`
        to_regress_ihc (str): column plotted on the x-axis
        to_regress_ionpath (str): column plotted on the y-axis
        save_dir (str): output directory, created if it does not exist
        r2 (float): R^2 value written on the plot
        m: unused; kept so existing calls keep working
        intercept: unused; kept so existing calls keep working
    """
    # if the save directory doesn't already exist, make it (including missing parent directories)
    os.makedirs(save_dir, exist_ok=True)

    x_vals = all_dat[to_regress_ihc].values
    y_vals = all_dat[to_regress_ionpath].values

    # lmplot is a figure-level function: it opens its own figure, so no figure is created beforehand
    # (a separately created figure would stay empty and open after every call)
    n_tissues = len(np.unique(all_dat['Tissue']))
    sns.lmplot(x=to_regress_ihc, y=to_regress_ionpath, hue='Tissue', data=all_dat, fit_reg=False,
               palette=sns.color_palette(colors, n_tissues))
    fig = plt.gcf()

    # overlay the regression line on the same axes; its own scatter is pushed behind the colored points
    ax = sns.regplot(x=to_regress_ihc, y=to_regress_ionpath, data=all_dat, scatter_kws={"zorder": -1})

    # define the upper and lower bound for both x- and y-axes
    ax.set_xbound(lower=0, upper=max(x_vals) + 1)
    ax.set_ybound(lower=0, upper=max(y_vals) + 1)

    # set labels
    ax.set_xlabel(run_x, fontsize=18, fontweight='bold', labelpad=20)
    ax.set_ylabel(run_y, fontsize=18, fontweight='bold', labelpad=20)

    # write the marker, the R^2 and the slide names on the graph (positions in axes coordinates)
    ax.text(0.05, 0.9, marker, fontsize=28, fontweight='bold', transform=ax.transAxes)
    ax.text(0.05, 0.85, "R^2: %.2f" % r2, fontsize=18, transform=ax.transAxes)
    ax.text(0.05, 0.7, "MIBI: %s\nIHC: %s" % (mibi_slide, ihc_slide), fontsize=18, transform=ax.transAxes)
    plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=18)
    plt.setp(ax.get_yticklabels(), fontsize=18)

    # save figure
    fig.savefig(os.path.join(save_dir, 'marker_%s_%s_%s_vs_%s_%s.png' % (marker, run_y, mibi_slide, run_x, ihc_slide)),
                dpi=500, bbox_inches='tight')

    # close the figure so that repeated calls do not accumulate open figures
    plt.close(fig)

Generate the regression results for IHC data. The regression is computed on the IHC metric for a marker compared to the corresponding calibrated MIBI-TOF metric for the marker.

In [None]:
def ihc_gen_results(marker, run, ihc_data, ionpath_calib_data, to_regress_ihc, to_regress_ionpath, slide_extract, ihc_slides, save_dir):
    """Regress the MIBI-TOF metric on the IHC metric for one marker/run pair and plot the result.

    Only FOVs present in both the IHC data (for `marker`) and the MIBI-TOF data (for `run`
    and `marker.upper()`) are used. The regression includes an intercept term. Tissue types
    are read from the module-level `tissue_data` table, which must be indexed by FOV name.

    Args:
        marker (str): IHC marker name
        run (str): Ionpath run name
        ihc_data (pandas.DataFrame): must contain `Target`, `FOVName` and `to_regress_ihc`
        ionpath_calib_data (pandas.DataFrame): must contain `Run`, `Target`, `FOVName` and `to_regress_ionpath`
        to_regress_ihc (str): IHC column used as the explanatory variable
        to_regress_ionpath (str): MIBI-TOF column used as the response variable
        slide_extract (dict): maps run names to Ionpath slide extract names
        ihc_slides (dict): maps marker names to IHC slide names
        save_dir (str): directory the regression plot is saved to

    Returns:
        tuple: (IHC values, residuals, comparison name `<marker>_MIBI_<mibi slide>_IHC_<ihc slide>`),
        with the two arrays ordered by tissue type
    """
    # subset the data, and set the index to FOVName to make life easy later (the upper is to fix PanCK)
    ihc_sub = ihc_data[ihc_data['Target'] == marker].set_index('FOVName')
    ionpath_sub = ionpath_calib_data[
        (ionpath_calib_data['Run'] == run) & (ionpath_calib_data['Target'] == marker.upper())
    ].set_index('FOVName')

    # select only the fovs that belong to both ihc and ionpath; sorted so that the row order
    # (and therefore the order of the returned arrays) does not depend on set iteration order
    fovs_to_select = sorted(set(ihc_sub.index.values.tolist()) & set(ionpath_sub.index.values.tolist()))

    # make one data table with all data, restricted to the shared fovs
    concat_dat = pd.concat(
        [ihc_sub.loc[fovs_to_select][to_regress_ihc],
         ionpath_sub.loc[fovs_to_select][to_regress_ionpath],
         tissue_data.loc[fovs_to_select]],
        axis=1
    )

    # generate the regression results
    # we will be including an intercept term in this regression this time around
    m, r2, pval, resid = gen_reg_results(concat_dat[to_regress_ihc].values, concat_dat[to_regress_ionpath].values, intercept=True)

    # add residuals to data (before sorting, so that they stay aligned with their rows)
    concat_dat['residuals'] = resid

    # sort data by tissue so that the colors appear in the same order
    sorter = ["Thymus","Lymph node","Colon adenocarcinoma","Spleen","Leiomyosarcoma","Squamous cell carcinoma","Placenta","Tonsil","Foreign body giant cells","Dermal sarcoma","Bladder carcinoma","Salivary cystadenoma","Myxofibrosarcoma","Breast ductal carcinoma"]

    # set_categories returns a new object; its `inplace` option is no longer available in pandas 2.x,
    # so the result is assigned back instead
    concat_dat['Tissue'] = concat_dat['Tissue'].astype("category").cat.set_categories(sorter)

    # stable sort: rows of the same tissue keep their (sorted-by-fov) relative order
    concat_dat = concat_dat.sort_values(["Tissue"], kind='mergesort')

    # Plot regression
    _ = plot_reg_results_ihc(marker, 'IHC', 'MIBI', ihc_slides[marker], slide_extract[run], concat_dat,
                             to_regress_ihc, to_regress_ionpath,
                             save_dir, r2, m, intercept=True)

    return concat_dat[to_regress_ihc].values, concat_dat['residuals'].values, '%s_MIBI_%s_IHC_%s' % (marker, slide_extract[run], ihc_slides[marker])

The driver function for the IHC regressions.

In [None]:
def ihc_reg_driver(ihc_data, ionpath_calib_data, to_regress_ihc, to_regress_ionpath, slide_extract, ihc_slides, save_dir):
    """Run the IHC vs MIBI-TOF regression for every marker/run pairing and collect the residuals.

    Args:
        ihc_data (pandas.DataFrame): IHC table; its `Run` column must hold, for each marker,
            the string representation of a list of Ionpath run names
        ionpath_calib_data (pandas.DataFrame): calibrated MIBI-TOF table
        to_regress_ihc (str): IHC column to regress on
        to_regress_ionpath (str): MIBI-TOF column to regress
        slide_extract (dict): maps run names to Ionpath slide extract names
        ihc_slides (dict): maps marker names to IHC slide names
        save_dir (str): directory the regression plots are saved to

    Returns:
        pandas.DataFrame: one row per FOV and comparison with the columns `residual`, `ihc`,
        `marker`, `run` and `name`; an empty DataFrame if there is no marker/run pairing
    """
    # one frame per marker/run comparison, concatenated once at the end
    # (DataFrame.append is not available in pandas 2.x and re-copies the table on every call)
    resid_frames = []

    # iterate over each marker and run pairing in ihc_data
    for marker, run_list in ihc_data[['Target', 'Run']].drop_duplicates().values:
        # the Run column stores the run list as text, so parse it back into a list
        for run in ast.literal_eval(run_list):
            ihc, resid, name = ihc_gen_results(marker, run, ihc_data, ionpath_calib_data, to_regress_ihc, to_regress_ionpath, slide_extract, ihc_slides, save_dir)
            df = pd.DataFrame(data=resid, columns=['residual'])
            df['ihc'] = ihc
            df['marker'] = marker
            df['run'] = run
            df['name'] = name
            resid_frames.append(df)

    # pd.concat raises on an empty list, so keep returning an empty table in that case
    if not resid_frames:
        return pd.DataFrame()

    return pd.concat(resid_frames, ignore_index=True)

For each IHC marker, plot and save the regression results for percent pixel positive against the corresponding calibrated MIBI-TOF data.

In [None]:
# regress calibrated MIBI-TOF percent pixel positive on IHC percent pixel positive for every
# marker/run pairing; the plots are saved under regression_plots_ihc
all_resid = ihc_reg_driver(
    ihc_data=ihc_data,
    ionpath_calib_data=ionpath_calib_data,
    to_regress_ihc='DABPercentPixelPositive',
    to_regress_ionpath='CalibratedPercentPixelPositive',
    slide_extract=slide_extract,
    ihc_slides=ihc_slides,
    save_dir='regression_plots_ihc'
)

Plot all residuals.

In [None]:
# Only keep comparisons we care about (one MIBI slide / IHC slide pairing per marker)
comparisons_to_keep = ['CD3_MIBI_Slide11_IHC_Slide12', 'CD8_MIBI_Slide3_IHC_Slide2', 'CD68_MIBI_Slide9_IHC_Slide10',
                       'PanCK_MIBI_Slide5_IHC_Slide4', 'PAX5_MIBI_Slide9_IHC_Slide8']

# explicit copy, so that the new column below is added to an independent table
# rather than to a slice of all_resid
keep_resid = all_resid.loc[all_resid['name'].isin(comparisons_to_keep)].copy()

# Divide residual by IHC
keep_resid['resid_norm'] = keep_resid['residual'] / keep_resid['ihc']
keep_resid.to_csv("residuals.csv")

# Make plot: residual against the IHC value, one color per marker
# (the palette is sized from the markers that are actually plotted)
fig, ax = plt.subplots(figsize=(5, 5))
splot = sns.scatterplot(x='ihc', y='residual', data=keep_resid, hue='marker', ax=ax,
                        palette=sns.color_palette("Dark2", keep_resid['marker'].nunique()))
splot.set(xlim=(0, 100), ylim=(-50, 50))
splot.legend(title="Marker")
ax.set_xlabel('IHC', fontsize=18, labelpad=20)
ax.set_ylabel('Residual', fontsize=18, labelpad=20)
fig.savefig(os.path.join('regression_plots_ihc', 'all_residuals.png'), dpi=500, bbox_inches='tight')