# Load Data

In [None]:
import os

# Architectures whose training runs are compared in this notebook.
architectures = ['efficientnet_b0',
                 'efficientnet_b2',
                 'inception_v4',
                 'pnasnet5large',
                 'resnext101_32x8d']

results_dir = r'../../output/train'
# os.listdir returns entries in arbitrary, filesystem-dependent order. Sort them so
# the per-architecture run order is reproducible: the position of a run in
# arch_dirs[arch] is later used as its fold number.
dirs = sorted(os.listdir(results_dir))

# Group the training-run directories by architecture: a run belongs to an
# architecture when the architecture name appears in the directory name.
# Plain files are skipped because every run is expected to be a directory.
arch_dirs = {}
for arch in architectures:
    arch_dirs[arch] = [
        os.path.join(results_dir, run_name)
        for run_name in dirs
        if arch in run_name and os.path.isdir(os.path.join(results_dir, run_name))
    ]


# Find Max Test Accuracy
Maximum test accuracy in each fold for the 5 architectures tested. For every fold we take whichever epoch performed best.

In [None]:
import pandas as pd
# For every architecture, collect the best eval_top1 reached by each run and
# remember which run scored highest (checkpoint path and 1-based run position).
arch_results = {}
arch_best = {}
for arch in architectures:
    fold_scores = arch_results[arch] = []
    top_run = arch_best[arch] = {}
    best_acc = 0
    for fold_idx, run_dir in enumerate(arch_dirs[arch], start=1):
        data = pd.read_csv(run_dir + r'/summary.csv')
        curr_acc = max(data["eval_top1"])
        fold_scores.append(curr_acc)
        # Strict comparison: on a tie the earlier run stays the winner.
        if curr_acc > best_acc:
            best_acc = curr_acc
            top_run['path'] = run_dir + r'/model_best.pth.tar'
            top_run['fold'] = fold_idx

Report max test accuracy of each fold and average

In [None]:
import numpy as np
# Print, per architecture: the best accuracy of every fold, their average,
# and the number of the best-performing fold.
for arch in architectures:
    fold_scores = arch_results[arch]
    formatted_results = ['%.2f' % score for score in fold_scores]
    print(
        arch,
        formatted_results,
        '%.2f' % np.average(fold_scores),
        'best fold %i' % arch_best[arch]['fold'],
    )

# Test and Extract Feature Map

Define the calculation for extracting CD80 and CD206 levels from the stains. Corrective techniques are applied to remove image noise and the variance in light levels between images.

In [None]:
from scipy import ndimage

def calculate_intensity(img, channel_num, diag=False, bg_quantile=0.9, border=2, filter_size=3):
    """Return the mean background-corrected intensity of the "lit" pixels of one channel.

    The background level is estimated from the pixels in a strip along the four
    image edges, subtracted from the channel, and what remains is de-noised with
    a median filter. The result is the average of the pixels that are still
    non-zero afterwards.

    Args:
        img: array-like indexed as ``img[channel_num]`` to obtain a 2-D image.
        channel_num: index of the channel to analyse.
        diag: when True, print the statistics and show the raw and cleaned
            images side by side.
        bg_quantile: position (0..1) in the sorted edge values used as the
            background level; the default 0.9 is the 90th percentile.
        border: width in pixels of the edge strips (must be >= 1).
        filter_size: window size of the median filter.

    Returns:
        float: mean intensity of the non-zero pixels of the cleaned image, or
        0.0 when no pixel survives the cleaning (instead of NaN from 0/0).
    """
    # Work in floating point so the background subtraction below cannot wrap
    # around when the image is stored in an unsigned integer dtype.
    arr = np.array(img[channel_num], dtype=np.float64)

    # Edge strips: top, left, bottom, right. Corner pixels are deliberately
    # counted once per strip they belong to, as in the original estimate.
    edge_values = np.concatenate([
        arr[:border, :].ravel(),
        arr[:, :border].ravel(),
        arr[-border:, :].ravel(),
        arr[:, -border:].ravel(),
    ])
    edge_values.sort()
    # Clamp so that bg_quantile == 1.0 selects the largest value instead of
    # indexing past the end.
    bg_index = min(int(edge_values.size * bg_quantile), edge_values.size - 1)
    background = edge_values[bg_index]

    # Remove the background, drop everything below it, then suppress isolated
    # noisy pixels.
    cleaned = np.clip(arr - background, 0, None)
    cleaned = ndimage.median_filter(cleaned, size=filter_size)

    num_lit = np.count_nonzero(cleaned)
    mean_lit = float(cleaned.sum() / num_lit) if num_lit else 0.0
    # Share of the image that is lit, relative to the actual image size.
    lit_pct = num_lit / cleaned.size * 100

    if diag:
        # Imported here so the function does not depend on a later notebook
        # cell having imported matplotlib first.
        import matplotlib.pyplot as plt

        print("average intensity of lit pixels: ", round(mean_lit, 2))
        print("percentage \"lit\": ", round(lit_pct, 2))
        panels = [(arr, "Stain"), (cleaned, "Clean Stain")]
        f, axarr = plt.subplots(1, len(panels), figsize=(8, 4))
        for axis, (image, title) in zip(axarr, panels):
            axis.imshow(image)
            axis.grid(False)
            axis.set_title(title)
            axis.get_xaxis().set_visible(False)
            axis.get_yaxis().set_visible(False)

        plt.show()
    return mean_lit

Get CD80 and CD206 images

In [None]:
import pickle

# Maps phenotype name -> fluorescence channels of every image of that phenotype.
fluor_marks_temp = {}
# (phenotype name, pickle file) pairs.
fluor_paths = {
    ('M0', r'../../data/processed/kerryn_dec/M0.pickle'),
    ('M1', r'../../data/processed/kerryn_dec/M1.pickle'),
    ('M2', r'../../data/processed/kerryn_dec/M2.pickle')
}
# NOTE(security): pickle.load can execute arbitrary code; only load files that
# come from a trusted source.
for pheno_name, pickle_path in fluor_paths:
    # Context manager so the file handle is closed right after loading.
    with open(pickle_path, 'rb') as handle:
        data = pickle.load(handle)
    # Keep indices 2 and 3 of axis 1; the later cells read index 0 of this
    # slice as CD80 and index 1 as CD206.
    fluor_marks_temp[pheno_name] = data['images'][:, 2:4, :, :]


Get sample order in each fold. This could be made cleaner by re-organizing everything from the start

In [None]:
from os import walk
# fluor_marks[fold][phenotype][sample number] -> fluorescence channels of that
# sample, restricted to the samples in the fold's test split.
fluor_marks = {}
png_suffix = '.png'
for test_fold in range(1, 6):
    test_path = '../../data/processed/dataset_split/fold_%i/test' % test_fold
    fluor_marks[test_fold] = {}
    for pheno in ['M0', 'M1', 'M2']: # Clean this up later
        curr_path = f'{test_path}/{pheno}'
        (_, _, curr_files) = next(walk(curr_path))
        # The sample number is the file name without its ".png" suffix.
        # Slice the suffix off instead of using str.rstrip('.png'), which
        # strips any trailing '.', 'p', 'n' or 'g' characters rather than the
        # suffix. Entries that are not PNG files are ignored.
        # os.walk lists files in arbitrary order; sort numerically so the
        # insertion order of the dict (and of everything derived from it)
        # is deterministic.
        curr_files = sorted(
            int(file_name[:-len(png_suffix)])
            for file_name in curr_files
            if file_name.lower().endswith(png_suffix)
        )
        fluor_marks[test_fold][pheno] = {
            file_num: fluor_marks_temp[pheno][file_num] for file_num in curr_files
        }

Classify every test sample as CD80+, CD206+, both, or neither by thresholding its CD80/CD206 intensity levels

In [None]:
thresh = 8
# Code for each (CD80 above threshold, CD206 above threshold) combination:
# 0 for CD80(blue), 1 for CD206(red), 2 for both (purple), 3 for neither (grey)
stain_code = {
    (True, True): 2,
    (True, False): 0,
    (False, True): 1,
    (False, False): 3,
}

# Need to get these results per fold then apply it to the fold tested
fluorescent_stain = {}
for test_fold in range(1, 6):
    fold_codes = fluorescent_stain[test_fold] = {}

    for pheno in ['M0', 'M1', 'M2']: # Clean this up later
        pheno_codes = fold_codes[pheno] = []

        # One code per sample, in the iteration order of the sample dict.
        for sample_num, channels in fluor_marks[test_fold][pheno].items():
            CD80_brightness = calculate_intensity(channels, 0)
            CD206_brightness = calculate_intensity(channels, 1)
            key = (bool(CD80_brightness > thresh), bool(CD206_brightness > thresh))
            pheno_codes.append(stain_code[key])

In [None]:
import umap
from sklearn.preprocessing import StandardScaler
from operator import mul
from functools import reduce
import copy

# For the best checkpoint of every architecture: run the fold's test set through
# the network, embed the flattened feature maps with UMAP, and attach the class
# label and the fluorescent-marker phenotype of every sample.
umap_results = {}
# Class index -> macrophage phenotype name.
phenotypes = {0:"M0", 1:"M1", 2:"M2"}
# Stain code -> fluorescent-marker phenotype name.
phenotype = {0: "CD80+",
    1: "CD206+",
    2: "CD80+/CD206+",
    3: "CD80-/CD206-"}

for model_arch in architectures:
    print('Processing ' + model_arch)
    model_path = arch_best[model_arch]['path']
    test_fold = arch_best[model_arch]['fold']
    test_path = '../../data/processed/dataset_split/fold_%i/test' % test_fold
    num_classes = 3
    model = create_model(
        model_arch,
        num_classes=num_classes,
        in_chans=3,
        pretrained=False,
        checkpoint_path=model_path)
    model = model.cuda()

    loader = create_loader(
        ImageDataset(test_path),
        input_size=(3,96,96),
        batch_size=1,
        use_prefetcher=True,
        interpolation='bicubic',
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
        num_workers=1,
        no_aug=True
        )
    model.eval()

    feature_maps = []
    targets = []
    with torch.no_grad():
        for batch, target in loader:
            batch = batch.cuda()
            batch_features = model.forward_features(batch).cpu().numpy()
            # Flatten everything after the batch axis: one feature row per sample.
            feature_maps.append(batch_features.reshape(batch_features.shape[0], -1))
            targets.append(target.cpu().numpy().reshape(-1))

    # Concatenate along the sample axis; unlike stacking, this does not require
    # every batch to hold the same number of samples.
    feature_maps = np.concatenate(feature_maps, axis=0)
    targets = np.concatenate(targets, axis=0)

    scaled_data = StandardScaler().fit_transform(feature_maps)
    reducer = umap.UMAP(min_dist=0.3, n_neighbors=10)
    umap_embedding = reducer.fit_transform(scaled_data)
    df_umap = pd.DataFrame(umap_embedding,columns=['umap-one', 'umap-two'])
    df_umap['label'] = [phenotypes[int(ele)] for ele in targets]

    # Attach a stain code to every sample. The codes of one phenotype are
    # consumed front to back, i.e. in the order in which they were computed
    # (taking them from the end of the list would assign them in reverse).
    # Read-only access, so no copy of fluorescent_stain is needed.
    # NOTE(review): this assumes the loader yields the samples of each class in
    # the same order in which fluorescent_stain[test_fold][pheno] was built —
    # confirm against the file ordering used by ImageDataset.
    stain_by_pheno = fluorescent_stain[test_fold]
    next_index = {pheno_name: 0 for pheno_name in stain_by_pheno}
    stain_codes = []
    for target in targets:
        pheno_name = phenotypes[int(target)]
        stain_codes.append(stain_by_pheno[pheno_name][next_index[pheno_name]])
        next_index[pheno_name] += 1

    df_umap['fluor_mark'] = [phenotype[int(code)] for code in stain_codes]
    umap_results[model_arch] = df_umap


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

num_archs = len(architectures)

# Explicit colour per category. sns.set_palette() returns None, so passing its
# result as `palette=` only worked through the side effect of changing the
# global colour cycle; explicit palettes leave the global state untouched.
fluor_palette = {
    'CD80+': "#0a70c4",
    'CD206+': "#db0d0d",
    'CD80+/CD206+': "#660ddb",
    'CD80-/CD206-': "#b5b5b5",
}
label_palette = {
    'M0': "#b5b5b5",
    'M1': "#0a70c4",
    'M2': "#db0d0d",
}

# One figure per architecture: the same UMAP embedding coloured by
# fluorescent-marker phenotype (left) and by class label (right).
for model_arch in architectures:
    fig, ax = plt.subplots(1, 2, figsize=(20, 10))

    sns.scatterplot(
        x="umap-one", y="umap-two",
        hue="fluor_mark",
        palette=fluor_palette,
        data=umap_results[model_arch],
        hue_order=list(fluor_palette),
        legend="full",
        alpha=0.5,
        ax=ax[0],
    )
    ax[0].set_title("Fluorescent Marker Phenotype")

    sns.scatterplot(
        x="umap-one", y="umap-two",
        hue="label",
        data=umap_results[model_arch],
        palette=label_palette,
        hue_order=list(label_palette),
        legend="full",
        alpha=0.5,
        ax=ax[1]
    )
    ax[1].set_title("UMAP for " + model_arch)
    plt.show()
    # Release the figure so looping over many architectures does not
    # accumulate open figures.
    plt.close(fig)