In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import DenseNet121, VGG16, VGG19, InceptionV3, ResNet50, Xception
from tensorflow.keras.applications.mobilenet import MobileNet
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.utils import multi_gpu_model

In [None]:
# Render every figure at 300 dpi.
plt.rc('figure', dpi=300)

# Initialize Parameters 

In [None]:
# Class labels, in the order of the model's output units.
class_names = ['normal', 'tumor']
num_classes = len(class_names)

# HDF5 file holding the model weights.
weight_file = '../model/diagnosis_tumor_normal.h5'

# Load model

In [None]:
# InceptionV3 backbone without its classification top and without
# pre-trained weights; the weights are loaded from `weight_file` later on.
base_model = InceptionV3(include_top=False, weights=None)

In [None]:
# Classification head on top of the backbone:
# global average pooling -> 1024-unit ReLU layer -> softmax over the classes.
pooled_features = GlobalAveragePooling2D()(base_model.output)
hidden_features = Dense(1024, activation='relu')(pooled_features)
predictions = Dense(num_classes, activation='softmax')(hidden_features)

# Assemble backbone + head, then wrap the result with multi_gpu_model for 2 GPUs.
single_model = Model(inputs=base_model.input, outputs=predictions)
model = multi_gpu_model(single_model, gpus=2)

In [None]:
# Load the weights stored in `weight_file` into `model`.
# NOTE(review): presumably the file was saved from a model with this exact
# (multi-GPU wrapped) structure — confirm before changing the model cell.
model.load_weights(weight_file)

# Test model

## Predict at patient level

In [None]:
def get_id_by_name(full_path):
    """Derive a patient id from the last component of a path.

    Rules, applied to ``os.path.basename(full_path)``:

    * names starting with ``'TCGA'`` -> the first 12 characters;
    * otherwise the text before the first ``'-'``;
    * otherwise (no ``'-'``) the text before the first ``'_'``;
    * otherwise (neither separator) the whole name.

    The last rule is the fix over the previous version, which sliced with the
    ``-1`` returned by ``str.find`` and so dropped the final character.

    Args:
        full_path: path to a file or directory.

    Returns:
        The patient id as a string.
    """
    name = os.path.basename(full_path)
    if name.startswith('TCGA'):
        return name[:12]
    # '-' takes precedence over '_'; partition() reports whether the
    # separator was actually present, so a missing one cannot truncate the name.
    for separator in ('-', '_'):
        head, found, _ = name.partition(separator)
        if found:
            return head
    return name

In [None]:
# Settings for the patient-level evaluation.
mag = '5.0'              # name of the per-slide sub-directory the tiles are read from
true_label = 'tumor'     # label every patient under test_dirs is compared against
result_file = '../result/result_diagnosis_tumor_patient_level.csv'

# Data directory, resolved from the relative path to an absolute one.
test_dirs = os.path.abspath('../data/diagnosis/kirp')

In [None]:
# Patient-level evaluation: all tiles of one patient are classified together
# and the patient gets the label predicted for the largest number of tiles.
#
# tile_summary: tiles predicted per class, summed over all patients.
# csv_data:     per class, one entry per patient = fraction of that patient's
#               tiles predicted as the class (these become the CSV rows).
tile_summary = {class_name: 0 for class_name in class_names}
csv_data = {class_name: [] for class_name in class_names}

total_patient_count = 0
correct_patient_count = 0

# Sorted so the CSV rows (which carry no patient id) come out in a reproducible
# order; the iteration order of a set of strings is not stable between kernels.
pids = sorted({get_id_by_name(full_path) for full_path in glob(test_dirs + '/*')
               if full_path.endswith('_files')})

for pid in pids:
    print('processing ' + pid)
    # '<pid>*' also matches directories of other patients whose id merely
    # starts with this pid, so keep only tiles whose slide directory maps
    # back to exactly this patient.
    test_paths = [
        img_path
        for img_path in glob(test_dirs + '/' + pid + '*/' + mag + '/*jpeg')
        if get_id_by_name(os.path.dirname(os.path.dirname(img_path))) == pid
    ]

    if not test_paths:
        print(pid, "is empty")
        continue

    # float32 halves the buffer size compared with numpy's float64 default.
    X_test = np.empty((len(test_paths), 299, 299, 3), dtype=np.float32)
    for i, img_path in enumerate(test_paths):
        img = image.load_img(img_path).resize((299, 299))
        X_test[i] = image.img_to_array(img) / 255  # scale pixel values to [0, 1]

    pred_prob = model.predict(X_test)
    prob_avg = np.average(pred_prob, 0)   # mean probability per class
    pred_cls = np.argmax(pred_prob, 1)    # predicted class index per tile

    # Tiles predicted per class, counted once instead of re-summing each time.
    cls_counts = np.bincount(pred_cls, minlength=len(class_names))
    cls_fracs = cls_counts / len(pred_cls)

    resultMap = {'result type': ["avg prob", "count prob"]}
    for i, class_name in enumerate(class_names):
        resultMap[class_name] = ["%.4f" % prob_avg[i],
                                 "%d(%.4f)" % (cls_counts[i], cls_fracs[i])]
        csv_data[class_name].append(round(cls_fracs[i], 4))
        tile_summary[class_name] += cls_counts[i]
    print(pd.DataFrame(resultMap))

    # Majority vote over the tiles; on a tie the earlier class in class_names wins.
    max_label = class_names[int(np.argmax(cls_counts[:len(class_names)]))]
    if max_label == true_label:
        correct_patient_count += 1
    print('predict %s, true_label=%s, predict_label=%s' %
          ('success' if max_label == true_label else 'fail', true_label, max_label))
    total_patient_count += 1
    print()

print('----------summary----------')
if total_patient_count:
    print("patient: total:%d, correct:%d, correct_ratio:%.2f" % (
        total_patient_count, correct_patient_count,
        correct_patient_count / total_patient_count))
else:
    # Nothing was processed: report it instead of dividing by zero.
    print("patient: total:0, correct:0 (no tiles found under %s)" % test_dirs)

# Per class: [total tile count, share of all tiles].
total_tiles = sum(tile_summary.values())
summary_map = {
    class_name: [count, count / total_tiles if total_tiles else 0.0]
    for class_name, count in tile_summary.items()
}
print(pd.DataFrame(summary_map))

# Make sure the output directory exists so the results are not lost at the end.
result_dir = os.path.dirname(result_file)
if result_dir:
    os.makedirs(result_dir, exist_ok=True)

print('\nwrite result to %s \n' % os.path.abspath(result_file))
df = pd.DataFrame(csv_data)
df.to_csv(result_file, index=False, sep=',')

## Predict at slide level

In [None]:
# Settings for the slide-level evaluation.
result_file = '../result/result_diagnosis_tumor_slide_level.csv'
true_label = 'tumor'     # label every slide under test_dirs is compared against
mag = '5.0'              # name of the per-slide sub-directory the tiles are read from

# Data directory, resolved from the relative path to an absolute one.
test_dirs = os.path.abspath('../data/diagnosis/kirp')

In [None]:
# Slide-level evaluation: every entry under test_dirs is treated as one slide,
# and the slide gets the label predicted for the largest number of its tiles.
#
# tile_summary: tiles predicted per class, summed over all slides.
# csv_data:     per class, one entry per slide = fraction of that slide's
#               tiles predicted as the class (these become the CSV rows).
tile_summary = {class_name: 0 for class_name in class_names}
csv_data = {class_name: [] for class_name in class_names}

total_slide_count = 0
correct_slide_count = 0

# glob() returns entries in arbitrary order; sort so the CSV rows (which carry
# no slide name) come out in a reproducible order.
for test_dir in sorted(glob(test_dirs + '/*')):
    basename = os.path.basename(test_dir)
    print("processing", basename)

    test_paths = glob(test_dir + '/' + mag + '/*jpeg')
    if not test_paths:
        print(basename, "is empty")
        continue

    # float32 halves the buffer size compared with numpy's float64 default.
    X_test = np.empty((len(test_paths), 299, 299, 3), dtype=np.float32)
    for i, img_path in enumerate(test_paths):
        img = image.load_img(img_path).resize((299, 299))
        X_test[i] = image.img_to_array(img) / 255  # scale pixel values to [0, 1]

    pred_prob = model.predict(X_test)
    prob_avg = np.average(pred_prob, 0)   # mean probability per class
    pred_cls = np.argmax(pred_prob, 1)    # predicted class index per tile

    # Tiles predicted per class, counted once instead of re-summing each time.
    cls_counts = np.bincount(pred_cls, minlength=len(class_names))
    cls_fracs = cls_counts / len(pred_cls)

    resultMap = {'result type': ["avg prob", "count prob"]}
    for i, class_name in enumerate(class_names):
        resultMap[class_name] = ["%.4f" % prob_avg[i],
                                 "%d(%.4f)" % (cls_counts[i], cls_fracs[i])]
        csv_data[class_name].append(round(cls_fracs[i], 4))
        tile_summary[class_name] += cls_counts[i]
    print(pd.DataFrame(resultMap))

    # Majority vote over the tiles; on a tie the earlier class in class_names wins.
    max_label = class_names[int(np.argmax(cls_counts[:len(class_names)]))]
    if max_label == true_label:
        correct_slide_count += 1
    print('predict %s, true_label=%s, predict_label=%s' %
          ('success' if max_label == true_label else 'fail', true_label, max_label))
    total_slide_count += 1
    print()

print('----------summary----------')
if total_slide_count:
    print("slide: total:%d, correct:%d, correct_ratio:%.2f" % (
          total_slide_count, correct_slide_count, correct_slide_count / total_slide_count))
else:
    # Nothing was processed: report it instead of dividing by zero.
    print("slide: total:0, correct:0 (no tiles found under %s)" % test_dirs)

# Per class: [total tile count, share of all tiles].
total_tiles = sum(tile_summary.values())
summary_map = {
    class_name: [count, count / total_tiles if total_tiles else 0.0]
    for class_name, count in tile_summary.items()
}
print(pd.DataFrame(summary_map))

# Make sure the output directory exists so the results are not lost at the end.
result_dir = os.path.dirname(result_file)
if result_dir:
    os.makedirs(result_dir, exist_ok=True)

print('\nwrite result to %s \n' % os.path.abspath(result_file))
df = pd.DataFrame(csv_data)
df.to_csv(result_file, index=False, sep=',')

## Generate heatmap

Functions used for generating the heatmap

In [None]:
# Base RGB colour per class, scaled to [0, 1]:
#   index 0 ('normal') -> gray, index 1 ('tumor') -> orange
colors_list = np.array([[100, 100, 100], [255, 119, 51]]) / 255
# Per-channel distance from each base colour to white (1, 1, 1).
colors_rev_list = 1 - colors_list


def get_colors(index, prob):
    """Return the colour of class ``index`` faded towards white by ``1 - prob``.

    ``prob == 1`` gives the base colour from ``colors_list``; ``prob == 0``
    gives white.

    Args:
        index: row of ``colors_list`` to use (class index).
        prob: weight of the base colour, expected in [0, 1].

    Returns:
        The blended RGB colour as a numpy array.
    """
    base_color = colors_list[index]
    to_white = colors_rev_list[index]
    fade = 1 - prob
    return base_color + fade * to_white

def imsave_heapmap_grid(data, filename, block_size=10):
    """Render a grid of per-cell class probabilities as a colour image file.

    Every grid cell becomes a ``block_size`` x ``block_size`` square painted
    with ``get_colors(c, p)``, where ``c`` is the cell's most probable class
    and ``p`` that probability.  Cells whose probabilities sum to zero are
    left white.

    The colouring is computed for all cells at once and the blocks are
    produced with ``np.repeat``, replacing the previous per-pixel Python loops;
    the resulting image is the same.

    Args:
        data: array of shape (height, width, category) holding the class
            probabilities of each grid cell.
        filename: output path, passed to ``plt.imsave``.
        block_size: edge length, in pixels, of the square drawn per cell.
    """
    height, width, category = data.shape
    assert category <= 5
    # NOTE(review): the palette in colors_list may hold fewer colours than the
    # limit asserted above; a winning class index beyond it raises IndexError.

    cell_colors = np.ones(shape=(height, width, 3))  # white background
    has_pred = np.sum(data, axis=2) != 0             # cells that carry a prediction
    if has_pred.any():
        probs = data[has_pred]                       # (n_cells, category)
        c_max = np.argmax(probs, axis=1)             # class with max probability
        c_max_prob = np.max(probs, axis=1)           # max probability value
        # get_colors is plain numpy arithmetic, so it broadcasts over all cells.
        cell_colors[has_pred] = get_colors(c_max, c_max_prob[:, np.newaxis])

    # Blow every cell up to a block_size x block_size square.
    image_map = np.repeat(np.repeat(cell_colors, block_size, axis=0), block_size, axis=1)
    plt.imsave(filename, image_map)

Generate heatmap

In [None]:
# Settings for heatmap generation.
heatmap_dir = '../result/heatmap'   # directory the heatmap images are written to
mag = '5.0'                         # name of the per-slide sub-directory the tiles are read from

# Data directory, resolved from the relative path to an absolute one.
test_dirs = os.path.abspath('../data/diagnosis/kirp')

In [None]:
# Create the output directory once, up front.  makedirs also creates missing
# parent directories, which a plain os.mkdir would fail on.
os.makedirs(heatmap_dir, exist_ok=True)

# One heatmap per entry under test_dirs; sorted for a reproducible processing order.
for test_dir in sorted(glob(test_dirs + '/*')):
    basename = os.path.basename(test_dir)
    print("processing", basename)

    test_paths = glob(test_dir + '/' + mag + '/*jpeg')
    if not test_paths:
        print(basename, "is empty")
        continue

    # float32 halves the buffer size compared with numpy's float64 default.
    X_test = np.empty((len(test_paths), 299, 299, 3), dtype=np.float32)

    coords = []
    for i, img_path in enumerate(test_paths):
        # The last two '_'-separated fields of the file name are the tile's grid position.
        name = os.path.splitext(os.path.basename(img_path))[0]
        coords.append([int(part) for part in name.split("_")[-2:]])
        img = image.load_img(img_path).resize((299, 299))
        X_test[i] = image.img_to_array(img) / 255  # scale pixel values to [0, 1]

    pred_prob = model.predict(X_test)

    # Grid just large enough to hold every tile; cells without a tile stay all-zero.
    # NOTE(review): the first parsed number indexes the rows of the saved image.
    # If tiles are named <column>_<row>, the heatmap is transposed relative to
    # the slide — TODO confirm the naming convention.
    coords = np.array(coords)
    h, w = coords.max(axis=0) + 1
    prob_map = np.zeros(shape=(h, w, pred_prob.shape[1]))
    prob_map[coords[:, 0], coords[:, 1]] = pred_prob

    imsave_heapmap_grid(prob_map, os.path.join(heatmap_dir, basename + '_tumor.png'))
print("\nheatmaps generated in %s\n" % os.path.abspath(heatmap_dir))

## Save normal list (at patch level)

Classify tumor versus normal at patch level, and output a list of the filenames for predicted normal patches

In [None]:
# Settings for exporting the list of predicted-normal patches.
normal_list_file = '../result/normal_list.txt'   # output: one image path per line
mag = '5.0'                                      # name of the per-slide sub-directory the tiles are read from

# Data directory, resolved from the relative path to an absolute one.
test_dirs = os.path.abspath('../data/diagnosis/kirp')

In [None]:
# Make sure the output directory exists before opening the list file.
normal_list_dir = os.path.dirname(normal_list_file)
if normal_list_dir:
    os.makedirs(normal_list_dir, exist_ok=True)

# Write the path of every tile whose predicted 'normal' probability exceeds 0.5.
with open(normal_list_file, "w") as file:
    # Sorted so the list comes out in the same order on every run.
    for test_dir in sorted(glob(test_dirs + '/*')):
        basename = os.path.basename(test_dir)

        test_paths = sorted(glob(test_dir + '/' + mag + '/*jpeg'))
        if not test_paths:
            print(basename, "is empty")
            continue

        # float32 halves the buffer size compared with numpy's float64 default.
        X_test = np.empty((len(test_paths), 299, 299, 3), dtype=np.float32)
        for i, img_path in enumerate(test_paths):
            img = image.load_img(img_path).resize((299, 299))
            X_test[i] = image.img_to_array(img) / 255  # scale pixel values to [0, 1]

        # Columns of pred_prob: [normal, tumor]
        pred_prob = model.predict(X_test)

        print(basename)
        file.writelines(
            img_path + '\n'
            for img_path, normal_prob in zip(test_paths, pred_prob[:, 0])
            if normal_prob > 0.5
        )

# Report only after the file has been closed, i.e. fully written.
print("\nnormal list saved to %s\n" % os.path.abspath(normal_list_file))