# 7. CNN Scores

For each Emory case, count the blobs in each of the three confidence heatmaps following Tang's method, i.e. count all blobs in the confidence heatmap regardless of their location.

Tang provides the blob counts but not the scores (blob count * 1000 / tissue px area), so the scores are added here for easier use.

In [None]:
import sys
sys.path.append('..')
from modules import heatmap_analysis as ha
import pandas as pd
import numpy as np
from scipy import stats
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib.colors import LinearSegmentedColormap
import girder_client
import os
from os.path import join as oj
import pyvips as Vips
import matplotlib.pyplot as plt
from copy import deepcopy
from PIL import Image, ImageDraw
import cv2
from skimage.transform import resize
from tqdm import tqdm_notebook

In [None]:
# Tang's CSV ships with per-class blob counts only; derive the matching
# score columns (count * 1000 / Area) and write them back to the same file.
csv_path = '../CSVs/CNN_vs_CREAD.csv'
df = pd.read_csv(csv_path)

# One score column per pathology class, created in the order cored, diffuse, caa.
for pathology in ('cored', 'diffuse', 'caa'):
    count_column = 'CNN_' + pathology + '_count'
    score_column = 'tissue_' + pathology + '_score'
    df[score_column] = df[count_column].mul(1000).div(df['Area'])

# Overwrites the input CSV in place, now including the score columns.
df.to_csv(csv_path, index=False)
df.head()

In [None]:
# Parameters
# One threshold per heatmap class; the column assignments below use the order
# cored, diffuse, caa -- presumably these lists follow the same order
# (TODO confirm against ha.count_blobs).
confidence_thresholds = [0.1, 0.95, 0.9]
pixel_thresholds = [100, 1, 200]
heatmap_dir = '/mnt/Data/outputs/heatmaps_emory/'
csv_path = '../CSVs/Emory_data.csv'
df = pd.read_csv(csv_path)
gm_mask_dir = '/mnt/Data/outputs/gm_masks/'
save_dir = '/mnt/Data/outputs/blob_masks/'
os.makedirs(save_dir, exist_ok=True)

# Pathology classes, in the order their counts are written to the dataframe.
pathologies = ['cored', 'diffuse', 'caa']

# blob_counts[k] / gm_blob_counts[k] collect one count per case for class k.
blob_counts = [[] for _ in pathologies]
gm_blob_counts = [[] for _ in pathologies]

# Only the WSI id is needed per row, so iterate that column directly instead
# of building a full row Series with iterrows().
for case in tqdm_notebook(df['WSI_ID'], total=len(df)):
    heatmap_path = oj(heatmap_dir, case + '.npy')
    mask_path = oj(gm_mask_dir, case + '.png')
    output = ha.count_blobs(heatmap_path, confidence_thresholds, pixel_thresholds, mask_path=mask_path)

    # save blob images: output[0] for the whole heatmap, output[2] as the
    # '_gm' variant (presumably restricted to the gray matter mask -- TODO
    # confirm against ha.count_blobs)
    np.save(oj(save_dir, case), output[0])
    np.save(oj(save_dir, case + '_gm'), output[2])

    # append the counts for this case; a dedicated index name avoids
    # shadowing any outer loop variable
    for class_idx, (tissue_count, gm_count) in enumerate(zip(output[1], output[3])):
        blob_counts[class_idx].append(tissue_count)
        gm_blob_counts[class_idx].append(gm_count)

# add the pathology count columns (tissue first, then gm, to keep the
# original column order)
for region, region_counts in (('tissue', blob_counts), ('gm', gm_blob_counts)):
    for pathology, counts in zip(pathologies, region_counts):
        df['{}_{}_count'.format(region, pathology)] = counts

# add the CNN score columns: count * 1000 / pixel count of the matching region
for region in ('tissue', 'gm'):
    px_count = df['{}_px_count'.format(region)]
    for pathology in pathologies:
        count_column = '{}_{}_count'.format(region, pathology)
        score_column = '{}_{}_score'.format(region, pathology)
        df[score_column] = df[count_column] * 1000 / px_count

# save (overwrites the input CSV in place with the added columns)
df.to_csv(csv_path, index=False)
df.head()