# Select Feature Images

## Import Modules

In [0]:
import os
import shutil

import matplotlib.pyplot as plt
import numpy as np
import skimage.filters as filters
from google.colab import drive
from scipy.io import loadmat, savemat
from skimage.transform import rescale

## Get a List of File Names of the Dataset


We assume that the dataset files are MATLAB '.mat' files and that the image dataset has been uploaded to Google Drive in advance. The dataset can be placed anywhere in the drive; in our case, it is stored in '/CS168-Automatic-TICI/data', where CS168-Automatic-TICI is the project repository.

In [0]:
# Remember the working directory the notebook was started in.
BASE_DIR = os.getcwd()

# Mount Google Drive inside the virtual machine running this notebook,
# in a 'drive' folder under the working directory.
# The mount call prompts for authorization.
DRIVE_MOUNT_DIR = os.path.join(BASE_DIR, 'drive')
drive.mount(DRIVE_MOUNT_DIR, force_remount=True)

Mounted at /content/drive


In [0]:
# Google Drive is now accessible at 'DRIVE_MOUNT_DIR/My Drive'.
# Build the path of the data set directory inside the project repository.
REPOSITORY_NAME = 'CS168-Automatic-TICI'
REPOSITORY = os.path.join(DRIVE_MOUNT_DIR, 'My Drive', REPOSITORY_NAME)
DATA_DIR_NAME = 'data'
DATA_DIR = os.path.join(REPOSITORY, DATA_DIR_NAME)

# Collect the names (relative to DATA_DIR, not full paths) of all mat files
# located directly in the data directory, in sorted order.
# The names are later joined with DATA_DIR to read each file, so only
# top-level entries are listed. os.listdir raises FileNotFoundError when
# DATA_DIR does not exist, instead of leaving `files` undefined.
files = sorted(
    fname for fname in os.listdir(DATA_DIR)
    if fname.lower().endswith('.mat')
    and os.path.isfile(os.path.join(DATA_DIR, fname))
)
nfiles = len(files)
print('{} files found in the data directory \'{}\'.'.format(nfiles, DATA_DIR))

201 files found in the data directory '/content/drive/My Drive/CS168-Automatic-TICI/data'.


In [0]:
# Directory (inside the repository) that receives the selected feature images.
SAVE_DIR_NAME = 'feature_images'
SAVE_DIR = os.path.join(REPOSITORY, SAVE_DIR_NAME)

# Start from an empty output directory: remove the results of a previous run,
# then create the directory again. Pure Python is used instead of shell
# commands so that failures raise an exception and the path never has to be
# quoted for a shell.
if os.path.isdir(SAVE_DIR):
    shutil.rmtree(SAVE_DIR)
os.mkdir(SAVE_DIR)

## Algorithm to Select Feature Image

In [0]:
def gray2binary(image_set):
    """Binarize every image of an image set with its own Li threshold.

    A threshold is computed for each image with filters.threshold_li.
    According to the original note, threshold_li may emit a divide-by-zero
    RuntimeWarning for some images and then yields a threshold of 0; images
    whose threshold is not strictly positive are therefore dropped.

    Args:
        image_set: sequence of grayscale images.

    Returns:
        A pair (kept_images, binary_images) of equally long lists:
        kept_images holds the images that received a positive threshold, and
        binary_images holds the matching boolean images (pixel > threshold).
    """
    thresholds = [filters.threshold_li(image) for image in image_set]

    kept_images = []
    binary_images = []
    for image, threshold in zip(image_set, thresholds):
        # Skip images without a usable (strictly positive) threshold.
        if not threshold > 0:
            continue
        kept_images.append(image)
        binary_images.append(image > threshold)
    return kept_images, binary_images

In [0]:
def summarize_middle_artery(image_set):
    """Summarize the top-middle region of every image, row by row.

    For each image only the upper quarter of the rows and the columns between
    3/8 and 5/8 of the width are considered. Each row of that region is
    reduced to one boolean: True when every pixel of the row is truthy.

    All images are assumed to have the same dimensions.

    Args:
        image_set: sequence (or 3-D array) of images, indexed as
            [image, row, column]. Typically the boolean images produced by
            thresholding.

    Returns:
        A boolean np.ndarray of shape (image_count, image_height // 4).
        An empty image set yields an empty array of shape (0, 0) instead of
        raising, so callers can still unpack two dimensions.
    """
    images = np.asarray(image_set)

    # An empty list has no height/width to unpack; return an empty summary.
    if images.ndim < 3 and images.size == 0:
        return np.zeros((0, 0), dtype=bool)

    _, image_height, image_width = images.shape

    # Only consider the middle 1/4 (in width) of the upper 1/4 of the image.
    top = int(image_height / 4)
    left = int(image_width * 3 / 8)
    right = int(image_width * 5 / 8)
    middle_sections = images[:, 0:top, left:right]

    # A row counts only if all of its pixels in the region are set.
    return np.all(middle_sections, axis=2)

In [0]:
def choose_image(image_set):
    """Pick one feature image out of an image set.

    The images are binarized with gray2binary and summarized with
    summarize_middle_artery. The frames are then scanned in order:
    once a frame has been seen whose summary is more than half non-zero,
    the first later frame whose summary is non-empty but less than half
    non-zero is selected.

    Args:
        image_set: sequence of grayscale images, indexed by frame first.

    Returns:
        (True, image) with the selected grayscale image, or (False, None)
        when no frame qualifies, including when thresholding discarded
        every frame.
    """
    new_image_set, binary_image_set = gray2binary(image_set)

    # Nothing survived thresholding: there is no summary to inspect, and
    # unpacking the shape of an empty summary would fail.
    if len(binary_image_set) == 0:
        return False, None

    summaries = summarize_middle_artery(binary_image_set)
    image_count, summary_length = np.shape(summaries)
    half_length = summary_length / 2

    seen_mostly_filled = False
    for i in range(image_count):
        filled_rows = np.count_nonzero(summaries[i])
        if seen_mostly_filled and 0 < filled_rows < half_length:
            return True, new_image_set[i]
        elif filled_rows > half_length:
            seen_mostly_filled = True
    return False, None

## Read the Files, Extract Feature Images and TICIs

Each mat file contains an image set that represents the revascularization of a patient, and a TICI score. Read the files and output the image sets and TICI scores. Explore one of the image sets to get the image set dimensions.

In [0]:
def extract_feature_image(data_dir, fname, save_dir, verbose=False):
    """Select the feature image of one mat file and save it with its TICI score.

    The mat file 'data_dir/fname' is expected to hold an image set under the
    key 'X' (with X[:, :, k] being the kth image) and a TICI score under the
    key 'TICI_report'. One image is selected with choose_image and written,
    together with the TICI score as a string, to 'save_dir/fname' under the
    keys 'image' and 'TICI'.

    Args:
        data_dir: directory containing the input mat file.
        fname: name of the mat file; also used as the output file name.
        save_dir: existing directory that receives the output mat file.
        verbose: when True, print the keys found in the mat file.

    Returns:
        (success, message):
            success -- boolean, True when a feature image was saved.
            message -- string, the reason if success is False, else ''.
    """
    content = loadmat(os.path.join(data_dir, fname))

    # If verbose is True, print the dictionary keys of the mat file content.
    if verbose:
        print('keys = {}'.format(sorted(content.keys())))

    # Report an incomplete file through the return value so that one bad file
    # does not abort the processing of the whole data set.
    missing_keys = [key for key in ('X', 'TICI_report') if key not in content]
    if missing_keys:
        return False, 'Missing key(s) in the mat file: {}.'.format(
            ', '.join(missing_keys))

    raw_image_set, TICI = content['X'], content['TICI_report']

    # The TICI scores in the mat files are in the form of nested np.ndarray's
    # of either strings, numbers, or nan. e.g., ['2a'], [[3]], [[nan]].
    # Unwrap the first element until a scalar is left, then unify the format
    # into a string. `size`/`flat` also work for 0-d and empty arrays.
    while isinstance(TICI, np.ndarray):
        TICI = TICI.flat[0] if TICI.size > 0 else ''
    TICI = str(TICI)

    # Ignore the image sets whose TICI score is missing.
    if TICI.lower() == 'nan':
        return False, 'The TICI score is "nan".'
    if TICI == '':
        return False, 'The TICI score is empty.'

    if np.ndim(raw_image_set) != 3:
        return False, 'The image set is not 3-dimensional.'

    # Originally, raw_image_set[:, :, k] is the kth image.
    # Reorder the dimensions such that image_set[k, :, :] is the kth image.
    image_set = np.transpose(raw_image_set, (2, 0, 1))

    # Only one image from each image set is selected to be fed into the model.
    imgNotNone, image = choose_image(image_set)
    if not imgNotNone:
        return False, 'Feature image not found.'

    savemat(os.path.join(save_dir, fname), {
        'image': image,
        'TICI': TICI
    })
    return True, ''

In [0]:
# Run the feature image extraction over every mat file found in the data
# directory, counting the files that had to be skipped.
ignored_files_count = 0

for n, fname in enumerate(files):
    # Print the progress on every tenth file and on the last one.
    if n == nfiles - 1 or n % 10 == 0:
        print('{} / {} done'.format(n, nfiles))

    success, message = extract_feature_image(DATA_DIR, fname, SAVE_DIR)
    if success:
        continue

    # The file produced no feature image; report why it was skipped.
    ignored_files_count += 1
    print('Data file {} ignored. Reason: {}'.format(fname, message))

print('\n\nFinished loading all {} files. {} of them are ignored.'.format(
    nfiles, ignored_files_count))


0 / 201 done


  (np.log(mean_back) - np.log(mean_fore)))


Data file fractals_101.mat ignored. Reason: Feature image not found.
Data file fractals_102.mat ignored. Reason: Feature image not found.
Data file fractals_103.mat ignored. Reason: The TICI score is "nan".
10 / 201 done
Data file fractals_108.mat ignored. Reason: Feature image not found.
Data file fractals_115.mat ignored. Reason: The TICI score is "nan".
20 / 201 done
Data file fractals_117.mat ignored. Reason: The TICI score is "nan".
Data file fractals_124.mat ignored. Reason: The TICI score is "nan".
30 / 201 done
Data file fractals_127.mat ignored. Reason: Feature image not found.


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Data file fractals_132.mat ignored. Reason: The TICI score is "nan".
Data file fractals_134.mat ignored. Reason: The TICI score is "nan".
40 / 201 done
Data file fractals_137.mat ignored. Reason: Feature image not found.
Data file fractals_139.mat ignored. Reason: Feature image not found.
Data file fractals_14.mat ignored. Reason: Feature image not found.
Data file fractals_140.mat ignored. Reason: Feature image not found.
50 / 201 done
Data file fractals_144.mat ignored. Reason: Feature image not found.
Data file fractals_150.mat ignored. Reason: Feature image not found.
60 / 201 done
Data file fractals_153.mat ignored. Reason: Feature image not found.
Data file fractals_156.mat ignored. Reason: Feature image not found.
Data file fractals_16.mat ignored. Reason: Feature image not found.
Data file fractals_160.mat ignored. Reason: Feature image not found.
70 / 201 done
80 / 201 done
Data file fractals_172.mat ignored. Reason: Feature image not found.
Data file fractals_173.mat ignored.