In [31]:
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import time
from matplotlib import animation
from IPython.display import HTML

In [32]:
# Show the installed TensorFlow version as the cell output.
tf.__version__

'2.0.0-alpha0'

In [64]:
def loadLabels(directory, subset, injury):
    """
    Read one label CSV file into a pandas table.

    The file is expected at "<directory>/<subset>-<injury>.csv" (the layout
    of the MRNet release) and is parsed without a header row, so the
    resulting columns are named 0, 1, ...

    directory: dataset directory

    subset: "train" or "valid"

    injury: "abnormal", "acl", or "meniscus"

    return: a pandas DataFrame with integer column names

    """
    file_name = "-".join((subset, injury)) + ".csv"
    csv_path = "/".join((directory, file_name))
    return pd.read_csv(csv_path, header=None)

In [65]:
# Label tables for the three injury types, first for the training split and
# then for the validation split (same load order as one call per line).
train_abnormal, train_acl, train_meniscus = [
    loadLabels("data", "train", injury) for injury in ("abnormal", "acl", "meniscus")
]
val_abnormal, val_acl, val_meniscus = [
    loadLabels("data", "valid", injury) for injury in ("abnormal", "acl", "meniscus")
]

In [66]:
def generateLabelsArray(directory, subset, injury):
    """
    Return the label column of one label CSV as a numpy array.

    Loads the table with loadLabels and keeps only column 1 (column 0 is
    discarded).

    Assumes directory is directory with data in same format as MRNet release

    directory: dataset directory

    subset: "train" or "valid"

    injury: "abnormal", "acl", or "meniscus"

    return: a numpy array holding the values of column 1

    """
    label_table = loadLabels(directory, subset, injury)
    label_column = label_table[1]
    return label_column.values

In [68]:
# Column 1 of data/train-abnormal.csv as a numpy array; its length is used
# below as the number of training samples.
train_abnormal_labels = generateLabelsArray("data", "train", "abnormal")

In [5]:
def loadSeries(directory, subset, scanType, scanNumber):
    """
    Loads images in a series

    Assumes directory is directory with data in same format as MRNet release,
    i.e. the series is stored at
    "<directory>/<subset>/<scanType>/<NNNN>.npy" where NNNN is scanNumber
    zero-padded on the left to at least four characters.

    directory: dataset directory

    subset: "train" or "valid"

    scanType: "axial", "coronal", or "sagittal"

    scanNumber: the id of the dataset sample (e.g. 7 -> "0007.npy")

    return: the numpy array stored in the file (s x 256 x 256 for MRNet data)

    """
    # str.zfill replaces the manual "prepend '0' until four characters" loop.
    fileName = str(scanNumber).zfill(4) + ".npy"
    return np.load("/".join((directory, subset, scanType, fileName)))

In [79]:
# Load the axial series of training sample 500 into `test`
# (not referenced again in the visible part of the notebook).
test = loadSeries("data","train", "axial", 500)

In [94]:
# dataset stats
# Number of images per series, gathered over every training sample and all
# three scan planes in one pass (instead of one copy-pasted loop per plane).
# Note: this loads every training series from disk, so the cell is slow.
frame_counts = [
    loadSeries("data", "train", scan_type, sample_id).shape[0]
    for scan_type in ("axial", "coronal", "sagittal")
    for sample_id in range(len(train_abnormal_labels))
]

sum_frames = sum(frame_counts)
max_frames = max(frame_counts)
min_frames = min(frame_counts)

# len(frame_counts) == len(train_abnormal_labels) * 3, one entry per series.
print("Average frames " + str(sum_frames / len(frame_counts)))
print("Maximum frames " + str(max_frames))
print("Minimum frames " + str(min_frames))

Average frames 31.50353982300885
Maximum frames 61
Minimum frames 17


In [107]:
def createSeriesArray(directory, subset, scanType, num_samples, start=0, num_frames=32, dtype=np.float64):
    """
    Stacks several series into one fixed-size array

    Every series is forced to exactly num_frames images: series with more
    images keep only their first num_frames, series with fewer are
    zero-padded at the end.

    Assumes directory is directory with data in same format as MRNet release
    and that every image is 256 x 256.

    directory: dataset directory

    subset: "train" or "valid"

    scanType: "axial", "coronal", or "sagittal"

    num_samples: how many consecutive samples to load

    start: id of the first sample to load (samples start .. start + num_samples - 1)

    num_frames: number of images kept per series (default 32)

    dtype: dtype of the returned array (default float64, numpy's default for
           np.zeros); a smaller dtype reduces memory use for large num_samples

    return: a numpy array num_samples x num_frames x 256 x 256

    """
    data = np.zeros((num_samples, num_frames, 256, 256), dtype=dtype)
    for n in range(num_samples):
        series = loadSeries(directory, subset, scanType, start + n)
        # Copy straight into the zero-initialised slot: the untouched tail is
        # the padding, so no temporary padded copy is needed.
        kept = min(series.shape[0], num_frames)
        data[n, :kept] = series[:kept]
    return data
        

In [112]:
# Stack the axial series of every training sample into one array with 32
# images per series. NOTE(review): the array is float64 with 32 x 256 x 256
# values per sample, i.e. 16 MiB per sample -- check available memory.
train_axial = createSeriesArray("data","train", "axial", len(train_abnormal_labels))

In [41]:
def seriesToVideo(seriesArray, interval=1000):
    """
    Converts an array containing a series of images to an IPython video

    seriesArray: a numpy array containing a series of images s x Height x Width;
                 one video frame is produced per image along the first axis

    interval: delay between frames, passed through to FuncAnimation
              (milliseconds, per the matplotlib documentation)

    return: an IPython HTML video

    NOTE(review): to_html5_video needs a working video writer (typically
    ffmpeg) on the machine -- confirm it is installed.
    """
    fig, ax = plt.subplots()

    def showFrame(frame):
        # Clear first so images do not pile up on the axes frame after frame.
        ax.clear()
        return [ax.imshow(frame, cmap="gray")]

    try:
        # Animate the argument itself, not an unrelated global name.
        anim = animation.FuncAnimation(fig, showFrame, frames=seriesArray, interval=interval)
        video = HTML(anim.to_html5_video())
    finally:
        # Close this specific figure even if encoding fails, so it neither
        # leaks nor shows up as a stray static plot under the cell.
        plt.close(fig)
    return video

In [110]:
# Render the axial series of training sample 0 as a video (the cell's output).
seriesToVideo(loadSeries("data","train", "axial", 0))