# Capstone: Development of an algorithm for automatic detection of meniscus tears in radiographic images of the knee.

### Contents:
- [Background](#Background)
- [Data](#Data)



In [163]:
# Import libraries here
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob

import random as random
import imageio
import imgaug as ia
from tensorflow import keras
from tensorflow.keras.preprocessing.image import save_img
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import utils
pd.options.mode.chained_assignment = None  # default='warn'

import torch

**Data from [MRNet](https://stanfordmlgroup.github.io/competitions/mrnet/):** 
* Images of 1370 knee MRI exams performed at Stanford University Medical Center. The dataset contains 1,104 (80.6%) abnormal exams,  with 319 (23.3%) ACL tears and 508 (37.1%) meniscal tears; labels were obtained through manual extraction from clinical reports. 
* train set: consists of 1130 MRI images from coronal, sagittal and transverse planes
* valdation set: consists of 120 MRI images from coronal, sagittal and transverse planes


## Data import

### Training set


**import data and get labels**

In [2]:
# from google.colab import drive
# drive.mount('/content/drive') 

In [3]:
# /content/drive/MyDrive/Colab Notebooks/data

In [4]:
# labels_train_abnormal = pd.read_csv('../data/train-abnormal.csv', names=['nr','abnormal'])
# labels_train_meniscus = pd.read_csv('../data/train-meniscus.csv', names=['nr','meniscus'])
# labels_train_acl = pd.read_csv('../data/train-acl.csv', names=['nr','acl'])

In [5]:
# labels_valid_abnormal = pd.read_csv('../data/valid-abnormal.csv', names=['nr','abnormal'])
# labels_valid_meniscus = pd.read_csv('../data/valid-meniscus.csv', names=['nr','meniscus'])
# labels_valid_acl = pd.read_csv('../data/valid-acl.csv', names=['nr','acl'])

In [164]:
train_path = '../data/train/'
valid_path = '../data/valid/'

In [165]:
datasets = {'train': train_path, 'valid': valid_path}
planes = ['axial', 'coronal', 'sagittal']
diagnosis = ['abnormal','meniscus','acl']

In [166]:
def get_data(dataset_type, plane, diagnosis):
    # for eg, (train, sagittal, acl)
    if dataset_type == 'train':
        go = train_path
    else:
        go = valid_path
        
#     img_path = glob(f'{go}/{plane}/*.npy')
    plane_dir = f'{go}/{plane}'
    data = os.listdir(plane_dir)
    
    label_path = f'../data/{dataset_type}-{diagnosis}.csv'
    label_df = pd.read_csv(label_path, names=['nr', diagnosis])
    label_data = list(label_df[diagnosis])
    
    return(data, plane_dir, label_data)

In [167]:
# training data
train_coronal_data, train_coronal_dir, train_coronal_label = get_data('train', 'coronal', 'abnormal')

In [168]:
# validation data
valid_coronal_data, valid_coronal_dir, valid_coronal_label = get_data('valid','coronal','abnormal')

In [169]:
# unfortunately also includes '.DS_Store' file
train_coronal_data

['.DS_Store',
 '0000.npy',
 '0001.npy',
 '0002.npy',
 '0003.npy',
 '0004.npy',
 '0005.npy',
 '0006.npy',
 '0007.npy',
 '0008.npy',
 '0009.npy']

In [170]:
len(train_coronal_data)

11

In [171]:
train_coronal_label

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [172]:
# Create a data generator which will perform real-time data feeding to the Keras model.
# Generators do not store all the values in memory, they generate the values on the fly
def data_gen(data, label, path, data_length):
    all_data = list(zip(data,label))
    random.shuffle(all_data)
    i = 0
    while(True):
        if(i == data_length):
            yield(None,None)
            break
        for pair in all_data:
            if(pair[0] != '.DS_Store'):
                img = np.load(os.path.join(path,pair[0]),allow_pickle=True)
                # img = img.astype(np.uint8)  # ensure data type range(0,255) before augmenting rgb
                # add data augment here
                yield (img,np.repeat(pair[1],img.shape[0]))
        i += 1

In [173]:
# data generation for train - coronal_abnormal
coronal_abnormal = data_gen(train_coronal_data, train_coronal_label, train_coronal_dir, 10)

In [174]:
# data generation for validation - coronal_abnormal
coronal_abnormal_val = data_gen(valid_coronal_data, valid_coronal_label, valid_coronal_dir, 10)

In [125]:
# # check generator (remove random)
# for i in coronal_abnormal:
#     print(i)

(array([[[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 2, 1, 2],
        ...,
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 0, 1, 0]],

       [[0, 0, 0, ..., 1, 1, 0],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 2, 2, 2],
        ...,
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 1, 1, 2],
        [0, 0, 0, ..., 1, 1, 1]],

       [[0, 0, 0, ..., 1, 0, 1],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 2, 1, 2],
        ...,
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 1, 1, 2],
        [0, 0, 0, ..., 1, 1, 1]],

       ...,

       [[0, 0, 0, ..., 0, 0, 1],
        [0, 0, 0, ..., 2, 1, 2],
        [0, 0, 0, ..., 5, 4, 4],
        ...,
        [0, 0, 0, ..., 3, 3, 5],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 1, 1],
        [0, 0, 0, ..., 3, 3, 3],
        [0, 0, 0, ..., 4, 5, 4],
        ...,
        [0, 0, 0, ...,

(array([[[ 1,  1,  1, ...,  0,  0,  0],
        [ 3,  3,  3, ...,  0,  0,  0],
        [ 3,  3,  3, ...,  1,  0,  0],
        ...,
        [ 4,  3,  3, ...,  0,  0,  0],
        [ 3,  3,  3, ...,  0,  0,  0],
        [ 2,  2,  2, ...,  0,  0,  0]],

       [[ 1,  1,  1, ...,  0,  0,  0],
        [ 2,  2,  3, ...,  0,  0,  0],
        [ 4,  3,  4, ...,  1,  1,  0],
        ...,
        [ 3,  3,  4, ...,  0,  0,  0],
        [ 3,  3,  3, ...,  0,  0,  0],
        [ 2,  1,  1, ...,  0,  0,  0]],

       [[ 1,  1,  1, ...,  0,  0,  0],
        [ 2,  2,  2, ...,  0,  0,  0],
        [ 3,  3,  3, ...,  1,  1,  1],
        ...,
        [ 3,  3,  2, ...,  0,  0,  0],
        [ 2,  2,  2, ...,  0,  0,  0],
        [ 2,  1,  2, ...,  0,  0,  0]],

       ...,

       [[ 0,  0,  0, ...,  0,  0,  0],
        [ 1,  2,  2, ...,  0,  0,  0],
        [ 4,  4,  5, ...,  0,  0,  0],
        ...,
        [ 3,  3,  3, ...,  0,  0,  0],
        [ 0,  0,  0, ...,  0,  0,  0],
        [ 0,  0,  0, ...,  0,  

(array([[[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 3, 3, 3],
        ...,
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 1, 1, 1]],

       [[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 2, 4, 4],
        ...,
        [0, 0, 0, ..., 3, 3, 2],
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 1, 1, 1]],

       [[0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 4, 3, 3],
        ...,
        [0, 0, 0, ..., 3, 3, 3],
        [0, 0, 0, ..., 2, 2, 2],
        [0, 0, 0, ..., 1, 1, 1]],

       ...,

       [[0, 0, 0, ..., 1, 0, 0],
        [0, 0, 0, ..., 3, 2, 2],
        [0, 0, 0, ..., 3, 3, 3],
        ...,
        [0, 0, 0, ..., 3, 3, 3],
        [0, 0, 0, ..., 1, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 1, 1, 1],
        [0, 0, 0, ..., 2, 2, 2],
        ...,
        [0, 0, 0, ...,

In [None]:
# y_train = label_data['meniscus']

In [None]:
# # https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
# class DataGenerator(keras.utils.Sequence):
#     'Generates data for Keras'
#     def __init__(self, list_IDs):
#         'Initialization'
#         self.list_IDs = list_IDs
#         self.on_epoch_end()

#     def __len__(self):
#     'Denotes the number of batches per epoch'
#         return int(len(self.list_IDs))

#     def __getitem__(self, index):
#     'Generate one batch of data'
#         # Generate indexes of the batch
#         indexes = self.indexes[index:(index+1)]

#     # Find list of IDs
#     list_IDs_temp = [self.list_IDs[k] for k in indexes]

#     # Generate data
#     X, y = self.__data_generation(list_IDs_temp)

#     return X, y

In [None]:
# def on_epoch_end(self):
#     'Updates indexes after each epoch'
#     self.indexes = np.arange(len(self.list_IDs))

In [None]:
# def __data_generation(self, list_IDs_temp):
#     'Generates data containing batch_size samples'
#     data_loc = img_path
#     # Generate data
#     for ID in list_IDs_temp:
#         x_file_path = os.path.join(data_loc, ID)
#         y_file_path = os.path.join(data_loc, image_label_map.get(ID))

#     # Store sample
#     X = np.load(x_file_path)

#     # Store class
#     y = np.load(y_file_path)

#     return X, y

In [None]:
# # Parameters
# params = {'dim': (32,32,32),
#           'batch_size': 64,
#           'n_classes': 2,
#           'n_channels': 1,
#           'shuffle': True}


## Model building

**CNN**

In [175]:
cnn_model = Sequential()

In [176]:
# 1st layer
# input layer before augmentation is (s,256,256)
# cnn_model.add(Flatten(input_shape=(256,256,1)))
cnn_model.add(Conv2D(filters = 96,             # number of filters
                     kernel_size = (11, 11),   # height/width of filter
                     activation='relu',        # activation function 
                     input_shape=(256,256,1),  # shape of input (image)
                     strides=(4,4),
                     padding='valid')) 

# Max pooling
cnn_model.add(MaxPooling2D(pool_size=(3,3),
                          strides=(2,2),
                          padding='valid'))

In [177]:
# 2nd layer
cnn_model.add(Conv2D(filters = 256,             
                     kernel_size = (5, 5),   
                     activation='relu',       
                     strides=(1,1),
                     padding='valid')) 

# Max pooling
cnn_model.add(MaxPooling2D(pool_size=(3,3),
                          strides=(2,2),
                          padding='valid'))

In [178]:
# # 3rd layer
# cnn_model.add(Conv2D(filters = 384,             
#                      kernel_size = (3, 3),   
#                      activation='relu',       
#                      strides=(1,1),
#                      padding='valid')) 

In [179]:
# # 4th layer
# cnn_model.add(Conv2D(filters = 384,             
#                      kernel_size = (3, 3),   
#                      activation='relu',       
#                      strides=(1,1),
#                      padding='valid')) 

In [180]:
# # 5th layer
# cnn_model.add(Conv2D(filters = 256,             
#                      kernel_size = (3, 3),   
#                      activation='relu',       
#                      strides=(1,1),
#                      padding='valid')) 

In [181]:
# flatten from box to vertical line of nodes
cnn_model.add(Flatten())

cnn_model.add(Dense(64, activation='relu'))

In [182]:
# Add a final layer with 2 neurons.
cnn_model.add(Dense(2, activation='softmax'))

In [183]:
cnn_model.summary()

Model: "sequential_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_43 (Conv2D)           (None, 62, 62, 96)        11712     
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 30, 30, 96)        0         
_________________________________________________________________
conv2d_44 (Conv2D)           (None, 26, 26, 256)       614656    
_________________________________________________________________
max_pooling2d_18 (MaxPooling (None, 12, 12, 256)       0         
_________________________________________________________________
flatten_12 (Flatten)         (None, 36864)             0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                2359360   
_________________________________________________________________
dense_5 (Dense)              (None, 2)               

In [184]:
# Compile model
cnn_model.compile(loss='binary_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

In [185]:
# Fit model on training data
hst = cnn_model.fit(coronal_abnormal,
                        batch_size=256,
                        validation_data=coronal_abnormal_val,
                        epochs=300,
                        verbose=1)

Epoch 1/300


ValueError: in user code:

    C:\Users\USER\anaconda3\envs\dsi24\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    C:\Users\USER\anaconda3\envs\dsi24\lib\site-packages\tensorflow\python\keras\engine\training.py:796 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\USER\anaconda3\envs\dsi24\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1211 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\USER\anaconda3\envs\dsi24\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2585 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\USER\anaconda3\envs\dsi24\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2945 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\USER\anaconda3\envs\dsi24\lib\site-packages\tensorflow\python\keras\engine\training.py:789 run_step  **
        outputs = model.train_step(data)
    C:\Users\USER\anaconda3\envs\dsi24\lib\site-packages\tensorflow\python\keras\engine\training.py:747 train_step
        y_pred = self(x, training=True)
    C:\Users\USER\anaconda3\envs\dsi24\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:975 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs,
    C:\Users\USER\anaconda3\envs\dsi24\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:191 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential_21 is incompatible with the layer: : expected min_ndim=4, found ndim=3. Full shape received: [None, None, None]


In [None]:
# abnormalAccuracys=[]
# coronal_abnormal = model.fit(coronal_abnormal, epochs = 300,steps_per_epoch= coronalt_length,validation_data= coronal_abnormal_val,validation_steps=coronalv_length )
# abnormalAccuracys.append(Average(coronal_abnormal.history['val_acc']))
# plot_graphs(coronal_abnormal)