# CONVOLUTIONAL NEURAL NETWORK

## Project: Write an algorithm for bone X-ray detection

In [4]:
import pandas as pd
# Show up to 99 characters per cell so long values (e.g. file paths) are not cut off when frames are displayed.
pd.set_option('display.max_colwidth', 99)

import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image, ImageFilter, ImageEnhance, ImageOps
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, cohen_kappa_score
from numpy.random import seed
from tensorflow import set_random_seed
import random
# Seed Python's `random`, NumPy's global RNG and TensorFlow with the same value for repeatable runs.
# NOTE(review): a top-level `tensorflow.set_random_seed` is the TF 1.x name (TF 2.x uses
# `tf.random.set_seed`) - confirm against the pinned TensorFlow version.
random.seed(2048)
seed(2048)
set_random_seed(2048)

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization, LeakyReLU
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD, RMSprop, Adam
from keras.callbacks import EarlyStopping
from keras import regularizers
from keras.regularizers import l2
from keras import backend as K
import gc

## Import Dataset
In the code cell below we import the CSV files that index the training and validation data:

- `train_image_paths.csv`
- `train_labeled_studies.csv`
- `valid_image_paths.csv`
- `valid_labeled_studies.csv`


In [5]:
# Read the four MURA index files. Column names are supplied explicitly through
# `names`, so the first row of every CSV is treated as data rather than a header.
train_image_paths = pd.read_csv(
    'MURA-v1.1/train_image_paths.csv',
    names=['filepath'],
)
train_labeled_studies = pd.read_csv(
    'MURA-v1.1/train_labeled_studies.csv',
    names=['path', 'class'],
    dtype={1: str},
)
valid_image_paths = pd.read_csv(
    'MURA-v1.1/valid_image_paths.csv',
    names=['filepath'],
)
valid_labeled_studies = pd.read_csv(
    'MURA-v1.1/valid_labeled_studies.csv',
    names=['path', 'class'],
    dtype={1: str},
)


## Data Preprocessing

Split every image path into its components (body part, patient, study folder, file name) and merge it with the study labels. The validation split is used as the test set.


In [6]:

# Manipulating the dataframes
def _attach_study_labels(image_paths, labeled_studies):
    """Add path-derived columns to `image_paths` and join each image with its study label.

    Parameters
    ----------
    image_paths : pandas.DataFrame
        Frame with a `filepath` column. It is modified in place: the columns
        `case`, `patient`, `patient_folder`, `patient_file` and `merge_path`
        are added to it.
    labeled_studies : pandas.DataFrame
        Frame with `path` and `class` columns.

    Returns
    -------
    pandas.DataFrame
        Inner join of the two frames on the study folder, without the helper
        join columns (`merge_path`, `path`).
    """
    # Split each path once instead of once per derived column.
    segments = image_paths['filepath'].str.split('/')
    path_columns = (('case', 2), ('patient', 3), ('patient_folder', 4), ('patient_file', 5))
    for column, position in path_columns:
        image_paths[column] = segments.str[position]

    # Everything up to the last '/', with the slash restored; this is the join key
    # matched against `labeled_studies['path']`.
    image_paths['merge_path'] = image_paths['filepath'].str.rpartition('/')[0] + '/'

    return (
        image_paths
        .merge(labeled_studies, how='inner', left_on='merge_path', right_on='path')
        .drop(columns=['merge_path', 'path'])
    )


train_data_df = _attach_study_labels(train_image_paths, train_labeled_studies)
# The validation split is what the rest of the notebook treats as the test set.
test_data_df = _attach_study_labels(valid_image_paths, valid_labeled_studies)

train_data_df.head(10)

Unnamed: 0,filepath,case,patient,patient_folder,patient_file,class
0,MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image1.png,XR_SHOULDER,patient00001,study1_positive,image1.png,1
1,MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image2.png,XR_SHOULDER,patient00001,study1_positive,image2.png,1
2,MURA-v1.1/train/XR_SHOULDER/patient00001/study1_positive/image3.png,XR_SHOULDER,patient00001,study1_positive,image3.png,1
3,MURA-v1.1/train/XR_SHOULDER/patient00002/study1_positive/image1.png,XR_SHOULDER,patient00002,study1_positive,image1.png,1
4,MURA-v1.1/train/XR_SHOULDER/patient00002/study1_positive/image2.png,XR_SHOULDER,patient00002,study1_positive,image2.png,1
5,MURA-v1.1/train/XR_SHOULDER/patient00002/study1_positive/image3.png,XR_SHOULDER,patient00002,study1_positive,image3.png,1
6,MURA-v1.1/train/XR_SHOULDER/patient00003/study1_positive/image1.png,XR_SHOULDER,patient00003,study1_positive,image1.png,1
7,MURA-v1.1/train/XR_SHOULDER/patient00003/study1_positive/image2.png,XR_SHOULDER,patient00003,study1_positive,image2.png,1
8,MURA-v1.1/train/XR_SHOULDER/patient00003/study1_positive/image3.png,XR_SHOULDER,patient00003,study1_positive,image3.png,1
9,MURA-v1.1/train/XR_SHOULDER/patient00004/study1_positive/image1.png,XR_SHOULDER,patient00004,study1_positive,image1.png,1


In [7]:
# Values found in the `case` column:
#   XR_SHOULDER, XR_HUMERUS, XR_FINGER, XR_ELBOW, XR_WRIST, XR_FOREARM, XR_HAND
# To restrict the run to a single body part, filter on `case` instead, e.g.
#   xr_train_df = train_data_df[train_data_df.case=='XR_WRIST']
#   xr_test_df = test_data_df[test_data_df.case=='XR_WRIST']
# Here every body part is kept. Plain assignment only binds new names to the
# same DataFrame objects; no data is copied.
xr_train_df, xr_test_df = train_data_df, test_data_df

## Shuffling of the training data

In [8]:
# Shuffle the training rows. `random_state` pins the permutation so that the cell
# no longer depends on how much of the global NumPy RNG was consumed before it ran.
# The index is rebuilt as 0..n-1 because `make_dataset` uses the parity of the row
# index to decide which augmented copy gets inverted.
xr_train_df = xr_train_df.sample(frac=1, random_state=2048).reset_index(drop=True)

## Defining the functions for processing the images before producing the final dataset

In [9]:
pic_size = 256  # edge length (pixels) that make_dataset resizes every image to

def make_square(im, min_size=256, fill_color=0):
    """Centre `im` on a square grayscale canvas, then boost contrast and sharpen.

    Parameters
    ----------
    im : PIL.Image.Image
        Image to pad.
    min_size : int
        Smallest allowed canvas edge; the canvas edge is the largest of
        `min_size`, the image width and the image height.
    fill_color : int
        Gray level used for the padding.

    Returns
    -------
    PIL.Image.Image
        Square mode-'L' image with contrast enhanced by a factor of 1.1 and the
        SHARPEN filter applied.
    """
    width, height = im.size
    side = max(min_size, width, height)

    canvas = Image.new('L', (side, side), fill_color)
    # Top-left corner that centres the original image on the canvas.
    offset = ((side - width) // 2, (side - height) // 2)
    canvas.paste(im, offset)

    boosted = ImageEnhance.Contrast(canvas).enhance(1.1)
    sharpened = boosted.filter(ImageFilter.SHARPEN)
    return sharpened

def invert_image(im):
    """Return the negative of `im`, as produced by `ImageOps.invert`."""
    negative = ImageOps.invert(im)
    return negative

def _to_unit_array(im):
    """Convert a PIL image to a NumPy array with pixel values divided by 255."""
    # NOTE(review): dividing the pixel array by an int yields float64; casting to
    # float32 would halve the memory footprint - confirm downstream expectations first.
    return np.array(im) / 255


def make_dataset(dataframe_var, test=False):
    """Load, normalise and (for training data) augment the images listed in a frame.

    Every image is opened from `row['filepath']`, converted to grayscale, padded
    to a square with `make_square` and resized to `pic_size` x `pic_size`.

    For training data (`test` falsy) each source image contributes three samples,
    appended in this order:

    1. a copy rotated by a random 10-30 degrees,
    2. a copy rotated by a random 330-350 degrees,
    3. the un-rotated image.

    Exactly one of the two rotated copies is inverted: the first one when the
    row index is odd, the second one when it is even.

    For test data (`test` truthy) only the un-rotated image is emitted, together
    with an identifier built from the patient number and the study folder.

    Parameters
    ----------
    dataframe_var : pandas.DataFrame
        Frame with `filepath` and `class` columns (plus `patient` and
        `patient_folder` when `test` is true). The index must be integer-valued
        for the parity check used during augmentation.
    test : bool
        Disable augmentation and collect study identifiers.

    Returns
    -------
    tuple of (list, list, list)
        Image arrays, their labels, and the study identifiers (empty unless
        `test` is true).
    """
    images = []
    labels = []
    study_ids = []

    for index, row in dataframe_var.iterrows():
        # The context manager releases the file handle deterministically instead of
        # leaving it to garbage collection; `convert` returns an in-memory copy.
        with Image.open(row['filepath']) as source:
            gray = source.convert('L')
        resized = make_square(gray).resize((pic_size, pic_size))

        # For the training set only, augment the data by applying random rotations
        # and inversions (negative).
        if not test:
            odd_row = bool(index % 2)

            # The order of the two randint calls is kept so seeded runs draw the same angles.
            rotated_first = resized.rotate(random.randint(10, 30))
            if odd_row:
                rotated_first = invert_image(rotated_first)
            images.append(_to_unit_array(rotated_first))
            labels.append(row['class'])

            rotated_second = resized.rotate(random.randint(330, 350))
            if not odd_row:
                rotated_second = invert_image(rotated_second)
            images.append(_to_unit_array(rotated_second))
            labels.append(row['class'])

        images.append(_to_unit_array(resized))
        labels.append(row['class'])
        if test:
            # Drop the first 7 characters of the patient folder name (the 'patient'
            # prefix in e.g. 'patient00001') and append the study folder.
            study_ids.append(row['patient'][7:] + '_' + row['patient_folder'])

    return images, labels, study_ids

                    
                
        



## Produce the final dataset

In [None]:
# Training set: augmentation is on, so the third return value (study ids) is empty.
xr_train_x, xr_train_y, _train_ids_unused = make_dataset(xr_train_df)
xr_train_x = np.asarray(xr_train_x)
xr_train_y = np.asarray(xr_train_y)

# Test set: no augmentation; one study identifier is returned per image.
xr_test_x, xr_test_y, xr_patient = make_dataset(xr_test_df, test = True)
xr_test_x = np.asarray(xr_test_x)
xr_test_y = np.asarray(xr_test_y)
# Convert the list returned above; the original read `xr_patient_y` before it was
# ever assigned, which raises NameError on a fresh kernel.
xr_patient_y = np.asarray(xr_patient)

In [None]:
# Preview one training sample. The array is a single-channel image, so draw it with
# a gray colormap rather than matplotlib's default false-colour map; the trailing
# semicolon suppresses the AxesImage repr in the cell output.
plt.imshow(xr_train_x[24332], cmap='gray');
