## Bronchitis Detection using CNN

Dataset - https://www.kaggle.com/c/bronchitis

Accuracy - 55.6%

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import os

In [2]:
#Load training image data and labels
X_train = np.load(file='Train_image_data.npy')
Y_train = np.load(file='Train_label_data.npy')

In [3]:
#Load cv image data and labels
X_cv = np.load(file='cv_image_data.npy')
Y_cv = np.load(file='cv_label_data.npy')

In [4]:
#Load test image data and labels
X_test = np.load(file='Test_image_data.npy')
Y_test = np.load(file='Test_label_data.npy')

In [5]:
#Method to convert labels to one-hot encoding
def convert_to_one_hot(Y, C):
    Y = np.eye(C)[Y.reshape(-1)].T
    return Y

In [6]:
#Convert labels to one-hot encoding
Y_train = convert_to_one_hot(Y_train, 2).T
Y_cv = convert_to_one_hot(Y_cv, 2).T
Y_test = convert_to_one_hot(Y_test, 2).T

In [7]:
print ("number of training examples = " + str(X_train.shape[0]))
print ("number of test examples = " + str(X_test.shape[0]))
print ("X_train shape: " + str(X_train.shape))
print ("Y_train shape: " + str(Y_train.shape))
print ("X_test shape: " + str(X_test.shape))
print ("Y_test shape: " + str(Y_test.shape))

number of training examples = 384
number of test examples = 128
X_train shape: (384, 128, 128, 3)
Y_train shape: (384, 2)
X_test shape: (128, 128, 128, 3)
Y_test shape: (128, 2)


## Training

In [8]:
from keras import layers
from keras.layers import Input, Add, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from keras.models import Model, load_model
from keras.preprocessing import image
from keras.utils import layer_utils
from keras.utils.data_utils import get_file
from keras.applications.imagenet_utils import preprocess_input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from keras.initializers import glorot_uniform
import scipy.misc
from matplotlib.pyplot import imshow
%matplotlib inline
import tensorflow as tf

import keras.backend as K
K.set_image_data_format('channels_last')
K.set_learning_phase(1)

Using TensorFlow backend.


In [23]:
def bronchitis_classifier(input_shape=(128,128,3),classes=2):
    
    """
    Implementation of the Bronchitis Classifier. Following is the architecture:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FC

    Parameters:
    input_shape -- shape of the images of the dataset
    classes     -- integer, number of classes

    Returns:
    model       -- a Model() instance in Keras
    """
    
    X_input = Input(input_shape)
    X = ZeroPadding2D((3,3))(X_input)
    
    # CONV2D -> RELU -> MAXPOOL
    X = Conv2D(filters=8, kernel_size=(4, 4), strides = (1, 1), padding='same', name = 'conv1', kernel_initializer = glorot_uniform(seed=0))(X)
    X = Activation('relu')(X)
    X = MaxPooling2D(pool_size=(8,8),strides=8,padding='same',name='MAXPOOL1')(X)
    
    # CONV2D -> RELU -> MAXPOOL
    X = Conv2D(filters=16, kernel_size=(2, 2), strides = (1, 1), padding='same', name = 'conv2', kernel_initializer = glorot_uniform(seed=0))(X)
    X = Activation('relu')(X)
    X = MaxPooling2D(pool_size=(4,4),strides=4,padding='same',name='MAXPOOL2')(X)
    
    X = Flatten()(X)
    X = Dense(classes, activation='relu', name='fc' + str(classes), kernel_initializer = glorot_uniform(seed=0))(X)
    
    
    model = Model(inputs = X_input, outputs = X, name='AlexNet')

    return model

In [24]:
#Build model graph
model = bronchitis_classifier(input_shape = (128, 128, 3), classes = 2)

In [25]:
#Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [29]:
#Train the model - currently training on 10 epochs only
model.fit(X_train, Y_train, epochs = 50, batch_size = 64)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1940659e278>

## Evaluation

In [30]:
#Evaluate the model on cross validation set
preds = model.evaluate(X_cv,Y_cv)



In [31]:
#Print the loss of cross validation set
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))

Loss = 0.685636356472969
Test Accuracy = 0.5546875


In [32]:
#Evaluate the model on test set
preds_test = model.evaluate(X_test,Y_test)



In [33]:
#Print the loss of test set
print ("Loss = " + str(preds_test[0]))
print ("Test Accuracy = " + str(preds_test[1]))

Loss = 0.8255279064178467
Test Accuracy = 0.53125


## Submission

In [51]:
from glob import glob
import cv2 as cv

In [52]:
def load_test_images():
    '''
        Read images in test folder and return the list
    '''
    x = []
    img_name_lst = []
    
    IMG_HEIGHT = 128
    IMG_WIDTH  = 128
    
    curDir = os.path.curdir
    test_images = os.path.join(curDir,'Test_Images')
    test_imgs = glob(os.path.join(test_images, "*.png"))
    
    for img in test_imgs:
        test_img = cv.imread(img)
        x.append(cv.resize(test_img,(IMG_WIDTH,IMG_HEIGHT),interpolation=cv.INTER_CUBIC))
        img_name = os.path.basename(img).split('.')[0]
        img_name_lst.append(img_name)
        
        num_images_loaded = 0
        num_images_loaded += 1
        
        if(num_images_loaded % 1000 == 0):
            print('{} testing set images loaded'.format(num_images_loaded))
            
        
        
    print('-------------------------Loaded testing dataset-------------------------------')
        
    return x, img_name_lst

In [53]:
test_x,img_name_lst = load_test_images()

-------------------------Loaded testing dataset-------------------------------


In [56]:
#Load training image data for submission
X_submission = np.asarray(test_x)

In [57]:
#Predit on submission image data
test_predictions = model.predict(X_submission)

In [58]:
#Take argmax to get the highest probable class as the predictions will have probability of 2 classes
test_preds = test_predictions.argmax(axis=1)

In [59]:
submission_df = pd.DataFrame()

In [60]:
submission_df['Images'] = img_name_lst
submission_df['Labels'] = test_preds

In [61]:
submission_df.head()

Unnamed: 0,Images,Labels
0,CR_0001,0
1,CR_0003,0
2,CR_0004,1
3,CR_0020,1
4,CR_0022,0


In [62]:
submission_df.to_csv('submission.csv',index=False)