In [None]:
import keras #for Convolutional neural networks
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
import numpy as np
import cv2 #helps read images 
import os 
import glob #for file related functions
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Module-level buffers filled by prepData(). They live outside the function
# only so the raw images can be plotted before training.
uninfected_images, uninfected_labels = [], []
infected_images, infected_labels = [], []
def _readCellFolder(pattern, label, images, labels, image_size):
    """Load every image matching ``pattern`` into ``images`` and append ``label`` for each.

    Returns the number of files that could not be decoded and were skipped.
    """
    skipped = 0
    for path in sorted(glob.glob(pattern)): #sorted because glob returns files in arbitrary order
        image = cv2.imread(path)
        if image is None: #cv2.imread returns None instead of raising when a file cannot be read
            skipped += 1
            continue
        images.append(cv2.resize(image, image_size)) #resize so every sample has the same shape
        labels.append(label)
    return skipped


def prepData(test_size=0.33, image_size=(81, 81), random_state=None): #function that prepares my data
    """Read the cell images from disk, label them and split them into train/test sets.

    Fills the module-level lists ``uninfected_images``, ``uninfected_labels``,
    ``infected_images`` and ``infected_labels`` (used for plotting). The lists are
    emptied first, so running this cell again does not duplicate the dataset.

    Parameters:
        test_size: fraction of the data held out for testing (default 0.33).
        image_size: size passed to ``cv2.resize`` as ``dsize`` (default 81x81).
        random_state: optional seed forwarded to ``train_test_split`` so the
            split can be reproduced; ``None`` keeps the split random.

    Returns:
        image_train, image_test, label_train, label_test as numpy arrays.

    Raises:
        ValueError: if no readable image was found in either folder.
    """
    #start from empty lists so re-running the cell does not append the same images twice
    for bucket in (uninfected_images, uninfected_labels, infected_images, infected_labels):
        bucket.clear()

    #uninfected cells are labeled 0 for training
    skipped = _readCellFolder("cell_images/Uninfected/*.png", 0,
                              uninfected_images, uninfected_labels, image_size)
    print(len(uninfected_images),"files read for uninfected")

    #infected cells are labeled 1 for training
    skipped += _readCellFolder("cell_images/Parasitized/*.png", 1,
                               infected_images, infected_labels, image_size)
    print(len(infected_images), "files read for infected")

    if skipped:
        print(skipped, "files could not be read and were skipped")

    images = uninfected_images + infected_images #combine the uninfected images and the infected images into a single dataset
    labels = uninfected_labels + infected_labels #do the same for our labels

    if not images:
        raise ValueError("no images found under cell_images/Uninfected or cell_images/Parasitized")

    #split the dataset: with the default test_size, roughly 67% train and 33% test
    image_train, image_test, label_train, label_test = train_test_split(
        images, labels, test_size = test_size, random_state = random_state)

    #convert all of this to numpy arrays and return them ready for training and testing
    return (np.array(image_train), np.array(image_test),
            np.array(label_train), np.array(label_test))

In [None]:
# Load both classes from disk and unpack the resulting train/test split.
(image_train, image_test,
 label_train, label_test) = prepData()

In [None]:
#plotting a healthy and an infected cell just to see what they look like
fig, axes = plt.subplots(1, 2)
samples = (uninfected_images[0], infected_images[0])
titles = ("Uninfected", "Parasitized")
for ax, sample, title in zip(axes, samples, titles):
    ax.imshow(cv2.cvtColor(sample, cv2.COLOR_BGR2RGB)) #convert BGR to RGB so matplotlib shows the right colors
    ax.set_title(title)
plt.show()

In [None]:
#imports for building the convolutional neural network
from keras.layers import Convolution2D, MaxPool2D, Flatten, Dense, BatchNormalization, Dropout
from keras.layers import MaxPooling2D
from keras.models import Sequential
from keras import optimizers
from keras import layers


In [None]:
# Initialising the CNN
model = Sequential()
# First convolutional layer. A 3x3 window will sweep through the input matrix. 32 output filters for the layer
# input shape is the 81 x 81 image x 3 for red, green, blue
# Only this first layer declares an input shape; every later layer takes its input from the layer before it
model.add(layers.Conv2D(32, (3, 3), activation = 'relu', input_shape=(81 ,81, 3)))
# Pooling layer, max pooling or grabbing the max value found in a 2x2 window
model.add(layers.MaxPooling2D((2, 2)))
# Second convolutional layer, again with 32 filters
model.add(layers.Conv2D(32, (3, 3), activation = 'relu'))
# Second pooling layer
model.add(layers.MaxPooling2D((2, 2)))
# Third convolutional layer with 64 filters
model.add(layers.Conv2D(64, (3, 3), activation = 'relu'))
# Third pooling layer
model.add(layers.MaxPooling2D((2, 2)))
# Fourth convolutional layer with 128 filters
model.add(layers.Conv2D(128, (3, 3), activation = 'relu'))
# Fourth pooling layer
model.add(layers.MaxPooling2D((2, 2)))
# Flattening and turning inputs into a single one dimension matrix
model.add(layers.Flatten())
# Fully connected layer. This is the neural network portion where nodes are trained through backpropagation
model.add(layers.Dense(units = 512, activation = 'relu'))
model.add(layers.Dense(units = 1, activation = 'sigmoid')) #sigmoid activation for binary classification for output node
model.compile(loss = 'binary_crossentropy', # 2 class label problem, use binary cross entropy
              optimizer = 'adam',
              metrics = ['accuracy'])

In [None]:
model.summary() #prints a summary of the model built above

In [None]:
#begin training model
history = model.fit(image_train, #our image training dataset; prepData already returns a numpy array, so no extra np.array() copy
                    label_train, #our labels or "correct answers" for the training dataset
                    batch_size = 64, #number of training examples per gradient update
                    epochs = 10, #make 10 full passes over the training dataset
                    validation_split = 0.1) #fraction of the training dataset that will become the validation dataset

In [None]:
# Score the trained model on the held-out test split.
results = model.evaluate(
    x=image_test,
    y=label_test,
    batch_size=128,
)

In [None]:
#import confusion matrix method
from sklearn.metrics import confusion_matrix

In [None]:
# Run the model over the whole test set; y_pred holds one prediction per image.
y_pred = model.predict(x=image_test)
print(y_pred)

In [None]:
#since model returns probabilities, we want to turn probabilities into a label, either 0 or 1
#a probability strictly greater than 0.5 becomes label 1, anything else becomes label 0
#the comparison is done on the whole flattened array at once instead of one prediction at a time
categorized_y_pred = (np.asarray(y_pred).reshape(-1) > 0.5).astype(int).tolist()

In [None]:
# Build the confusion matrix from the true test labels and the thresholded predictions.
conf_matrix = confusion_matrix(
    y_true=label_test,
    y_pred=categorized_y_pred,
)

In [None]:
#formatting our confusion matrix so we can have total numbers as well as normalized data
totalcm = np.char.add(np.char.add("(", conf_matrix.astype("str")), ") ") #raw counts rendered as "(count) "
row_totals = conf_matrix.sum(axis=1, keepdims=True) #sum of each row, kept as a column so it broadcasts
# creating normalized confusion matrix; a row whose total is 0 stays 0 instead of dividing by zero
ncm = np.divide(conf_matrix.astype('float'), row_totals,
                out=np.zeros(conf_matrix.shape, dtype='float'),
                where=row_totals != 0)
#one annotation per cell: the count on the first line, the row-normalized fraction on the second
#loop variables have their own names so they do not shadow totalcm / ncm
labels = np.asarray(["{0}\n{1:.2f}".format(count_text, fraction)
                     for count_text, fraction in zip(totalcm.flatten(), ncm.flatten())]
                    ).reshape(conf_matrix.shape) #same shape as the matrix rather than a hard-coded 2x2

In [None]:
#we need to manually make a confusion matrix since we can't input our classifier into the plot function
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

class_names = ['Healthy', 'Parasitized']
ax = plt.subplot()
# color each cell by its normalized value and annotate it with the prepared label text
sns.heatmap(ncm, annot=labels, fmt='', cmap='Reds', ax=ax)

# graph title, axis names and tick labels
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.xaxis.set_ticklabels(class_names)
ax.yaxis.set_ticklabels(class_names);


In [None]:
# Training vs. validation accuracy per epoch, taken from the fit history.
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_key])
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
#plotting some test image data and using our model to predict it
#the train/test split is random, so each image's true class is read from label_test instead of being assumed
#the model outputs the probability of label 1 (infected); if that probability is low, we assume it is label 0
fig, axes = plt.subplots(1, 2)
for idx, ax in enumerate(axes):
    ax.imshow(cv2.cvtColor(image_test[idx], cv2.COLOR_BGR2RGB))
    ax.set_title("image {} (true label: {})".format(idx + 1, label_test[idx]))
    #slicing with idx:idx+1 keeps the batch dimension, so no hard-coded image size is needed
    print("prediction probability of label 1 (infected) for image {}".format(idx + 1),
          model.predict(image_test[idx:idx + 1]),
          "true label:", label_test[idx])
plt.show()