In [28]:
# !pip install opencv-python

In [1]:
from PIL import Image
import numpy as np
import os
import cv2
import keras
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout
import pandas as pd

In [2]:
def readData(filepath, label):
    cells = []
    labels = []
    file = os.listdir(filepath)
    for img in file:
        try:
            image = cv2.imread(filepath + img)
            image_from_array = Image.fromarray(image, 'RGB')
            size_image = image_from_array.resize((50, 50))
            cells.append(np.array(size_image))
            labels.append(label)
        except AttributeError as e:
            print('Skipping file: ', img, e)
    print(len(cells), ' Data Points Read!')
    return np.array(cells), np.array(labels)

In [3]:
ParasitizedCells, ParasitizedLabels = readData('./input/cell_images/Parasitized/', 1)
UninfectedCells, UninfectedLabels  = readData('./input/cell_images/Uninfected/', 0)
Cells = np.concatenate((ParasitizedCells, UninfectedCells))
Labels = np.concatenate((ParasitizedLabels, UninfectedLabels))

Skipping file:  Thumbs.db 'NoneType' object has no attribute '__array_interface__'
13779  Data Points
Skipping file:  Thumbs.db 'NoneType' object has no attribute '__array_interface__'
13779  Data Points


In [4]:
np.save("Cells",Cells)
np.save("Labels",Labels)

In [5]:
Cells=np.load("Cells.npy")
Labels=np.load("Labels.npy")

In [6]:
s = np.arange(Cells.shape[0])
np.random.shuffle(s)
Cells = Cells[s]
Labels = Labels[s]

In [7]:
num_classes=len(np.unique(Labels))
len_data=len(Cells)
print(num_classes, ' Unique Classes')
print(len_data, ' Data Points')

2  Unique Classes
27558  Data Points


In [8]:
(x_train,x_test)=Cells[(int)(0.1*len_data):],Cells[:(int)(0.1*len_data)]
(y_train,y_test)=Labels[(int)(0.1*len_data):],Labels[:(int)(0.1*len_data)]
# As we are working on image data we are normalizing data by divinding 255.
x_train = x_train.astype('float32')/255 
x_test = x_test.astype('float32')/255
train_len=len(x_train)
test_len=len(x_test)
#Doing One hot encoding as classifier has multiple classes
y_train=keras.utils.to_categorical(y_train,num_classes)
y_test=keras.utils.to_categorical(y_test,num_classes)

In [9]:
#creating sequential model
model=Sequential()
model.add(Conv2D(filters=16,kernel_size=2,padding="same",activation="relu",input_shape=(50,50,3)))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=32,kernel_size=2,padding="same",activation="relu"))
model.add(MaxPooling2D(pool_size=2))
model.add(Conv2D(filters=64,kernel_size=2,padding="same",activation="relu"))
model.add(MaxPooling2D(pool_size=2))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(500,activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(2,activation="softmax"))#2 represent output layer neurons 
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 50, 50, 16)        208       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 25, 25, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 25, 25, 32)        2080      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 64)        8256      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 64)          0         
_________________________________________________________________
dropout (Dropout)            (None, 6, 6, 64)          0

In [10]:
# compile the model with loss as categorical_crossentropy and using adam optimizer you can test result by trying RMSProp as well as Momentum
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [11]:
#Fit the model with min batch size as 50[can tune batch size to some factor of 2^power ] 
model.fit(x_train, y_train, batch_size=100, epochs=10, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x14ab8b430>

In [15]:
scores = model.evaluate(x_test, y_test)
print("Loss: ", scores[0])        #Loss
print("Accuracy: ", scores[1])    #Accuracy

Loss:  0.13091224431991577
Accuracy:  0.9586206674575806
