In [0]:
# Importing required libraries and functions

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import keras
from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, BatchNormalization, Dropout
from keras.models import Sequential
import os
import cv2
from PIL import Image

Using TensorFlow backend.


In [0]:
# Setting up prerequisites for data pre-processing

base = '../data/'
dataset = []
label = []

In [0]:
# Reading and pre-processing the data and respective labels

for i, image_name in enumerate(os.listdir(base + 'Parasitized/')):
  image = cv2.imread(base + 'Parasitized/' + image_name)
  image = Image.fromarray(image, 'RGB')
  image = image.resize((64, 64))
  dataset.append(np.array(image))
  label.append(0)

for i, image_name in enumerate(os.listdir(base+'Uninfected/')):
  image = cv2.imread(base + 'Uninfected/' + image_name)
  image = Image.fromarray(image, 'RGB')
  image = image.resize((64, 64))
  dataset.append(np.array(image))
  label.append(1)

In [11]:
# Constructing the model

model = Sequential()
model.add(Convolution2D(32, (3, 3), input_shape = (64, 64, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(BatchNormalization(axis = -1))
model.add(Dropout(0.2))
model.add(Convolution2D(64, (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(BatchNormalization(axis = -1))
model.add(Dropout(0.2))
model.add(Convolution2D(128, (3, 3), input_shape = (64, 64, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(BatchNormalization(axis = -1))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(activation = 'relu', units=256))
model.add(BatchNormalization(axis = -1))
model.add(Dropout(0.2))
model.add(Dense(activation = 'sigmoid', units=2))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 62, 62, 32)        896       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 31, 31, 32)        0         
_________________________________________________________________
batch_normalization_5 (Batch (None, 31, 31, 32)        128       
_________________________________________________________________
dropout_5 (Dropout)          (None, 31, 31, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 29, 29, 64)        18496     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
batch_normalization_6 (Batch (None, 14, 14, 64)        256       
__________

In [0]:
from keras.utils import plot_model
plot_model(model, to_file='/content/gdrive/My Drive/thismodel.png')

In [0]:
# Splitting the dataset into train and dev set

from keras.utils import to_categorical
X_train, X_test, y_train, y_test = train_test_split(dataset, to_categorical(np.array(label)), test_size = 0.20, random_state = 0)

In [0]:
# Training the model

model_history = model.fit(np.array(X_train), y_train, batch_size = 64, verbose = 1, epochs = 10, validation_split = 0.1,shuffle = False)

W0616 23:38:24.941896 140553001047936 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 21600 samples, validate on 2400 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [0]:
#Testing the model on dev set

print(model.evaluate(np.array(X_test), np.array(y_test))[1]*100)

95.71666666666667


In [0]:
# To write submission to csv file

import pandas as pd
df = pd.read_csv('../data/submission.csv')
base = '../data/test/'

Saving submission.csv to submission (2).csv


In [0]:
# Predicting labels in test set

for i in range(len(df['image'])):
  image = cv2.imread(base + df['image'][i])
  image = Image.fromarray(image, 'RGB')
  image = image.resize((64, 64))
  image = np.array(image)
  image = np.resize(image, (1,64,64,3))
  prediction = model.predict(image)
  if(prediction[0][0]>prediction[0][1]):
    df['label'][i]=0
  else:
    df['label'][i]=1
  if i%100==0:
    print(i)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()


0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100
2200
2300
2400
2500
2600
2700
2800
2900
3000
3100
3200
3300
3400
3500
3600
3700
3800
3900
4000
4100
4200
4300
4400
4500
4600
4700
4800
4900
5000
5100
5200
5300
5400
5500
5600
5700
5800
5900
6000
6100
6200
6300
6400
6500
6600
6700
6800
6900
7000
7100
7200
7300
7400
7500


In [0]:
# Saving csv file

df.to_csv('../data/submit.csv')

 cv2.csv   samplesubmission.csv   submission.csv   train
 cv.csv   'submission (1).csv'	  test
 DataSet  'submission (2).csv'	  test.csv
