In [28]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import random

from keras import layers
from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers import MaxPooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras import models
from keras import optimizers
from keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from keras import applications

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

import glob
import os

In [29]:
# Fixed seed so the shuffled file order is identical on every Restart & Run All.
SEED = 42
random.seed(SEED)

# os.listdir returns entries in arbitrary order, so the listing is sorted first;
# otherwise the seeded shuffle would still depend on the file system.
train_folder_path = "./train"
train_file_names = sorted(os.listdir(train_folder_path))
random.shuffle(train_file_names)

test_folder_path = "./test"
test_file_names = sorted(os.listdir(test_folder_path))
random.shuffle(test_file_names)

# Image dimensions (in pixels) passed to the data generators as the resize target.
width = 64
height = 64

In [30]:
def build_labelled_frame(folder_path, file_names):
    """Return a DataFrame describing the images found in one folder.

    Parameters
    ----------
    folder_path : str
        Directory that contains the files.
    file_names : list of str
        Bare file names inside ``folder_path``.

    Returns
    -------
    pandas.DataFrame
        One row per file with two columns: ``image_path`` (folder joined with
        the file name) and ``target`` (the first character of the file name,
        which is used as the class label).
    """
    return pd.DataFrame({
        'image_path': [os.path.join(folder_path, name) for name in file_names],
        # The label is encoded as the leading character of each file name.
        'target': [name[0] for name in file_names],
    })


# Training frame, plus the plain lists kept under their original names.
train_set = build_labelled_frame(train_folder_path, train_file_names)
train_full_paths = train_set['image_path'].tolist()
train_targets = train_set['target'].tolist()

# Test frame, built exactly the same way from the test folder.
test_set = build_labelled_frame(test_folder_path, test_file_names)
test_full_paths = test_set['image_path'].tolist()
test_targets = test_set['target'].tolist()

In [31]:
# Preview the first ten rows of the training frame (image_path and target columns).
train_set.head(10)

Unnamed: 0,image_path,target
0,./train/h_091815.603070_I3200001.png,h
1,./train/o_210836.851914_I9000001.png,o
2,./train/o_132446.967902_I4000001.png,o
3,./train/o_132451.517022_I3900001.png,o
4,./train/o_132449.006877_I1970000.png,o
5,./train/o_132449.765191_I2550000.png,o
6,./train/o_132450.333726_I2980000.png,o
7,./train/h_091808.634288_I9800000.png,h
8,./train/o_205844.573332_I1150000.png,o
9,./train/o_210828.929527_I3420000.png,o


In [32]:
# Preview the first ten rows of the test frame (image_path and target columns).
test_set.head(10)

Unnamed: 0,image_path,target
0,./test/o_170710.053686_I1040000.png,o
1,./test/o_170713.202492_I2050000.png,o
2,./test/h_144927.826281_I2770000.png,h
3,./test/h_144933.595914_I4610000.png,h
4,./test/o_170717.292069_I3350000.png,o
5,./test/o_170716.497706_I3090000.png,o
6,./test/h_144927.906305_I2800001.png,h
7,./test/o_170707.135295_I1100000.png,o
8,./test/o_170720.263326_I4300001.png,o
9,./test/o_170716.093685_I2970000.png,o


In [33]:
# Count the training rows per label and report the 'h' and 'o' totals in turn.
target_counts = train_set['target'].value_counts()
train_report_lines = (
    ("Number of healthy patients in the training set:{}", 'h'),
    ("Number of non-healthy patients in the training set:{}", 'o'),
)
for template, label in train_report_lines:
    print(template.format(target_counts[label]))

Number of healthy patients in the training set:800
Number of non-healthy patients in the training set:800


In [34]:
# Count the test rows per label and report the 'h' and 'o' totals in turn.
target_counts = test_set['target'].value_counts()
test_report_lines = (
    ("Number of healthy patients in the test set:{}", 'h'),
    ("Number of non-healthy patients in the test set:{}", 'o'),
)
for template, label in test_report_lines:
    print(template.format(target_counts[label]))

Number of healthy patients in the test set:200
Number of non-healthy patients in the test set:200


In [35]:
# Baseline CNN: one convolution/pooling stage feeding a small fully-connected head.
classifier = Sequential([
    # 32 filters of size 3x3 applied to a 64x64 input with 3 channels
    Convolution2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    # 2x2 max pooling over the convolution output
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the pooled feature maps into a single vector per image
    Flatten(),
    # Hidden dense layer, then a single sigmoid unit for the binary decision
    Dense(units=128, activation="relu"),
    Dense(units=1, activation="sigmoid"),
])

In [36]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as the reported metric.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [38]:
# Augmentation pipeline for the training images: pixel values are multiplied by
# 1/255 and each image is randomly rotated, sheared, zoomed, shifted and
# horizontally flipped.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)

# Batches are read from the paths/labels stored in train_set.
# Keras documents target_size as (height, width); both are equal here, but
# passing them in the documented order keeps this correct if they ever differ.
train_datagenerator = train_datagen.flow_from_dataframe(
    dataframe=train_set,
    x_col="image_path",
    y_col="target",
    target_size=(height, width),
    class_mode="binary",
    batch_size=150,
)

Found 1600 validated image filenames belonging to 2 classes.


In [39]:
# The test images are only rescaled (multiplied by 1/255); no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1./255)

# Keras documents target_size as (height, width), so the values are passed in
# that order. shuffle=False keeps the batches in dataframe order so that any
# later predictions line up with test_datagenerator.classes; validation
# metrics computed over the full set do not depend on the order.
test_datagenerator = test_datagen.flow_from_dataframe(
    dataframe=test_set,
    x_col="image_path",
    y_col="target",
    target_size=(height, width),
    class_mode="binary",
    batch_size=150,
    shuffle=False,
)

Found 400 validated image filenames belonging to 2 classes.


In [40]:
# steps_per_epoch / validation_steps are measured in BATCHES, not images.
# The previous values (1600 / 400) were the image counts, which asks for far
# more batches per epoch than one pass over the data provides. len(generator)
# is the number of batches in one full pass, so each epoch sees every image once.
classifier.fit(train_datagenerator,
               steps_per_epoch=len(train_datagenerator),
               epochs=3,
               validation_data=test_datagenerator,
               validation_steps=len(test_datagenerator))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f58b8355340>

In [42]:
# Deeper CNN: three convolution/pooling stages with 32, 64 and 128 filters,
# followed by a 512-unit dense layer and a single sigmoid output.
classifier = Sequential([
    # Stage 1 - also declares the 64x64x3 input shape
    Convolution2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 2
    Convolution2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Stage 3
    Convolution2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Classification head
    Flatten(),
    Dense(units=512, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

# Same training configuration as the baseline model.
classifier.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [43]:
# Augmentation pipeline for the deeper model: pixel values are multiplied by
# 1/255 and each image is randomly rotated, sheared, zoomed and shifted.
# Horizontal flipping is switched off in this configuration.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=False,
)

# Keras documents target_size as (height, width); both are equal here, but
# passing them in the documented order keeps this correct if they ever differ.
train_datagenerator = train_datagen.flow_from_dataframe(
    dataframe=train_set,
    x_col="image_path",
    y_col="target",
    target_size=(height, width),
    class_mode="binary",
    batch_size=32,
)

# The test images are only rescaled (multiplied by 1/255); no augmentation is applied.
test_datagen = ImageDataGenerator(rescale=1./255)

# Keras documents target_size as (height, width), so the values are passed in
# that order. shuffle=False keeps the batches in dataframe order so that any
# later predictions line up with test_datagenerator.classes; validation
# metrics computed over the full set do not depend on the order.
test_datagenerator = test_datagen.flow_from_dataframe(
    dataframe=test_set,
    x_col="image_path",
    y_col="target",
    target_size=(height, width),
    class_mode="binary",
    batch_size=32,
    shuffle=False,
)

# steps_per_epoch / validation_steps are measured in BATCHES, not images.
# The previous values (1600 / 400) were the image counts, which asks for far
# more batches per epoch than one pass over the data provides. len(generator)
# is the number of batches in one full pass, so each epoch sees every image once.
classifier.fit(train_datagenerator,
               steps_per_epoch=len(train_datagenerator),
               epochs=6,
               validation_data=test_datagenerator,
               validation_steps=len(test_datagenerator))

Found 1600 validated image filenames belonging to 2 classes.
Found 400 validated image filenames belonging to 2 classes.
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7f5850457d60>