In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import random
import warnings

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=FutureWarning)
    warnings.filterwarnings("ignore", category=UserWarning)

    from keras.models import Sequential
    from keras.layers import Convolution2D
    from keras.layers import MaxPooling2D
    from keras.layers import Flatten
    from keras.layers import Dense
    from keras import models
    from keras import optimizers
    from keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
    from keras import applications

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

import glob
import os
from datetime import datetime

Using TensorFlow backend.


In [39]:
now = datetime.now()

TRAIN_IMG_SRC_FOLDER = '/home/chicobentojr/Desktop/L3Net_exames_segmentados'
TRAIN_IMG_FOLDERS = {
    "exame1": "healthy",
    "exame2": "healthy",
    "exame3": "healthy",
    "exame4": "healthy",
    "exame11": "not healthy",
    "exame12": "not healthy",
    "exame13": "not healthy",
    "exame14": "not healthy",
}

TEST_IMG_SRC_FOLDER = '/home/chicobentojr/Desktop/L3Net_exames_segmentados'
TEST_IMG_FOLDERS = {
    "exame5": "healthy",
    "exame15": "not healthy",
}

EXAM_SLICE = 200
CLASSES = len(set([label for label in TRAIN_IMG_FOLDERS.values()]))
EPOCHS = 10

In [67]:
dfs = []

train_images = {"id": [], "label": []}
test_images = {"id": [], "label": []}

df_config = [
    (TRAIN_IMG_SRC_FOLDER, TRAIN_IMG_FOLDERS, train_images),
    (TEST_IMG_SRC_FOLDER, TEST_IMG_FOLDERS, test_images)
]


def get_filespath(folder, search_filter=''):
    paths = []
    for root, dirs, files in os.walk(folder):
        for file in files:
            path = os.path.join(root, file)
            if search_filter in path:
                paths.append(path)
    return paths


for (base, folder, dic) in df_config:
    for img_folder, img_label in folder.items():
        search_folder = "{}/{}".format(base, img_folder)
        imgs_filename = sorted(get_filespath(search_folder, search_filter='images'))[EXAM_SLICE:EXAM_SLICE*2]
        dic["id"].extend(imgs_filename)
        dic["label"].extend([img_label] * len(imgs_filename))

    dfs.append(pd.DataFrame(data=dic))

train_df, test_df = dfs[0], dfs[1]

train_df.to_csv('train_df.csv', index=False)
test_df.to_csv('test_df.csv', index=False)

print("Train fold with {} images".format(len(train_df)))
print(train_df.groupby("label").label.count())
print()
print("Test fold with {} images".format(len(test_df)))
print(test_df.groupby("label").label.count())
print("-" * 30)

Train fold with 1600 images
label
healthy        800
not healthy    800
Name: label, dtype: int64

Test fold with 400 images
label
healthy        200
not healthy    200
Name: label, dtype: int64
------------------------------


In [68]:
def get_data_generator(dataframe, x_col, y_col, subset=None, shuffle=True):
    datagen = ImageDataGenerator(
        rotation_range=15,
        rescale=1./255,
        shear_range=0.1,
        zoom_range=0.2,
        horizontal_flip=True,
        width_shift_range=0.1,
        height_shift_range=0.1)

    
    data_generator = datagen.flow_from_dataframe(
        dataframe=dataframe,
        x_col=x_col,
        y_col=y_col,
        subset=subset,
        target_size=(64, 64),
        class_mode="binary",
        # color_mode="rgb",
        batch_size=32,
        shuffle=shuffle,
    )
    return data_generator

In [69]:
def get_model():
    #initializing the CNN
    classifier= Sequential()
    #Step 1- adding the Convolutional Layer
    classifier.add(Convolution2D(32, (3, 3), input_shape= (64,64,3), activation= 'relu'))
    #Step 2- adding MaxPooling Layer
    classifier.add(MaxPooling2D(pool_size= (2,2)))
    #Step 3- Flattening
    classifier.add(Flatten())
    #Step 4- Classic ANN with fully-connected layers
    classifier.add(Dense(activation="relu", units=128))
    classifier.add(Dense(activation="sigmoid", units=1))

    return classifier

def train_model(model, train_df, test_df, epochs, callbacks=[]):
    train_generator = get_data_generator(train_df, "id", "label")
    validation_generator = get_data_generator(test_df, "id", "label")

    model.compile(
        optimizer="Adam", loss="binary_crossentropy", metrics=["accuracy"]
    )

    step_size_train = train_generator.n // train_generator.batch_size
    step_size_validation = validation_generator.n // validation_generator.batch_size

    if step_size_train == 0:
        step_size_train = train_generator.n // 2
        step_size_validation = validation_generator.n // 2

    history = model.fit_generator(
        generator=train_generator,
        steps_per_epoch=step_size_train,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=step_size_validation,
        callbacks=callbacks
    )

    return history.history

In [71]:
model = get_model()

In [73]:
history = train_model(model, train_df, test_df, EPOCHS)

history

Found 1600 validated image filenames belonging to 2 classes.
Found 400 validated image filenames belonging to 2 classes.
Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


{'val_loss': [0.6507291893164316,
  0.6677262083343838,
  0.9075606672660165,
  0.7949829853099325,
  0.7774807059246561,
  0.999906793884609,
  1.1465244707853899,
  1.1783193608988887,
  1.4077583966047869,
  1.1612184462339983],
 'val_acc': [0.7005208333333334,
  0.6684782608695652,
  0.5190217391304348,
  0.5271739130434783,
  0.6494565217391305,
  0.43478260869565216,
  0.44021739130434784,
  0.4076086956521739,
  0.3858695652173913,
  0.41032608695652173],
 'loss': [0.5407426702976227,
  0.43337304711341856,
  0.3684548878669739,
  0.30852124333381653,
  0.28967876940965653,
  0.25617605477571487,
  0.2144882644712925,
  0.19582326889038085,
  0.1661972899734974,
  0.18899537086486817],
 'acc': [0.74625,
  0.805625,
  0.834375,
  0.873125,
  0.879375,
  0.901875,
  0.921875,
  0.92,
  0.938125,
  0.92875]}