In [163]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import os
import random
from matplotlib import image
from sklearn.model_selection import train_test_split
import pickle
from datetime import datetime


In [164]:
import pandas as pd
from PIL import Image
import seaborn as sns
from ipywidgets import widgets
from pathlib import Path
from IPython.display import display
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout

In [159]:
def get_classes(dir_path):
    return os.listdir(dir_path)

def data_analysis_histogram(dir_path, classes, verbose = 1):
    class_dist = []
    for c in classes:
        class_path = os.path.join(dir_path,c)
        class_dist.append(len(os.listdir(class_path)))
    
    if verbose > 0:
        plt.figure(figsize=(16, 8))
        plt.title("Class distribution")
        plt.barh(classes, class_dist)
        for index, value in enumerate(class_dist):
            plt.text(value, index,str(value))
        plt.show()

def data_analysis_image_size(dir_path, classes, verbose = 1, seed = 42):
    random.seed(seed)
    random_class_path = os.path.join(dir_path,random.choice(classes))
    random_img_name = random.choice(os.listdir(random_class_path))
    random_img_path = os.path.join(random_class_path,random_img_name)
    img = image.imread(random_img_path)
    if verbose > 0:
        plt.figure(figsize=(16, 8))
        plt.title("%s - Height: %d px x Length: %d px" % (random_img_path,img.shape[0],img.shape[1]))
        plt.imshow(img)
    
    return (img.shape[0],img.shape[1],1)

def analyse_dataset(dir_path, verbose = 1, seed = 42):
    classes = get_classes(dir_path)
    data_analysis_histogram(dir_path,classes, verbose)
    input_shape = data_analysis_image_size(dir_path,classes, verbose, seed)
    return classes, input_shape


In [14]:
def load_dataset(dir_path, percentage = 1, verbose = 1):
    classes = get_classes(dir_path)
    img_array = []
    class_array = []
    for c in classes:
        class_path = os.path.join(dir_path,c)
        imgs_name = os.listdir(class_path)

        if percentage < 1:
            imgs_name = random.sample(imgs_name, k = int(len(imgs_name)*percentage))

        for i in imgs_name:
            img_array.append(image.imread(os.path.join(class_path,i)))
            class_array.append(c)
    if verbose > 0:
        print("Loaded %d images" % len(img_array))
    return np.array(img_array), np.array(class_array)

In [15]:
def split_dataset(x, y, val_size = 0.2, verbose = 1, seed = 42):
    x_train, x_val, y_train, y_val = train_test_split(x,  y, test_size=val_size, random_state=seed)
    if verbose > 0:
        print("Train size: %d\nValidation size: %d" % (len(x_train), len(x_val)))
    return x_train, x_val, y_train, y_val


In [16]:
def prepare_dataset_channel_position(x, input_shape):
    img_lin,img_col,n_channels = input_shape
    if keras.backend.image_data_format() == 'channels_first':
        x = x.reshape(x.shape[0], n_channels, img_lin, img_col)
        input_shape = (n_channels, img_lin, img_col)
    else:
        x = x.reshape(x.shape[0], img_lin, img_col, n_channels)
        input_shape = (img_lin, img_col, n_channels)
    return x, input_shape

def prepare_dataset_input(x, input_shape):
    x_scaled = x.astype('float32') / 255.0
    return prepare_dataset_channel_position(x_scaled, input_shape)

def prepare_dataset_output(y, classes):
    class_map = {x: i for i,x in enumerate(classes)}
    y_code = [class_map[word] for word in y]
    y_categorical = keras.utils.to_categorical(y_code, len(classes))
    inv_class_map = {v: k for k, v in class_map.items()}
    return y_categorical, inv_class_map

def prepare_dataset(x , y , classes, input_shape):
    x_scaled, input_shape = prepare_dataset_input(x, input_shape)
    y_categorical, inv_class_map = prepare_dataset_output(y, classes)
    return x_scaled , y_categorical, inv_class_map, input_shape

In [176]:
def evaluate_model_loss():


def evaluate_model(model, x_train, x_val, y_train, y_val,  history, verbose = 1):
    scoreTrain = model.evaluate(x_train, y_train, verbose = verbose)
    scoreValidation = model.evaluate(x_val, y_val, verbose = verbose)

    if verbose > 0:
        print("Training loss: %.4f" % (scoreTrain))
        print("Validation loss: %.4f" % (scoreValidation))
        plt.figure(figsize=(16, 8))
        plt.plot(history.history['loss'], label="Loss")
        plt.show()

    return scoreTrain, scoreValidation


IndentationError: expected an indented block (<ipython-input-176-3a0981f86cc1>, line 13)

In [153]:
def save_result(model, history, dir_path = 'results'):
    results_directory = os.path.join(dir_path)

    if not os.path.exists(results_directory):
        os.makedirs(results_directory)
    
    now = datetime.now()
    now_str = now.strftime("%Y-%m-%d-%H-%M-%S-%f")

    result_directory = os.path.join(results_directory,now_str)

    if not os.path.exists(result_directory):
        os.makedirs(result_directory)
    else:
        raise ValueError("File already exists.")
    
    model_path = os.path.join(result_directory,'model')
    model.save(model_path)

    evaluation_path = os.path.join(result_directory,'evaluation')

    evaluation = {
            'epochs': history.params['epochs'],
            'history': history.history
    }

    with open(evaluation_path, 'wb') as f:
        pickle.dump(evaluation, f)
    
    print("Saved!")

In [158]:
def load_result(foldername, dir_path = 'results'):
    result_directory = os.path.join(dir_path,foldername)
    if not os.path.exists(result_directory):
        raise ValueError("Folder not found.")
    
    model_path = os.path.join(result_directory,'model')
    model = keras.models.load_model(model_path)

    evaluation_path = os.path.join(result_directory,'evaluation')
    evaluation = pickle.load(open(evaluation_path, "rb"))

    print("Loaded!")
    return model, evaluation


In [174]:
def load_prepare_dataset(dir_path, val_size = 0.2, dataset_percentage = 1, verbose = 1, seed = 42):
    classes, input_shape = analyse_dataset(dir_path, verbose, seed)
    x, y = load_dataset(dir_path, dataset_percentage ,verbose)
    x_train, x_val, y_train, y_val = split_dataset(x, y, val_size, verbose, seed)
    x_train_prepared , y_train_prepared, inv_class_map, input_shape = prepare_dataset(x_train , y_train , classes, input_shape)
    x_val_prepared , y_val_prepared, _, _ = prepare_dataset(x_val , y_val , classes, input_shape)
    return x_train_prepared, x_val_prepared, y_train_prepared, y_val_prepared, inv_class_map, input_shape, classes

###############################################################################################################################################################################################

In [175]:
def run_cnn_A(input_shape, classes, verbose = 1):
    model = keras.Sequential()
    model.add(keras.layers.Conv2D(64, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu', input_shape=input_shape))
    model.add(keras.layers.MaxPooling2D(pool_size=(2,2), strides=(2,2)))
    model.add(keras.layers.Conv2D(64, kernel_size=(3,3), strides=(1,1), padding='same', activation='relu'))
    model.add(keras.layers.MaxPooling2D(pool_size=(2,2), strides=(2,2)))
    model.add(keras.layers.Conv2D(64, kernel_size=(3,3), strides=(1,1),  padding='same', activation='relu'))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(len(classes), activation='softmax'))

    if verbose > 0:
        model.summary()

    model.compile(loss='categorical_crossentropy',
              optimizer='adam')

    history = model.fit(x_train_prepared, y_train_prepared,
                    batch_size=128,
                    epochs=100, verbose=verbose)

    return model, history

###############################################################################################################################################################################################

In [172]:

# Parameters
dir_path = '../Alzheimer_s Dataset/train'
verbose_loading = 0
verbose_training = 1
dataset_percentage = 1
validation_percentage = 0.2
seed = 42

In [173]:

# Loading and preparing training dataset
classes, input_shape = analyse_dataset(dir_path, verbose_loading, seed)
x, y = load_dataset(dir_path, dataset_percentage , verbose_loading)
x_train, x_val, y_train, y_val = split_dataset(x, y, validation_percentage, verbose_loading, seed)
x_train_prepared , y_train_prepared, inv_class_map, input_shape = prepare_dataset(x_train , y_train , classes, input_shape)
x_val_prepared , y_val_prepared, _, _ = prepare_dataset(x_val , y_val , classes, input_shape)

In [None]:
# Executing
model, history = run_cnn_A(input_shape, classes, verbose_training)
save_result(model, history)

In [None]:
# Evaluating CNN
evaluate_model(model, x_train_prepared, x_val_prepared, y_train_prepared, y_val_prepared,  history, verbose_training)