In [39]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow import keras
import os
import random
from matplotlib import image
from sklearn.model_selection import train_test_split
import pickle
from datetime import datetime
from sklearn.utils.class_weight import compute_class_weight
import json
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve
from prettytable import PrettyTable 
from contextlib import redirect_stdout
from sklearn.metrics import auc
import time

In [40]:
# Execute models.ipynb in this kernel. Presumably it defines the model-builder
# functions referenced further down (e.g. `classweight`, `get_model`), which are
# not defined anywhere in this notebook — TODO confirm.
%run models.ipynb

In [41]:
#import tensorflow as tf
#from PIL import Image
#from sklearn.metrics import balanced_accuracy_score
#from ipywidgets import widgets
#from pathlib import Path
#from IPython.display import display
#from keras.models import Sequential
#from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout

In [42]:
def get_classes(dir_path):
    """Return the class names found under ``dir_path``.

    Each immediate sub-directory of ``dir_path`` is treated as one class.
    The result is sorted so that the class order (and therefore any index
    assigned from it) does not depend on the arbitrary order in which
    ``os.listdir`` reports entries. Plain files (e.g. ``.DS_Store``) are
    skipped because callers list the contents of every returned entry.

    Parameters:
        dir_path: path of the dataset root folder.

    Returns:
        Sorted list of sub-directory names.
    """
    return sorted(
        entry
        for entry in os.listdir(dir_path)
        if os.path.isdir(os.path.join(dir_path, entry))
    )

def data_analysis_histogram(dir_path, classes, verbose = 1):
    """Count the entries in each class folder and optionally plot them.

    Parameters:
        dir_path: dataset root folder.
        classes: names of the class sub-folders inside ``dir_path``.
        verbose: when greater than 0, a horizontal bar chart is shown.

    Returns:
        List with the number of directory entries per class, in the order
        of ``classes``.
    """
    counts = [len(os.listdir(os.path.join(dir_path, name))) for name in classes]

    if verbose > 0:
        plt.figure(figsize=(16, 8))
        plt.title("Class distribution")
        plt.barh(classes, counts)
        # Write each count next to the end of its bar.
        for row, n_entries in enumerate(counts):
            plt.text(n_entries, row, str(n_entries))
        plt.show()
    return counts

def data_analysis_image_size(dir_path, classes, verbose = 1, seed = 42):
    """Pick one random image and derive the model input shape from it.

    Parameters:
        dir_path: dataset root folder.
        classes: names of the class sub-folders inside ``dir_path``.
        verbose: when greater than 0, the sampled image is drawn.
        seed: value used to seed the global ``random`` generator.

    Returns:
        Tuple ``(height, width, channels)``. ``channels`` is taken from the
        image's third axis when it has one, and is 1 for a 2-D image.
    """
    # NOTE: this seeds the *global* generator on purpose-compatible terms:
    # any later use of ``random`` in the same run continues from this state.
    random.seed(seed)
    random_class_path = os.path.join(dir_path, random.choice(classes))
    # Sort the listing so the seeded choice does not depend on the
    # arbitrary order returned by os.listdir.
    random_img_name = random.choice(sorted(os.listdir(random_class_path)))
    random_img_path = os.path.join(random_class_path, random_img_name)
    img = image.imread(random_img_path)
    if verbose > 0:
        plt.figure(figsize=(16, 8))
        plt.title("%s - Height: %d px x Length: %d px" % (random_img_path, img.shape[0], img.shape[1]))
        plt.imshow(img)

    # A colour image carries its channel count on the third axis; a
    # hard-coded 1 would make the later reshape fail for such images.
    n_channels = img.shape[2] if img.ndim == 3 else 1
    return (img.shape[0], img.shape[1], n_channels)

def analyse_dataset(dir_path, verbose = 1, seed = 42):
    """Collect the class names, the input shape and the class distribution.

    Delegates to ``get_classes``, ``data_analysis_histogram`` and
    ``data_analysis_image_size`` (in that order) and bundles their results.

    Returns:
        Tuple ``(classes, input_shape, class_dist)``.
    """
    labels = get_classes(dir_path)
    distribution = data_analysis_histogram(dir_path, labels, verbose)
    shape = data_analysis_image_size(dir_path, labels, verbose, seed)
    return labels, shape, distribution


In [43]:
def load_dataset(dir_path, percentage = 1, verbose = 1):
    """Read the images of every class folder into memory.

    Parameters:
        dir_path: dataset root folder (one sub-folder per class).
        percentage: when below 1, only that fraction of each class is kept,
            drawn with ``random.sample`` from the global generator.
        verbose: when greater than 0, the number of loaded images is printed.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays; ``labels[i]`` is the class
        folder name ``images[i]`` was read from.
    """
    images, labels = [], []
    for label in get_classes(dir_path):
        folder = os.path.join(dir_path, label)
        filenames = os.listdir(folder)

        if percentage < 1:
            keep = int(len(filenames) * percentage)
            filenames = random.sample(filenames, k = keep)

        images.extend(image.imread(os.path.join(folder, fname)) for fname in filenames)
        labels.extend([label] * len(filenames))

    if verbose > 0:
        print("Loaded %d images" % len(images))

    return np.array(images), np.array(labels)

In [44]:
def split_dataset(x, y, val_size = 0.2, verbose = 1, seed = 42):
    """Split samples and labels into training and validation subsets.

    Thin wrapper over ``train_test_split`` using ``val_size`` as the test
    fraction and ``seed`` as the random state.

    Returns:
        Tuple ``(x_train, x_val, y_train, y_val)``.
    """
    train_x, val_x, train_y, val_y = train_test_split(
        x, y, test_size=val_size, random_state=seed
    )
    if verbose > 0:
        sizes = (len(train_x), len(val_x))
        print("Train size: %d\nValidation size: %d" % sizes)
    return train_x, val_x, train_y, val_y

In [45]:
def prepare_dataset_channel_position(x, input_shape):
    """Give ``x`` an explicit channel axis in the position the backend expects.

    Parameters:
        x: image batch, either without a channel axis ``(N, H, W)`` or with a
            trailing one ``(N, H, W, C)``.
        input_shape: ``(height, width, channels)`` of a single raw image.

    Returns:
        Tuple ``(x, input_shape)`` where ``x`` is ``(N, C, H, W)`` and
        ``input_shape`` is ``(C, H, W)`` when Keras reports
        ``'channels_first'``; otherwise ``(N, H, W, C)`` / ``(H, W, C)``.

    Raises:
        ValueError: from ``reshape`` when ``x`` does not hold
            ``N * H * W * C`` values.
    """
    img_lin, img_col, n_channels = input_shape
    # First make the (possibly implicit) channel axis explicit at the end.
    x = x.reshape(x.shape[0], img_lin, img_col, n_channels)
    if keras.backend.image_data_format() == 'channels_first':
        # Move the axis instead of reshaping straight to (N, C, H, W): a
        # plain reshape would interleave pixels of different channels as
        # soon as there is more than one channel.
        x = np.moveaxis(x, -1, 1)
        input_shape = (n_channels, img_lin, img_col)
    else:
        input_shape = (img_lin, img_col, n_channels)
    return x, input_shape

def prepare_dataset_input(x, input_shape):
    """Scale the image batch and arrange its channel axis.

    The batch is cast to ``float32`` and divided by 255.0 (this presumes the
    pixel values are in the 0-255 range — verify against the image format),
    then handed to ``prepare_dataset_channel_position``.

    Returns:
        Whatever ``prepare_dataset_channel_position`` returns:
        ``(x, input_shape)``.
    """
    as_float = x.astype('float32')
    return prepare_dataset_channel_position(as_float / 255.0, input_shape)

def prepare_dataset_output(y, classes):
    """One-hot encode the class labels.

    Parameters:
        y: iterable of class names, one per sample.
        classes: ordered class names; a class's position is its integer code.

    Returns:
        Tuple ``(y_categorical, inv_class_map)``: the result of
        ``keras.utils.to_categorical`` over the integer codes, and a dict
        mapping each integer code back to its class name.

    Raises:
        KeyError: when ``y`` contains a name that is not in ``classes``.
    """
    code_of = dict((label, position) for position, label in enumerate(classes))
    codes = list(map(code_of.__getitem__, y))
    one_hot = keras.utils.to_categorical(codes, len(classes))
    name_of = dict((position, label) for label, position in code_of.items())
    return one_hot, name_of

def prepare_dataset(x , y , classes, input_shape):
    """Prepare both the inputs and the labels for training or evaluation.

    Runs ``prepare_dataset_input`` on ``x`` and ``prepare_dataset_output`` on
    ``y`` and merges their results.

    Returns:
        Tuple ``(x_scaled, y_categorical, inv_class_map, input_shape)``.
    """
    inputs, model_shape = prepare_dataset_input(x, input_shape)
    targets, code_to_name = prepare_dataset_output(y, classes)
    return inputs, targets, code_to_name, model_shape

In [46]:
def plot_execution_history(foldername, history, model, verbose, save = True, results_path = 'results'):
    """Plot every recorded training curve, one panel per metric.

    ``history`` is read positionally: its first entries are drawn as
    'Training' curves and the remaining ones as 'Validation' curves on the
    matching panels. Keys starting with ``'val_'`` are counted as the
    validation entries.

    Parameters:
        foldername: sub-folder of ``results_path`` for the image; only used
            when ``save`` is true, so it may be ``None`` otherwise.
        history: dict mapping a metric name to its per-epoch values.
        model: object whose ``metrics_names`` supply the y-axis labels.
        verbose: when greater than 0 the figure is shown, otherwise closed.
        save: when true, the figure is written to
            ``<results_path>/<foldername>/execution.png``.
        results_path: root folder of the results.
    """
    series = list(history.values())
    if not series:
        # Nothing was recorded, so there is nothing to draw.
        return

    n_validation = sum(1 for key in history if str(key).startswith('val_'))
    nplots = (len(series) - n_validation) or len(series)
    ncols = 3
    # Round up so a metric count that is not a multiple of 3 still fits.
    nrows = (nplots + ncols - 1) // ncols

    fig = plt.figure(figsize=(ncols*8, nrows*5))
    gs = fig.add_gridspec(nrows, ncols)
    # squeeze=False keeps the axes 2-D even when there is a single row.
    axs = gs.subplots(sharex=False, sharey=False, squeeze=False)

    for i, values in enumerate(series):
        panel = i % nplots
        label = 'Training' if i < nplots else 'Validation'
        axs[panel // ncols, panel % ncols].plot(values, label = label)

    for i, metric_name in enumerate(model.metrics_names[:nplots]):
        ax = axs[i // ncols, i % ncols]
        ax.set_xlabel('Epoch')
        ax.set_ylabel(metric_name)
        ax.legend()

    if save:
        # Built only here: joining a None folder name would raise TypeError.
        plt.savefig(os.path.join(results_path, foldername, 'execution.png'))
    if verbose > 0:
        plt.show()
    else:
        # Release the figure so it is neither leaked nor drawn later.
        plt.close(fig)

In [47]:
def plot_evalution_roc(foldername, name, model, x, y, inv_class_map, verbose = 1, save = True, results_path = 'results'):
    """Draw one ROC curve per class from the model's predictions on ``x``.

    Parameters:
        foldername: sub-folder of ``results_path`` used when saving.
        name: prefix of the saved image file name.
        model: object exposing ``predict``.
        x: model inputs.
        y: 2-D array of true labels, one column per class.
        inv_class_map: dict mapping a class index to its display name.
        verbose: when greater than 0 the figure is shown, otherwise closed.
        save: when true, writes ``<name>_roc_curve.png`` under
            ``<results_path>/<foldername>``.

    Returns:
        Tuple ``(fpr, tpr, roc_auc)`` of dicts keyed by class index.
    """
    scores = model.predict(x)
    class_ids = range(len(inv_class_map))

    fpr, tpr, roc_auc = {}, {}, {}
    for k in class_ids:
        fpr[k], tpr[k], _ = roc_curve(y[:, k], scores[:, k])
        roc_auc[k] = auc(fpr[k], tpr[k])

    plt.figure(figsize=(16, 16))
    for k in class_ids:
        legend_text = "ROC curve of {0} (AUC = {1:0.2f})".format(inv_class_map[k], roc_auc[k])
        plt.plot(fpr[k], tpr[k], label=legend_text)
    # Diagonal reference line.
    plt.plot([0, 1], [0, 1], "k--")
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc="lower right")

    if save:
        plt.savefig(os.path.join(results_path, foldername, name + '_roc_curve.png'))

    if verbose > 0:
        plt.show()
    else:
        plt.close()

    return fpr, tpr, roc_auc

In [48]:
def save_model(model, history, elapsed_minutes, results_path = 'results'):
    """Persist a trained model and its training record in a new folder.

    A folder named ``"<model.name> [<timestamp>]"`` is created under
    ``results_path`` and receives:
      * ``model``             - written by ``model.save``;
      * ``execution``         - pickle with epochs, history and elapsed time;
      * ``model_summary.txt`` - elapsed time plus the ``model.summary()`` text.

    Returns:
        The name of the created folder (not its full path).

    Raises:
        ValueError: when the target folder already exists.
    """
    base_dir = os.path.join(results_path)
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)

    stamp = datetime.now().strftime(" [%Y-%m-%d-%H-%M-%S-%f]")
    name = model.name + stamp

    run_dir = os.path.join(base_dir, name)
    if os.path.exists(run_dir):
        raise ValueError("File already exists.")
    os.makedirs(run_dir)

    model.save(os.path.join(run_dir, 'model'))

    record = {
        'epochs': history.params['epochs'],
        'history': history.history,
        'elapsed_minutes': elapsed_minutes,
    }
    with open(os.path.join(run_dir, 'execution'), 'wb') as record_file:
        pickle.dump(record, record_file)

    with open(os.path.join(run_dir, 'model_summary.txt'), 'w') as summary_file:
        summary_file.write('Elapsed time: '+str(elapsed_minutes)+' min\n')
        # model.summary() prints to stdout; capture it into the file.
        with redirect_stdout(summary_file):
            model.summary()

    return name

In [49]:
def load_model(foldername, results_path = 'results'):
    """Load a saved model and its training record from a results folder.

    Parameters:
        foldername: name of the folder inside ``results_path``.
        results_path: root folder of the results.

    Returns:
        Tuple ``(model, execution)``: the object returned by
        ``keras.models.load_model`` for ``<folder>/model`` and the unpickled
        content of ``<folder>/execution``.

    Raises:
        ValueError: when the folder does not exist.
    """
    result_directory = os.path.join(results_path, foldername)
    if not os.path.exists(result_directory):
        raise ValueError("Folder not found.")

    model = keras.models.load_model(os.path.join(result_directory, 'model'))

    # SECURITY: unpickling can execute arbitrary code; only load results
    # folders that come from a trusted source.
    # The context manager closes the file, which a bare open() never did.
    with open(os.path.join(result_directory, 'execution'), 'rb') as f:
        execution = pickle.load(f)

    return model, execution

In [50]:
def save_evaluation(foldername, name, all_scores, table, results_path = 'results'):
    """Write the evaluation scores of a model into its results folder.

    Two files are created in ``<results_path>/<foldername>``:
      * ``<name>_score.json``        - ``all_scores`` as indented JSON;
      * ``<name>_score_summary.txt`` - the folder name followed by
        ``table.get_string()``.

    Raises:
        ValueError: when the results folder does not exist.
    """
    target_dir = os.path.join(results_path, foldername)
    if not os.path.exists(target_dir):
        raise ValueError("Folder not found.")

    with open(os.path.join(target_dir, name + '_score.json'), 'w') as json_file:
        json.dump(all_scores, json_file, ensure_ascii=False, indent=4)

    with open(os.path.join(target_dir, name + '_score_summary.txt'), 'w') as text_file:
        text_file.write(foldername+'\n')
        text_file.write(table.get_string())

In [51]:
def evaluate_model_by_class(model, x, y, verbose = 1):
    """Evaluate the model separately on the samples of each class.

    Parameters:
        model: object exposing ``evaluate(x, y, verbose=...)`` and
            ``metrics_names``.
        x: array of model inputs, first axis indexing the samples.
        y: 2-D one-hot label array, one column per class.
        verbose: forwarded to ``model.evaluate``.

    Returns:
        List with one entry per class (column of ``y``): the value returned
        by ``model.evaluate`` on that class's samples, or a list of NaN (one
        per name in ``model.metrics_names``) when the class has no samples.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    # Position of the hot entry of every row = class index of the sample.
    labels = np.argmax(y, axis=1)

    score_by_class = []
    for class_index in range(y.shape[1]):
        mask = labels == class_index
        if not mask.any():
            # Evaluating an empty batch fails; keep the row aligned with the
            # other classes so tabular consumers still line up.
            score_by_class.append([float('nan')] * len(model.metrics_names))
            continue
        # Boolean indexing keeps the original dtype. Packing the per-class
        # arrays into one object-dtype array (as was done before) collapses
        # into a single multi-dimensional object array when all classes have
        # the same number of samples.
        score_by_class.append(model.evaluate(x[mask], y[mask], verbose = verbose))

    return score_by_class

def evaluate_model_confusion_matrix(foldername, name, model, x, y, inv_class_map, verbose = 1, save = True, results_path = 'results'):
    """Compute and plot the confusion matrix of the model on ``(x, y)``.

    Parameters:
        foldername: sub-folder of ``results_path`` used when saving.
        name: prefix of the saved image file name.
        model: object exposing ``predict``.
        x: model inputs.
        y: 2-D one-hot label array, one column per class.
        inv_class_map: dict mapping a class index to its name.
        verbose: when greater than 0 the figure is shown, otherwise closed.
        save: when true, writes ``<name>_confusion_matrix.png`` under
            ``<results_path>/<foldername>``.

    Returns:
        The confusion matrix, rows (actual) and columns (predicted) ordered
        by class index.
    """
    class_names = [inv_class_map[i] for i in range(len(inv_class_map))]

    def undo_one_hot(encoded):
        return [inv_class_map[i] for i in np.argmax(encoded, axis = 1)]

    y_pred = model.predict(x)
    predicted_names = undo_one_hot(y_pred)
    actual_names = undo_one_hot(y)
    # Pin the label order. Left to itself, confusion_matrix sorts the labels
    # it encounters, which mislabels the axes whenever the class indices are
    # not in alphabetical order and shrinks the matrix when a class is absent.
    cm = confusion_matrix(actual_names, predicted_names, labels = class_names)

    cm_df = pd.DataFrame(cm, index = class_names, columns = class_names)
    plt.figure(figsize=(16,16))
    # fmt='d' prints the counts as integers instead of '1.2e+03'.
    sns.heatmap(cm_df, annot=True, fmt='d')
    plt.title('Confusion Matrix')
    plt.ylabel('Actual Values')
    plt.xlabel('Predicted Values')
    if save:
        results_directory = os.path.join(results_path, foldername)
        plt.savefig(os.path.join(results_directory, name + '_confusion_matrix.png'))
    if verbose > 0:
        plt.show()
    else:
        plt.close()
    return cm

def evaluate_model(foldername, name, model, x, y, inv_class_map, verbose = 1, save = True, results_path = 'results'):
    """Run every evaluation step on ``(x, y)`` and gather the scores.

    Computes the global scores, the per-class scores, the confusion matrix
    and the ROC/AUC values, prints a summary table when ``verbose`` is
    positive, and stores the results when ``save`` is true.

    Parameters:
        foldername: results sub-folder of the model being evaluated.
        name: label of the evaluated subset (used in file names and output).
        model: object exposing ``evaluate``, ``predict`` and ``metrics_names``.
        x, y: inputs and one-hot labels.
        inv_class_map: dict mapping a class index to its name.
        verbose: verbosity forwarded to every step.
        save: whether plots and scores are written to disk.
        results_path: root folder of the results, forwarded to every helper
            that writes files (defaults to the helpers' own default).

    Returns:
        Dict with keys ``model_name``, ``name``, ``loss`` (global scores),
        ``loss_by_class``, ``confusion`` and ``auc``.
    """
    score = model.evaluate(x, y, verbose = verbose)
    score_by_class = evaluate_model_by_class(model, x, y, verbose)
    cm = evaluate_model_confusion_matrix(foldername, name, model, x, y, inv_class_map, verbose, save, results_path)
    _, _, roc_auc = plot_evalution_roc(foldername, name, model, x, y, inv_class_map, verbose, save, results_path)

    # One row per metric, one column for the global score and one per class.
    table = PrettyTable()
    table.add_column("Metrics", model.metrics_names)
    table.add_column("Global", np.round(score,4))
    for i, s_class in enumerate(score_by_class):
        table.add_column(inv_class_map[i], np.round(s_class,4))

    if verbose > 0:
        print()
        print(name)
        print(table)

    all_scores = {
        'model_name': foldername,
        'name': name,
        'loss': score,
        'loss_by_class': score_by_class,
        'confusion': cm.tolist(),
        'auc': list(roc_auc.values())
    }

    if save:
        save_evaluation(foldername, name, all_scores, table, results_path)

    return all_scores

###############################################################################################################################################################################################

In [52]:
def get_class_weight(classes, y):
    """Compute 'balanced' class weights keyed by class index.

    Parameters:
        classes: ordered class names; a class's position is its index.
        y: class name of every sample.

    Returns:
        Dict mapping each class index to the weight computed by
        ``compute_class_weight`` with ``class_weight='balanced'``.
    """
    # Hand over an ndarray: scikit-learn releases that validate parameter
    # types reject a plain list for ``classes``.
    weights = compute_class_weight(class_weight = 'balanced', classes = np.asarray(classes), y = y)
    return {index: float(weight) for index, weight in enumerate(weights)}

In [53]:
def get_metrics():
    """Build a fresh list of Keras metric objects to track during training.

    Returns:
        List of ten new metric instances, named ``Accuracy``,
        ``True_positives``, ``False_positives``, ``True_negatives``,
        ``False_negatives``, ``Binary_accuracy``, ``Precision``, ``Recall``,
        ``AUC`` and ``PRC``.
    """
    return [
        # CategoricalAccuracy compares the arg-max of the prediction with the
        # one-hot label. The plain Accuracy metric checks raw equality of the
        # two tensors, which is meaningless for probability outputs. The
        # reported name is kept as "Accuracy".
        keras.metrics.CategoricalAccuracy(name="Accuracy"),
        keras.metrics.TruePositives(name='True_positives'),
        keras.metrics.FalsePositives(name='False_positives'),
        keras.metrics.TrueNegatives(name='True_negatives'),
        keras.metrics.FalseNegatives(name='False_negatives'),
        keras.metrics.BinaryAccuracy(name='Binary_accuracy'),
        keras.metrics.Precision(name='Precision'),
        keras.metrics.Recall(name='Recall'),
        keras.metrics.AUC(name='AUC'),
        keras.metrics.AUC(name='PRC', curve='PR'), # precision-recall curve
    ]

In [54]:
def run_model(model, x_train, y_train, x_val, y_val, class_weight, verbose = 1, save = True, batch_size = 128, epochs = 100):
    """Compile, train and optionally save a model, then plot its history.

    Parameters:
        model: Keras model to train.
        x_train, y_train: training inputs and one-hot labels.
        x_val, y_val: validation inputs and one-hot labels.
        class_weight: dict mapping a class index to its loss weight.
        verbose: when greater than 0 the model summary is printed; also
            forwarded to ``fit`` and to the history plot.
        save: when true, the model is stored with ``save_model``.
        batch_size: samples per gradient update (default 128).
        epochs: number of training epochs (default 100).

    Returns:
        Tuple ``(model, history, foldername)``; ``foldername`` is ``None``
        when nothing was saved.
    """
    if verbose > 0:
        model.summary()

    model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=get_metrics(), weighted_metrics=get_metrics())
    start_time = time.time()
    history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(x_val, y_val),
                    class_weight = class_weight,
                    verbose=verbose)

    elapsed_minute = round((time.time() - start_time)/60)

    foldername = None
    if save:
        foldername = save_model(model, history, elapsed_minute)
    # Hand the plot an empty folder name rather than None when nothing was
    # saved, so that any path join on it stays valid.
    plot_execution_history(foldername or '', history.history, model, verbose, save)

    return model, history, foldername

###############################################################################################################################################################################################

In [55]:
#foldername = 'classweight'
#model, execution = load_model(foldername)
#history = execution['history']

###############################################################################################################################################################################################

In [56]:
def train_and_evaluate(model_func, dir_path, dataset_percentage,validation_percentage, verbose, seed = 42):
    """Train one model on the dataset and evaluate it on both subsets.

    Parameters:
        model_func: callable ``(input_shape, classes) -> model``.
        dir_path: dataset root folder.
        dataset_percentage: fraction of the dataset to load.
        validation_percentage: fraction of the loaded data held out.
        verbose: verbosity forwarded to every step.
        seed: random seed forwarded to the analysis and the split.

    Returns:
        Tuple ``(train_score, val_score)`` as returned by ``evaluate_model``.
    """
    classes, raw_input_shape, _ = analyse_dataset(dir_path, verbose, seed)
    x, y = load_dataset(dir_path, dataset_percentage , verbose)
    x_train, x_val, y_train, y_val = split_dataset(x, y, validation_percentage, verbose, seed)
    # Both subsets are prepared from the *raw* image shape. prepare_dataset
    # may return a re-ordered shape for the model, and feeding that back in
    # for the validation subset would unpack it in the wrong order.
    x_train_prepared , y_train_prepared, inv_class_map, input_shape = prepare_dataset(x_train , y_train , classes, raw_input_shape)
    x_val_prepared , y_val_prepared, _, _ = prepare_dataset(x_val , y_val , classes, raw_input_shape)
    model, _, foldername = run_model(model_func(input_shape, classes), x_train_prepared, y_train_prepared, x_val_prepared, y_val_prepared, get_class_weight(classes, y_train), verbose)
    train_score = evaluate_model(foldername, 'training',model, x_train_prepared, y_train_prepared, inv_class_map, verbose)
    val_score = evaluate_model(foldername, 'validation', model, x_val_prepared, y_val_prepared, inv_class_map, verbose)
    return train_score, val_score

In [57]:
def reevaluate(results_path, dir_path, dataset_percentage,validation_percentage, verbose, seed = 42):
    """Re-run the evaluation of every model stored under ``results_path``.

    Parameters:
        results_path: folder whose sub-folders each hold one saved model.
        dir_path: dataset root folder.
        dataset_percentage: fraction of the dataset to load.
        validation_percentage: fraction of the loaded data held out.
        verbose: verbosity forwarded to every step.
        seed: random seed forwarded to the analysis and the split.

    Returns:
        None. NOTE: ``evaluate_model`` is called without a results path, so
        whatever it writes lands under its own default results folder.
    """
    # Only sub-folders can hold a saved model; skip stray files.
    folders = [
        entry for entry in sorted(os.listdir(results_path))
        if os.path.isdir(os.path.join(results_path, entry))
    ]
    if not folders:
        return

    # The dataset does not depend on the model being evaluated, so it is
    # loaded and prepared once instead of once per folder.
    classes, raw_input_shape, _ = analyse_dataset(dir_path, verbose, seed)
    x, y = load_dataset(dir_path, dataset_percentage , verbose)
    x_train, x_val, y_train, y_val = split_dataset(x, y, validation_percentage, verbose, seed)
    # Both subsets are prepared from the raw image shape; the shape returned
    # by prepare_dataset may be re-ordered and must not be fed back in.
    x_train_prepared , y_train_prepared, inv_class_map, _ = prepare_dataset(x_train , y_train , classes, raw_input_shape)
    x_val_prepared , y_val_prepared, _, _ = prepare_dataset(x_val , y_val , classes, raw_input_shape)

    for f in folders:
        # Load from the folder that was listed, not from the default one.
        model, _ = load_model(f, results_path)
        evaluate_model(f, 'training',model, x_train_prepared, y_train_prepared, inv_class_map, verbose)
        evaluate_model(f, 'validation', model, x_val_prepared, y_val_prepared, inv_class_map, verbose)
        

In [58]:
def stacked_train_and_evaluate(dir_path, dataset_percentage,validation_percentage, verbose, seed = 42):
    """Train and evaluate a family of model builders one after another.

    Two families of builders are listed; only the ``classweight_last_conv2d``
    family is actually run through ``train_and_evaluate``. The builder names
    are not defined in this function and must exist in the enclosing
    namespace when it is called.
    """
    # Listed but not iterated (kept so the alternative family stays at hand).
    plain_builders = (
        classweight,
        classweight_3_conv2d_layers,
        classweight_4_conv2d_layers,
        classweight_5_conv2d_layers,
        classweight_6_conv2d_layers,
        classweight_7_conv2d_layers,
    )

    last_conv2d_builders = (
        classweight_last_conv2d,
        classweight_last_conv2d_3_conv2d_layers,
        classweight_last_conv2d_4_conv2d_layers,
        classweight_last_conv2d_5_conv2d_layers,
        classweight_last_conv2d_6_conv2d_layers,
        classweight_last_conv2d_7_conv2d_layers,
    )

    for builder in last_conv2d_builders:
        train_and_evaluate(builder, dir_path, dataset_percentage, validation_percentage, verbose, seed)

###############################################################################################################################################################################################

In [59]:
# Parameters
# Run-level settings; the (currently commented-out) driver cells below pass
# them to the pipeline functions defined above.

# Dataset root folder: one sub-folder per class.
dir_path = '../Alzheimer_s Dataset/train'
# NOTE(review): not referenced anywhere else in this notebook — confirm whether it is still needed.
model_name = 'testing'
# Folder holding one sub-folder per saved model.
results_path = 'results'
# 0 silences plots and prints; values above 0 enable them.
verbose = 0
# Fraction of each class to load (1 = everything).
dataset_percentage = 1
# Fraction of the loaded samples held out for validation.
validation_percentage = 0.2
# Seed for the random image pick and the train/validation split.
seed = 42

In [60]:
## Loading and preparing training dataset
#classes, input_shape, class_dist = analyse_dataset(dir_path, verbose, seed)
#x, y = load_dataset(dir_path, dataset_percentage , verbose)
#x_train, x_val, y_train, y_val = split_dataset(x, y, validation_percentage, verbose, seed)
#x_train_prepared , y_train_prepared, inv_class_map, input_shape = prepare_dataset(x_train , y_train , classes, input_shape)
#x_val_prepared , y_val_prepared, _, _ = prepare_dataset(x_val , y_val , classes, input_shape)
## Executing
#model, history, foldername = run_model(get_model(input_shape, classes), x_train_prepared, y_train_prepared, x_val_prepared,y_val_prepared, get_class_weight(classes, y_train), verbose)
#history = history.history

## Loading
#foldername = 'classweight [2021-11-18-02-52-58-419281]'
#model, execution = load_model(foldername)
#history = execution['history']

## Evaluating CNN on training dataset
#train_score = evaluate_model(foldername, 'training',model, x_train_prepared, y_train_prepared, inv_class_map, verbose)
## Evaluating CNN on validation dataset
#val_score = evaluate_model(foldername, 'validation', model, x_val_prepared, y_val_prepared, inv_class_map, verbose)

In [61]:
#stacked_train_and_evaluate(dir_path, dataset_percentage,validation_percentage, verbose, seed)

In [62]:
#reevaluate(results_path, dir_path, dataset_percentage,validation_percentage, verbose, seed)