In [49]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from PIL import Image
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout
import os
import random
from IPython.display import display
from matplotlib import image
from sklearn.model_selection import train_test_split

In [196]:
def get_classes(dir_path):
    """Return the class names found directly under ``dir_path``.

    Every immediate sub-directory of ``dir_path`` is treated as one class.
    Plain files (for example ``.DS_Store``) are skipped because the callers in
    this notebook list the contents of each returned entry as a directory.
    The names are sorted because ``os.listdir`` gives no ordering guarantee,
    and the position of a class in this list is later used as its label index.

    Args:
        dir_path: Path of the dataset root directory.

    Returns:
        list[str]: Sorted names of the sub-directories of ``dir_path``.

    Raises:
        FileNotFoundError: If ``dir_path`` does not exist (raised by ``os.listdir``).
    """
    return sorted(
        entry
        for entry in os.listdir(dir_path)
        if os.path.isdir(os.path.join(dir_path, entry))
    )

def data_analysis_histogram(dir_path, classes, verbose = 1):
    """Count the entries of every class directory and optionally plot them.

    Args:
        dir_path: Dataset root directory holding one sub-directory per class.
        classes: Sequence of class (sub-directory) names.
        verbose: When greater than 0, draw a horizontal bar chart of the counts.

    Returns:
        list[int]: Number of directory entries per class, in the order of
        ``classes``. Returned so the counts remain usable when plotting is
        disabled instead of being thrown away.
    """
    class_dist = [len(os.listdir(os.path.join(dir_path, c))) for c in classes]

    if verbose > 0:
        plt.figure(figsize=(16, 8))
        plt.title("Class distribution")
        plt.xlabel("Number of images")
        plt.ylabel("Class")
        plt.barh(classes, class_dist)
        # Annotate each bar with its exact count at the bar's tip.
        for index, value in enumerate(class_dist):
            plt.text(value, index, str(value))
        plt.show()

    return class_dist

def data_analysis_image_size(dir_path, classes, verbose = 1, seed = -1):
    """Pick one random image from the dataset and optionally display it with its size.

    Args:
        dir_path: Dataset root directory holding one sub-directory per class.
        classes: Non-empty sequence of class (sub-directory) names.
        verbose: When greater than 0, read the image and show it with a title
            giving its path and its first two array dimensions.
        seed: When >= 0, seeds the selection so the same image is picked on
            every call. A negative value uses the shared ``random`` state.

    Returns:
        str: Path of the selected image.

    Raises:
        IndexError: If ``classes`` or the chosen class directory is empty
            (raised by ``choice``).
    """
    # A private generator keeps a seeded call from resetting the global
    # ``random`` state that other code may rely on. It yields the same picks
    # as ``random.seed(seed)`` followed by ``random.choice``.
    rng = random.Random(seed) if seed >= 0 else random
    random_class_path = os.path.join(dir_path, rng.choice(classes))
    # os.listdir gives no ordering guarantee; sort so a seed maps to one file.
    random_img_name = rng.choice(sorted(os.listdir(random_class_path)))
    random_img_path = os.path.join(random_class_path, random_img_name)
    if verbose > 0:
        img = image.imread(random_img_path)
        plt.figure(figsize=(16, 8))
        plt.title("%s - Height: %d px x Length: %d px" % (random_img_path,img.shape[0],img.shape[1]))
        plt.imshow(img)
        plt.show()
    return random_img_path

def analyse_dataset(dir_path, verbose = 1, seed = -1):
    """Run the exploratory checks on a dataset directory.

    Looks up the class names, then hands them to the class-distribution
    histogram and to the random-image size check.

    Args:
        dir_path: Dataset root directory.
        verbose: Forwarded to both helpers; they only draw when it is > 0.
        seed: Forwarded to the random-image helper.

    Returns:
        The class names obtained from ``get_classes(dir_path)``.
    """
    found_classes = get_classes(dir_path)
    data_analysis_histogram(dir_path, found_classes, verbose=verbose)
    data_analysis_image_size(dir_path, found_classes, verbose=verbose, seed=seed)
    return found_classes


In [209]:
def load_dataset(dir_path, verbose = 1):
    """Read every image under ``dir_path`` into memory together with its class label.

    Classes and file names are visited in sorted order. ``os.listdir`` gives no
    ordering guarantee, so without sorting the sample order, and therefore any
    seeded split performed on the result, could differ between machines.

    Args:
        dir_path: Dataset root directory holding one sub-directory per class.
        verbose: When greater than 0, print how many images were loaded.

    Returns:
        tuple[np.ndarray, np.ndarray]: The stacked images and the matching
        class names, index-aligned.
    """
    img_array = []
    class_array = []
    for c in sorted(get_classes(dir_path)):
        class_path = os.path.join(dir_path, c)
        for img_name in sorted(os.listdir(class_path)):
            img_array.append(image.imread(os.path.join(class_path, img_name)))
            class_array.append(c)
    if verbose > 0:
        print("Loaded %d images" % len(img_array))
    # NOTE(review): np.array only yields a regular numeric array when every
    # image has the same shape; confirm that holds for this dataset.
    return np.array(img_array), np.array(class_array)

In [100]:
def split_dataset(x, y, val_size = 0.2, verbose = 1, seed = 42, stratify = False):
    """Split samples and labels into a training part and a validation part.

    Args:
        x: Samples.
        y: Labels, index-aligned with ``x``.
        val_size: Passed to ``train_test_split`` as ``test_size``.
        verbose: When greater than 0, print the size of both parts.
        seed: Passed to ``train_test_split`` as ``random_state``.
        stratify: When true, ``y`` is passed as the ``stratify`` argument so
            both parts keep the class proportions of ``y``. This helps when
            some classes have few samples. Defaults to False, which keeps the
            previous unstratified behaviour.

    Returns:
        tuple: ``(x_train, x_val, y_train, y_val)``.
    """
    x_train, x_val, y_train, y_val = train_test_split(
        x,
        y,
        test_size=val_size,
        random_state=seed,
        stratify=y if stratify else None,
    )
    if verbose > 0:
        print("Train size: %d\nValidation size: %d" % (len(x_train), len(x_val)))
    return x_train, x_val, y_train, y_val


In [220]:
def prepare_dataset(x , y , classes):
    """Scale the images and one-hot encode the labels.

    Args:
        x: Array of images; it is cast to float32 and divided by 255.0
            (assumes pixel values are in the 0-255 range - TODO confirm).
        y: Iterable of class names, each of which must appear in ``classes``.
        classes: Sequence of class names; a name's position becomes its code.

    Returns:
        tuple: ``(x_scaled, y_categorical, inv_class_map, class_map)`` where
        ``class_map`` maps class name -> integer code and ``inv_class_map``
        maps integer code -> class name.

    Raises:
        KeyError: If ``y`` contains a name that is missing from ``classes``.
    """
    name_to_code = {name: code for code, name in enumerate(classes)}
    code_to_name = {code: name for name, code in name_to_code.items()}

    label_codes = list(map(name_to_code.__getitem__, y))
    one_hot = keras.utils.to_categorical(label_codes, num_classes=len(classes))

    scaled = x.astype('float32') / 255.0
    return scaled, one_hot, code_to_name, name_to_code

In [197]:
# Collect the class names of the training set; verbose=0 suppresses the plots.
classes = analyse_dataset('../Alzheimer_s Dataset/train', 0)

In [211]:
# Read every training image and its class name into memory (verbose=1 prints the count).
x, y = load_dataset('../Alzheimer_s Dataset/train',1)

Loaded 5121 images


In [212]:
# Hold out 20% of the samples for validation; the seed is left at split_dataset's default.
x_train, x_val, y_train, y_val = split_dataset(x, y, 0.2, 1)

Train size: 4096
Validation size: 1025


In [221]:
# NOTE(review): prepare_dataset returns (x_scaled, y_categorical, inv_class_map, class_map),
# so the name `class_map` below is bound to the integer-code -> class-name mapping, and the
# class-name -> integer-code mapping is discarded into `_`. Confirm this is intended.
x_train_prepared , y_train_prepared, class_map, _ = prepare_dataset(x_train , y_train , classes)