In [1]:
# imports
from datasets import load_dataset
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


Load Data

In [2]:
# load train dataset, print number of samples and features
dataset_train = load_dataset('Falah/Alzheimer_MRI', split='train')
dataset_train = dataset_train.to_pandas()
print("Number of examples:", len(dataset_train))
# Iterating a DataFrame (even a row slice such as df[:5]) yields its column
# names, not its rows, so the former "Sample data" loop only ever listed the
# features. List them explicitly under an accurate label instead.
print("Features:")
for feature_name in dataset_train.columns:
    print(feature_name)

Number of examples: 5120
Sample data:
image
label


In [3]:
# load test dataset, print number of samples and features
dataset_test = load_dataset('Falah/Alzheimer_MRI', split='test')
dataset_test = dataset_test.to_pandas()
print("Number of examples:", len(dataset_test))
# Iterating a DataFrame (even a row slice such as df[:5]) yields its column
# names, not its rows, so the former "Sample data" loop only ever listed the
# features. List them explicitly under an accurate label instead.
print("Features:")
for feature_name in dataset_test.columns:
    print(feature_name)

Number of examples: 1280
Sample data:
image
label


In [4]:
# categories: integer category id -> human-readable disease label
# (presumably keyed by the values of the dataset's `label` column -- confirm
# against the dataset card)
disease_label_from_category = dict(
    enumerate(
        (
            "Mild Demented",       # 0
            "Moderate Demented",   # 1
            "Non Demented",        # 2
            "Very Mild Demented",  # 3
        )
    )
)

In [5]:
# convert bytes to image values representing intensity of pixels
def dict_to_image(image_dict):
    """Decode the encoded image stored under ``image_dict['bytes']`` as grayscale.

    Parameters
    ----------
    image_dict : dict
        Mapping with a ``'bytes'`` entry holding the encoded image file contents.

    Returns
    -------
    numpy.ndarray
        The array produced by ``cv2.imdecode`` with ``cv2.IMREAD_GRAYSCALE``.

    Raises
    ------
    TypeError
        If ``image_dict`` is not a dict or has no ``'bytes'`` key.
    ValueError
        If OpenCV cannot decode the bytes (``cv2.imdecode`` returned ``None``).
    """
    if not (isinstance(image_dict, dict) and 'bytes' in image_dict):
        raise TypeError(f"Expected dictionary with 'bytes' key, got {type(image_dict)}")

    encoded = np.frombuffer(image_dict['bytes'], np.uint8)
    img = cv2.imdecode(encoded, cv2.IMREAD_GRAYSCALE)
    # imdecode signals failure by returning None rather than raising; fail here
    # so a corrupt record is reported at the decode site instead of surfacing
    # later as an obscure error inside a normalization step.
    if img is None:
        raise ValueError("cv2.imdecode could not decode the image bytes")
    return img

In [6]:
# convert train: decode each encoded image into a pixel array (img_arr) and
# drop the raw 'image' column.
# Guarded so the cell is safe to re-run: once 'image' has been dropped, an
# unguarded dataset_train['image'] lookup would raise KeyError.
if "image" in dataset_train.columns:
    dataset_train = (
        dataset_train
        .assign(img_arr=dataset_train["image"].apply(dict_to_image))
        .drop(columns="image")
    )
dataset_train.head()

Unnamed: 0,label,img_arr
0,2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
1,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
2,3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
3,3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
4,2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."


In [7]:
# convert test: decode each encoded image into a pixel array (img_arr) and
# drop the raw 'image' column.
# Guarded so the cell is safe to re-run: once 'image' has been dropped, an
# unguarded dataset_test['image'] lookup would raise KeyError.
if "image" in dataset_test.columns:
    dataset_test = (
        dataset_test
        .assign(img_arr=dataset_test["image"].apply(dict_to_image))
        .drop(columns="image")
    )
dataset_test.head()

Unnamed: 0,label,img_arr
0,3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
1,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
2,2,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
3,3,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."
4,0,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,..."


Normalizations

In [8]:
# min/max scaling
def min_max_scale(image_array, new_min=0, new_max=1):
    """Linearly rescale ``image_array`` so its values span ``[new_min, new_max]``.

    Parameters
    ----------
    image_array : array-like
        Values to rescale. Non-floating input (e.g. uint8 pixels) is promoted
        to float64; floating input keeps its dtype.
    new_min, new_max : number, optional
        Target range; the smallest input value maps to ``new_min`` and the
        largest to ``new_max``.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as the input. If every input
        value is identical, the result is filled with ``new_min``.

    Raises
    ------
    ValueError
        If the input is empty (raised by ``np.min``).
    """
    pixels = np.asarray(image_array)
    if not np.issubdtype(pixels.dtype, np.floating):
        pixels = pixels.astype(np.float64)

    old_min = np.min(pixels)
    old_max = np.max(pixels)
    if old_min == old_max:
        # Constant image: avoid dividing by zero, and return the same floating
        # dtype as the regular path instead of an integer array.
        return np.full(pixels.shape, new_min, dtype=pixels.dtype)
    return (pixels - old_min) / (old_max - old_min) * (new_max - new_min) + new_min

In [9]:
# create min/max scaled train data: same labels as dataset_train, with every
# image passed through min_max_scale(..., 0, 1)
min_max_images_train = [min_max_scale(img, 0, 1) for img in dataset_train['img_arr']]
min_max_dataset_train = pd.DataFrame()
min_max_dataset_train['label'] = dataset_train['label']
min_max_dataset_train['img_arr'] = min_max_images_train
    

In [13]:
# z score standardization
def z_score_standardize(image_array):
    """Standardize ``image_array`` using its own global mean and standard deviation.

    Parameters
    ----------
    image_array : array-like
        Values to standardize. Non-floating input (e.g. uint8 pixels) is
        promoted to float64; floating input keeps its dtype.

    Returns
    -------
    numpy.ndarray
        Floating-point array of ``(x - mean) / std`` with the input's shape.
        If the standard deviation is zero (constant input), an all-zero
        floating-point array is returned.
    """
    pixels = np.asarray(image_array)
    if not np.issubdtype(pixels.dtype, np.floating):
        pixels = pixels.astype(np.float64)

    mean = np.mean(pixels)
    std = np.std(pixels)
    if std == 0:
        # Constant image: avoid dividing by zero, and return the same floating
        # dtype as the regular path rather than zeros in the integer input dtype.
        return np.zeros_like(pixels)
    return (pixels - mean) / std

In [14]:
# create z score standardized train dataset: same labels as dataset_train,
# with every image passed through z_score_standardize
z_score_images_train = [z_score_standardize(img) for img in dataset_train['img_arr']]
z_score_dataset_train = pd.DataFrame()
z_score_dataset_train['label'] = dataset_train['label']
z_score_dataset_train['img_arr'] = z_score_images_train

In [17]:
# local contrast normalization
import scipy.ndimage
def local_contrast_normalization(image_array, kernel_size=3, epsilon=1e-8):
    """Normalize each value by the mean and standard deviation of its neighborhood.

    Parameters
    ----------
    image_array : array-like
        Image to normalize. Non-floating input (e.g. uint8 pixels) is promoted
        to float64 before any arithmetic; floating input keeps its dtype.
    kernel_size : int, optional
        Size of the local neighborhood window passed to
        ``scipy.ndimage.uniform_filter``.
    epsilon : float, optional
        Small constant added to the local variance to prevent division by zero.

    Returns
    -------
    numpy.ndarray
        Floating-point array of ``(x - local_mean) / sqrt(local_var + epsilon)``
        with the input's shape.
    """
    # Integer images must be promoted first: uniform_filter returns its input
    # dtype, so on uint8 the local mean is truncated, `pixels ** 2` wraps
    # modulo 256 and `pixels - local_mean` underflows.
    pixels = np.asarray(image_array)
    if not np.issubdtype(pixels.dtype, np.floating):
        pixels = pixels.astype(np.float64)

    # Compute local mean using a uniform filter
    local_mean = scipy.ndimage.uniform_filter(pixels, size=kernel_size, mode='reflect')
    # Compute local variance as E[x^2] - E[x]^2 over the same window
    local_sqr_mean = scipy.ndimage.uniform_filter(pixels ** 2, size=kernel_size, mode='reflect')
    # Rounding can push the variance marginally below zero; clamp so sqrt never sees a negative
    local_var = np.maximum(local_sqr_mean - local_mean ** 2, 0)
    local_std = np.sqrt(local_var + epsilon)
    # Normalize the image
    return (pixels - local_mean) / local_std

In [18]:
# create local contrast normalization train dataset: same labels as
# dataset_train, with every image passed through local_contrast_normalization
# (default kernel_size and epsilon)
local_contrast_images_train = [
    local_contrast_normalization(img) for img in dataset_train['img_arr']
]
local_contrast_dataset_train = pd.DataFrame()
local_contrast_dataset_train['label'] = dataset_train['label']
local_contrast_dataset_train['img_arr'] = local_contrast_images_train