In [8]:
pip install imbalanced-learn

Collecting imbalanced-learn
  Downloading imbalanced_learn-0.13.0-py3-none-any.whl (238 kB)
     ------------------------------------ 238.4/238.4 kB 239.4 kB/s eta 0:00:00
Collecting sklearn-compat<1,>=0.1
  Downloading sklearn_compat-0.1.3-py3-none-any.whl (18 kB)
Installing collected packages: sklearn-compat, imbalanced-learn
Successfully installed imbalanced-learn-0.13.0 sklearn-compat-0.1.3
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import RandomOverSampler
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras import backend as K
from tensorflow.keras.applications import EfficientNetB0

In [2]:
def build_alexnet():
    model = Sequential([
        Conv2D(96, (11, 11), strides=4, activation='relu', input_shape=(224, 224, 3)),
        MaxPooling2D((3, 3), strides=2),
        Conv2D(256, (5, 5), activation='relu', padding='same'),
        MaxPooling2D((3, 3), strides=2),
        Conv2D(384, (3, 3), activation='relu', padding='same'),
        Conv2D(384, (3, 3), activation='relu', padding='same'),
        Conv2D(256, (3, 3), activation='relu', padding='same'),
        MaxPooling2D((3, 3), strides=2),
        Flatten(),
        Dense(4096, activation='relu'),
        Dropout(0.5),
        Dense(4096, activation='relu'),
        Dropout(0.5),
        Dense(1000, activation='relu')
    ])
    return model

In [3]:
data_dir = r"C:\Users\mdguf\Downloads\Alzheimers-ADNI"
categories = ['Final EMCI JPEG', 'Final LMCI JPEG', 'Final MCI JPEG', 'Final CN JPEG', 'Final AD JPEG']
img_size = 224
X, y = [], []

def load_images_from_folder(folder, label):
    path = os.path.join(data_dir, folder)
    for img_name in os.listdir(path):
        img_path = os.path.join(path, img_name)
        img = cv2.imread(img_path)
        if img is not None:
            img = cv2.resize(img, (img_size, img_size))
            X.append(img)
            y.append(label)

In [4]:
for label, category in enumerate(categories):
    load_images_from_folder(os.path.join('train', category), label)
    load_images_from_folder(os.path.join('test', category), label)

In [6]:
ros = RandomOverSampler()
X, y = ros.fit_resample(X.reshape(X.shape[0], -1), y)
X = X.reshape(-1, img_size, img_size, 3)

In [18]:
print(f"X shape: {X.shape}, y shape: {y.shape}")

X shape: (2900, 224, 224, 3), y shape: (2900,)


In [7]:
# Data Augmentation
datagen = ImageDataGenerator(rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, horizontal_flip=True)

In [8]:
# Feature Extraction - Hypercolumn
base_model_vgg = VGG16(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))
base_model_alex = build_alexnet()
base_model_efficientnet = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 2us/step


In [9]:
def extract_hypercolumn(image):
    vgg_features = base_model_vgg.predict(image)
    alex_features = base_model_alex.predict(image)
    efficientnet_features = base_model_efficientnet.predict(image)
    
    vgg_flatten = vgg_features.flatten()
    alex_flatten = alex_features.flatten()
    efficientnet_flatten = efficientnet_features.flatten()
    
    return np.concatenate([vgg_flatten, alex_flatten, efficientnet_flatten])

In [10]:
X_features = np.array([extract_hypercolumn(img.reshape(1, img_size, img_size, 3)) for img in X])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 849ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 325ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 97ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 98ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 321ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 108ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 316ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 324ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

In [17]:
print(X_features.shape)

(2900, 88808)


In [11]:
# Dimensionality Reduction using PCA
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_features)
pca = PCA(n_components=100)
X_pca = pca.fit_transform(X_scaled)

In [12]:
# Dimensionality Reduction using Autoencoder
input_dim = X_scaled.shape[1]
input_layer = Input(shape=(input_dim,))
encoded = Dense(100, activation='relu')(input_layer)
encoded = Dense(50, activation='relu')(encoded)
decoded = Dense(100, activation='relu')(encoded)
decoded = Dense(input_dim, activation='sigmoid')(decoded)
autoencoder = Model(input_layer, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(X_scaled, X_scaled, epochs=20, batch_size=32, verbose=1)
X_autoencoded = autoencoder.predict(X_scaled)

Epoch 1/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 309ms/step - loss: 0.8469
Epoch 2/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 270ms/step - loss: 0.7046
Epoch 3/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 279ms/step - loss: 0.7366
Epoch 4/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 277ms/step - loss: 0.6693
Epoch 5/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 297ms/step - loss: 0.7032
Epoch 6/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 320ms/step - loss: 0.6618
Epoch 7/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 316ms/step - loss: 0.6321
Epoch 8/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 394ms/step - loss: 0.7040
Epoch 9/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 352ms/step - loss: 0.6194
Epoch 10/20
[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 331ms

In [13]:
# Train ML Models
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
models = {
    'SVM': SVC(),
    'Random Forest': RandomForestClassifier(),
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    'KNN': KNeighborsClassifier()
}

In [14]:
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f'{name} Accuracy: {acc:.2f}')

SVM Accuracy: 0.68
Random Forest Accuracy: 0.90


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Accuracy: 0.52
Decision Tree Accuracy: 0.83
KNN Accuracy: 0.69


In [15]:
for name, model in models.items():
    model.fit(X_train, y_train)
    
    # Predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)
    
    # Accuracy Calculation
    train_acc = accuracy_score(y_train, y_train_pred)
    test_acc = accuracy_score(y_test, y_test_pred)
    
    print(f'{name} - Training Accuracy: {train_acc:.2f}, Validation Accuracy: {test_acc:.2f}')


SVM - Training Accuracy: 0.77, Validation Accuracy: 0.68
Random Forest - Training Accuracy: 1.00, Validation Accuracy: 0.91
Logistic Regression - Training Accuracy: 0.59, Validation Accuracy: 0.52


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Decision Tree - Training Accuracy: 1.00, Validation Accuracy: 0.82
KNN - Training Accuracy: 0.81, Validation Accuracy: 0.69


In [16]:
import joblib

base_model_alex.save('alexnet_trained.h5')
base_model_vgg.save('vgg16_base.h5')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(pca, 'pca.pkl')
joblib.dump(models['Random Forest'], 'rf_model.pkl')



['rf_model.pkl']