In [13]:
# ============================================
# CELL 1: IMPORTS
# ============================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import sys
from PIL import Image
import cv2
from collections import Counter
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print("‚úÖ Imports done!")







‚úÖ Imports done!


In [14]:
#second cell: FOLDER STRUCTURE
# ============================================
base_path = 'C:\\Users\\bbnro\\Downloads\\archive\\AugmentedAlzheimerDataset'

print("üìÅ FOLDER STRUCTURE:\n")
for root, dirs, files in os.walk(base_path):
    level = root.replace(base_path, '').count(os.sep)
    indent = '  ' * level
    print(f"{indent}üìÇ {os.path.basename(root)}/")
    if level < 2:  # Don't go too deep
        for d in dirs:
            print(f"{indent}  üìÅ {d}")


üìÅ FOLDER STRUCTURE:



In [15]:
#third cell: image counts per class
# ============================================
augmented_path = 'C:\\Users\\bbnro\\Downloads\\archive\\AugmentedAlzheimerDataset'

class_counts = {}
for class_name in os.listdir(augmented_path):
    class_path = os.path.join(augmented_path, class_name)
    if os.path.isdir(class_path):
        count = len(os.listdir(class_path))
        class_counts[class_name] = count

# Display as DataFrame
df_counts = pd.DataFrame({
    'Class': list(class_counts.keys()),
    'Count': list(class_counts.values())
})
df_counts = df_counts.sort_values('Count', ascending=False)
print(df_counts.to_string(index=False))
print(f"\nüìä Total Images: {sum(class_counts.values())}")




FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\bbnro\\Downloads\\archive\\AugmentedAlzheimerDataset'

In [None]:
# CELL 4: PLOT CLASS DISTRIBUTION
# ============================================
plt.figure(figsize=(10, 6))
colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']
bars = plt.bar(class_counts.keys(), class_counts.values(), color=colors)
plt.title('üß† Alzheimer MRI Dataset - Class Distribution', fontsize=14, fontweight='bold')
plt.xlabel('Dementia Stage')
plt.ylabel('Number of Images')
plt.xticks(rotation=15)

# Add count labels on bars
for bar, count in zip(bars, class_counts.values()):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 100, 
             str(count), ha='center', fontweight='bold')
    
plt.tight_layout()
plt.show()

# Check if balanced
print("\nüîç CLASS BALANCE CHECK:")
max_count = max(class_counts.values())
min_count = min(class_counts.values())
print(f"   Max: {max_count}, Min: {min_count}")
print(f"   Ratio: {max_count/min_count:.2f}x difference")



In [None]:
# CELL 5: VIEW SAMPLE IMAGES FROM EACH CLASS
# ============================================
augmented_path = 'C:\\Users\\bbnro\\Downloads\\archive\\AugmentedAlzheimerDataset'

fig, axes = plt.subplots(4, 5, figsize=(15, 12))
class_names = ['NonDemented', 'VeryMildDemented', 'MildDemented', 'ModerateDemented']

for row, class_name in enumerate(class_names):
    class_path = os.path.join(augmented_path, class_name)
    images = os.listdir(class_path)[:5]  # Get first 5 images
    
    for col, img_name in enumerate(images):
        img_path = os.path.join(class_path, img_name)
        img = Image.open(img_path)
        axes[row, col].imshow(img, cmap='gray')
        axes[row, col].axis('off')
        if col == 0:
            axes[row, col].set_title(f'{class_name}', fontsize=12, fontweight='bold')

plt.suptitle('üß† MRI Samples: Progression of Alzheimer\'s Disease', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()



In [None]:
# CELL 6: CHECK IMAGE DIMENSIONS & PROPERTIES
# ============================================
augmented_path = 'C:\\Users\\bbnro\\Downloads\\archive\\AugmentedAlzheimerDataset'

# Sample random images to check dimensions
sample_dims = []
sample_modes = []

for class_name in os.listdir(augmented_path):
    class_path = os.path.join(augmented_path, class_name)
    if os.path.isdir(class_path):
        images = os.listdir(class_path)[:50]  # Check 50 from each class
        for img_name in images:
            img = Image.open(os.path.join(class_path, img_name))
            sample_dims.append(img.size)
            sample_modes.append(img.mode)
# Analyze dimensions
unique_dims = Counter(sample_dims)
unique_modes = Counter(sample_modes)

print("üìê IMAGE DIMENSIONS:")
for dim, count in unique_dims.most_common():
    print(f"   {dim[0]} x {dim[1]} pixels: {count} images")

print(f"\nüé® COLOR MODES:")
for mode, count in unique_modes.items():
    mode_desc = {'L': 'Grayscale', 'RGB': 'Color', 'RGBA': 'Color+Alpha'}
    print(f"   {mode_desc.get(mode, mode)}: {count} images")
# Get one sample for detailed info
sample_img = Image.open(os.path.join(augmented_path, 'NonDemented', os.listdir(os.path.join(augmented_path, 'NonDemented'))[0]))
print(f"\nüìã SAMPLE IMAGE INFO:")
print(f"   Size: {sample_img.size}")
print(f"   Mode: {sample_img.mode}")
print(f"   Format: {sample_img.format}")



In [None]:
# CELL 7: MARKDOWN - DATA ANALYSIS SUMMARY
# ============================================
from IPython.display import Markdown, display

summary = """
## üìã Data Analysis Summary

### Dataset Overview
- **Total Images:** 33,984
- **Classes:** 4 (NonDemented, VeryMildDemented, MildDemented, ModerateDemented)
- **Class Balance:** 1.49x ratio (acceptable)

### Image Properties
- **Sizes:** Mixed (200√ó190 and 180√ó180) ‚Üí Need resizing
- **Format:** JPEG, RGB color mode
- **Quality:** Good, augmentation already applied
### Medical Insight
- Alzheimer's causes **brain atrophy** (shrinkage)
- **Ventricles enlarge** as disease progresses
- Model should learn to detect these structural changes

### Preprocessing Decisions
1. ‚úÖ Resize all images to **176√ó176** (balanced size)
2. ‚úÖ Keep RGB or convert to grayscale
3. ‚úÖ Normalize pixel values to [0,1]
4. ‚úÖ Use Original dataset for validation (no data leakage)
"""
display(Markdown(summary))

# Clear any cached imports
import sys
if 'google.protobuf' in sys.modules:
    del sys.modules['google.protobuf']




In [None]:
#note this cell is slightly difrent from the chromebook version
# CELL 8: BUILD DATA GENERATORS
# ============================================
# CELL 8: BUILD DATA GENERATORS
# ============================================
train_path = 'C:\\Users\\bbnro\\Downloads\\archive\\AugmentedAlzheimerDataset'
test_path = 'C:\\Users\\bbnro\\Downloads\\archive\\OriginalDataset'

# Image parameters
IMG_SIZE = (176, 176)
BATCH_SIZE = 32

# Load training data
train_generator = tf.keras.utils.image_dataset_from_directory(
    train_path,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical',
    validation_split=0.15,
    subset='training',
    seed=42
)

# Store class names before mapping
class_names = sorted(os.listdir(train_path))

# Load validation data
val_generator = tf.keras.utils.image_dataset_from_directory(
    train_path,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical',
    validation_split=0.15,
    subset='validation',
    seed=42
)

# Load test data
test_generator = tf.keras.utils.image_dataset_from_directory(
    test_path,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    label_mode='categorical'
)

# Normalize pixel values
normalize = tf.keras.layers.Rescaling(1./255)
train_generator = train_generator.map(lambda x, y: (normalize(x), y))
val_generator = val_generator.map(lambda x, y: (normalize(x), y))
test_generator = test_generator.map(lambda x, y: (normalize(x), y))

print("\n‚úÖ Data Generators Ready!")
print(f"   Training samples: {tf.data.experimental.cardinality(train_generator).numpy() * BATCH_SIZE}")
print(f"   Validation samples: {tf.data.experimental.cardinality(val_generator).numpy() * BATCH_SIZE}")
print(f"   Test samples: {tf.data.experimental.cardinality(test_generator).numpy() * BATCH_SIZE}")


# ============================================
# ============================================


In [None]:
# CELL 9: VISUALIZE A BATCH
# ============================================
# Get one batch to verify
images, labels = next(iter(train_generator))

fig, axes = plt.subplots(2, 4, figsize=(12, 6))

for i, ax in enumerate(axes.flat):
    # Ensure image is in correct format for display
    img_display = (images[i].numpy() * 255).astype(np.uint8)
    ax.imshow(img_display, cmap='gray')
    label_idx = np.argmax(labels[i])
    ax.set_title(f'{class_names[label_idx]}')
    ax.axis('off')

plt.suptitle('Sample Batch from Training Generator', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

print(f"Batch shape: {images.shape}")
print(f"Labels shape: {labels.shape}")
print(f"Pixel range: [{images.numpy().min():.3f}, {images.numpy().max():.3f}]")



In [None]:
# CELL 10: FIX PROTOBUF (Run this first!)
# ============================================
#!pip uninstall -y protobuf
#!pip install protobuf==3.20.3 --quiet

print("‚úÖ Protobuf fixed! Now restart kernel:")
print("   Session ‚Üí Restart Session")
print("   Then re-run all cells before continuing")

In [None]:
# CELL 11: IMPORTS & GPU CHECK
# ============================================
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.applications import EfficientNetB3, ResNet50V2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')
# Check GPU
print("üî• GPU Status:")
print(f"   TensorFlow version: {tf.__version__}")
print(f"   GPU Available: {tf.config.list_physical_devices('GPU')}")
print(f"   Built with CUDA: {tf.test.is_built_with_cuda()}")

# Set seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

print("\n‚úÖ All imports successful!")

In [None]:
# CELL 12: ENHANCED DATA GENERATORS
# ============================================

# Paths

train_path = 'C:\\Users\\bbnro\\Downloads\\archive\\AugmentedAlzheimerDataset'
test_path = 'C:\\Users\\bbnro\\Downloads\\archive\\OriginalDataset'
# Image parameters
IMG_SIZE = (224, 224)  # EfficientNet/ResNet optimal size
BATCH_SIZE = 16  # Smaller batch for better generalization

# Training generator - MINIMAL augmentation (data already augmented!)
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=10,  # Slight rotation only
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    validation_split=0.2  # 20% for validation
)

# Test generator - NO augmentation
test_datagen = ImageDataGenerator(rescale=1./255)

# Create generators
train_gen = train_datagen.flow_from_directory(
    train_path,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='training',
    shuffle=True,
    seed=42
)

val_gen = train_datagen.flow_from_directory(
    train_path,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    subset='validation',
    shuffle=False,
    seed=42
)
test_gen = test_datagen.flow_from_directory(
    test_path,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

# Calculate class weights to handle imbalance
from sklearn.utils.class_weight import compute_class_weight

class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(train_gen.classes),
    y=train_gen.classes
)
class_weights_dict = dict(enumerate(class_weights))

print("\n‚úÖ Data Generators Created!")
print(f"   Training samples: {train_gen.samples}")
print(f"   Validation samples: {val_gen.samples}")
print(f"   Test samples: {test_gen.samples}")
print(f"\nüìö Class Mapping: {train_gen.class_indices}")
print(f"\n‚öñÔ∏è Class Weights: {class_weights_dict}")

In [None]:
# CELL 13: VERIFY DATA PIPELINE
# ============================================

images, labels = next(train_gen)

fig, axes = plt.subplots(2, 4, figsize=(14, 7))
class_names = list(train_gen.class_indices.keys())

for i, ax in enumerate(axes.flat):
    ax.imshow(images[i])
    label_idx = np.argmax(labels[i])
    ax.set_title(f'{class_names[label_idx]}', fontsize=11, fontweight='bold')
    ax.axis('off')

plt.suptitle('‚úÖ Sample Training Batch (After Preprocessing)', fontsize=15, fontweight='bold')
plt.tight_layout()
plt.show()

print(f"\nüìä Batch Info:")
print(f"   Shape: {images.shape}")
print(f"   Pixel range: [{images.min():.3f}, {images.max():.3f}]")
print(f"   Data type: {images.dtype}")


In [None]:
# CELL 14: BUILD CUSTOM CNN (NO DOWNLOADS!)
# ============================================

def build_custom_cnn():
    """
    Custom CNN optimized for MRI brain scans
    Designed to hit 98%+ accuracy
    """
    model = models.Sequential([
        # Block 1
        layers.Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(224, 224, 3)),
        layers.BatchNormalization(),
        layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        # Block 2
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        # Block 3
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        # Block 4
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        
        # Block 5 (Deeper)
        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(512, (3, 3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.GlobalAveragePooling2D(),
        # Dense layers
        layers.Dense(512, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.5),
        
        layers.Dense(256, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.4),
        
        layers.Dense(128, activation='relu'),
        layers.BatchNormalization(),
        layers.Dropout(0.3),
        # Output layer
        layers.Dense(4, activation='softmax')
    ])
    
    return model

model = build_custom_cnn()

# Compile with Adam optimizer
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', keras.metrics.Precision(), keras.metrics.Recall()]
)
print("‚úÖ Custom CNN Built!")
print(f"\nüìä Total Parameters: {model.count_params():,}")
model.summary()

In [None]:
# CELL 15: CALLBACKS (SAME AS BEFORE)
# ============================================

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=7,  # Increased patience for custom CNN
    restore_best_weights=True,
    verbose=1
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=4,
    min_lr=1e-7,
    verbose=1
)
checkpoint = ModelCheckpoint(
    'best_alzheimer_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1
)

callbacks = [early_stop, reduce_lr, checkpoint]
print("‚úÖ Callbacks ready!")

In [None]:
# # CELL 16: TRAIN THE MODEL
# # ============================================

# print("üöÄ Starting training...")
# print(f"‚ö†Ô∏è GPU Status: {'ENABLED ‚úÖ' if tf.config.list_physical_devices('GPU') else 'DISABLED - Training will be slow ‚ö†Ô∏è'}")

# history = model.fit(
#     train_gen,
#     validation_data=val_gen,
#     epochs=30,  # Will stop early if needed
#     class_weight=class_weights_dict,
#     callbacks=callbacks,
#     verbose=1
# )

# print("\n‚úÖ Training Complete!")