In [None]:
import os
# Setting CUDA_VISIBLE_DEVICES to "-1" hides all CUDA devices from this process,
# so TensorFlow will run on the CPU. It is set here, before TensorFlow is
# imported in the next cell, so it is in effect when TF enumerates devices.
# NOTE(review): the notebook text says GPU acceleration is used for training —
# remove this line (or set a real device index) if GPU training is intended.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


In [None]:
import tensorflow as tf

# Report the TensorFlow version and the devices it can see in this process.
print(tf.__version__)

print("Logical devices:", tf.config.list_logical_devices())
print("Physical devices:", tf.config.list_physical_devices())
print(tf.test.is_built_with_cuda())
# tf.test.is_gpu_available() is deprecated; the supported check is whether
# tf.config.list_physical_devices("GPU") returns any device.
print(bool(tf.config.list_physical_devices("GPU")))

# Tiny ImageNet Classification Assignment

This notebook completes the requirements for the Tiny ImageNet assignment.

The goals are:

1. Data exploration  
2. Training a baseline convolutional neural network model  
3. Reporting model architecture, training strategy, results, and improvements  

The dataset has 200 classes of 64x64 RGB images. GPU acceleration is used to speed up training.


## Imports and GPU Verification

This cell imports all required libraries for data loading, preprocessing, training, and evaluation.  
It also verifies that TensorFlow can detect the GPU devices.

In [None]:
# import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

import os
import shutil

# Show the GPU devices TensorFlow reports (an empty list means none are visible)
gpu_devices = tf.config.list_physical_devices("GPU")
print("GPUs detected:", gpu_devices)


## Download Tiny ImageNet with KaggleHub
This cell downloads the Tiny ImageNet dataset using KaggleHub and prints the local path.


In [None]:
import kagglehub

# Fetch the latest version of the dataset through KaggleHub and keep the
# returned location in `path`
DATASET_HANDLE = "akash2sharma/tiny-imagenet"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)


## Inspect Dataset Directory Structure

The Tiny ImageNet dataset has been downloaded using KaggleHub.  
Before building data loaders or reorganizing folders, it is important to inspect the actual directory structure.  
This cell displays the top level of the dataset path so we can confirm the presence of expected folders such as train, val, test, wnids.txt, and words.txt.

The folder structure will guide how the dataset loading code is written in later steps.


In [None]:
import os

# Use the location returned by kagglehub.dataset_download (stored in `path` by
# the download cell) instead of a machine-specific absolute path, so the
# notebook also works for other users, cache locations and dataset versions.
DATA_ROOT = path

# os.walk yields the top directory first; break after it so only that level
# is printed. At most ten file names are shown to keep the output short.
for root, dirs, files in os.walk(DATA_ROOT):
    print("ROOT:", root)
    print("DIRS:", dirs)
    print("FILES:", files[:10])
    print()
    break  # show only the top level


## Inspect Contents of tiny-imagenet-200

The top level of the dataset contains a single directory named tiny-imagenet-200.  
This cell inspects the contents of that directory to verify the presence of the expected subfolders and files.  
These typically include train, val, test, wnids.txt, and words.txt.


In [None]:
inner_root = os.path.join(DATA_ROOT, "tiny-imagenet-200")

# Only the first entry produced by os.walk (the directory itself) is reported;
# nothing is printed when the walk yields no entries.
top_level = next(os.walk(inner_root), None)
if top_level is not None:
    root, dirs, files = top_level
    print("ROOT:", root)
    print("DIRS:", dirs)
    print("FILES:", files[:10])
    print()


## Inspect train, val, and test folders

The dataset contains the expected structure with train, val, and test directories.  
The train directory should contain 200 class subfolders.  
The val directory contains images and a file that maps image names to class labels.  
Before loading the dataset, this cell inspects the internal structure of these folders.


In [None]:
# Resolve the three split directories underneath inner_root
train_dir = os.path.join(inner_root, "train")
val_dir = os.path.join(inner_root, "val")
test_dir = os.path.join(inner_root, "test")

# Print a heading followed by at most ten entries for each split, in order
split_samples = (
    ("Train directory sample:", train_dir),
    ("\nVal directory sample:", val_dir),
    ("\nTest directory sample:", test_dir),
)
for heading, split_dir in split_samples:
    print(heading)
    print(os.listdir(split_dir)[:10])

# Some copies of the dataset contain a second tiny-imagenet-200 folder inside
# the first one; show its contents when it is present
nested_dir = os.path.join(inner_root, "tiny-imagenet-200")
if os.path.exists(nested_dir):
    print("\nNested tiny-imagenet-200 directory exists. Contents:")
    print(os.listdir(nested_dir)[:10])


## Set Dataset Root Directory

The path to the Tiny ImageNet dataset is defined here.  
This path was obtained using KaggleHub.  
All later data loading operations will reference this directory.


In [None]:
# Build the dataset root from the location returned by kagglehub.dataset_download
# (stored in `path` by the download cell) rather than a hard-coded home
# directory, so the notebook is portable across machines and dataset versions.
DATA_ROOT = os.path.join(path, "tiny-imagenet-200")

print("Dataset root:", DATA_ROOT)
print("Contents:", os.listdir(DATA_ROOT))


## Fix Validation Folder Structure

The Tiny ImageNet validation images are stored in a single folder named images, with labels in val_annotations.txt.  
Keras requires validation images to be organized into class subfolders.  
This cell reorganizes validation images into class-specific folders.  
This operation only needs to run once.


In [None]:
val_dir = os.path.join(DATA_ROOT, "val")
val_images_dir = os.path.join(val_dir, "images")
annotations_file = os.path.join(val_dir, "val_annotations.txt")

# The presence of val/images marks a validation split that has not been
# reorganized yet; once it is gone this cell is a no-op.
if os.path.exists(val_images_dir):
    print("Reorganizing validation images...")

    # Each annotation line is tab-separated; the first field is the image file
    # name and the second is the class id.
    with open(annotations_file, "r") as f:
        for line in f:
            parts = line.strip().split("\t")
            if len(parts) < 2:
                # Blank or malformed line: nothing to move, and indexing
                # parts[1] would raise IndexError.
                continue
            img_name, class_id = parts[0], parts[1]

            class_dir = os.path.join(val_dir, class_id)
            os.makedirs(class_dir, exist_ok=True)

            src = os.path.join(val_images_dir, img_name)
            dst = os.path.join(class_dir, img_name)

            # Images already moved by an earlier, interrupted run are skipped
            if os.path.exists(src):
                shutil.move(src, dst)

    # Anything still in val/images had no annotation; report it before the
    # folder is deleted so the loss is not silent.
    leftover = os.listdir(val_images_dir)
    if leftover:
        print("Warning:", len(leftover), "file(s) without an annotation will be deleted.")

    shutil.rmtree(val_images_dir)
    print("Validation folder reorganized successfully.")

else:
    print("Validation folder already organized.")


### Verify Train Folder

In [None]:

# List the training directory once, then report how many entries it holds
# and show the first ten
train_class_dirs = os.listdir(os.path.join(DATA_ROOT, "train"))

print("Train classes:", len(train_class_dirs))
print(train_class_dirs[:10])


## Fix Validation Folder Structure

Tiny ImageNet stores validation images in a single folder named images and provides labels in val_annotations.txt.  
This cell reorganizes validation images into class folders.  
This operation only needs to run once. Afterwards, val_annotations.txt is deleted so that the val directory contains only class folders and the class count check below is not thrown off.


In [None]:
import os, shutil

val_root = os.path.join(DATA_ROOT, "val")
val_images = os.path.join(val_root, "images")
annotations = os.path.join(val_root, "val_annotations.txt")

if os.path.exists(val_images):
    print("Reorganizing validation directory.")

    # Build image-file-name -> class-id from the first two tab-separated
    # fields of every annotation line
    with open(annotations, "r") as f:
        rows = [line.strip().split("\t") for line in f]
    mapping = {fields[0]: fields[1] for fields in rows}

    # Move each image into a folder named after its class, creating the
    # folder the first time that class is seen
    for img, cls in mapping.items():
        class_dir = os.path.join(val_root, cls)
        if not os.path.exists(class_dir):
            os.makedirs(class_dir)

        src, dst = os.path.join(val_images, img), os.path.join(class_dir, img)
        if os.path.exists(src):
            shutil.move(src, dst)

    # The flat images folder is no longer needed
    shutil.rmtree(val_images)

# Drop the annotations file so the val directory holds only class folders;
# this keeps the os.listdir-based count below equal to the number of classes
if os.path.exists(annotations):
    print("Removing annotation file to avoid Keras mislabeling:", annotations)
    os.remove(annotations)

print("Validation classes:", len(os.listdir(val_root)))


## Verify Final Class Count
Both train and val must have exactly 200 folders.  
Any deviation indicates a labeling mismatch that will break training.

In [None]:
# Collect the directory entries of both splits, then report how many each has
train_classes, val_classes = (
    os.listdir(os.path.join(DATA_ROOT, split)) for split in ("train", "val")
)

for label, entries in (("Train class count:", train_classes), ("Val class count:", val_classes)):
    print(label, len(entries))


## Create Training and Validation Generators

This cell creates the Keras ImageDataGenerator objects that will feed images into the model.  
The training generator includes simple augmentation such as horizontal flip and rotation.  
Both generators rescale pixel values to the range [0, 1].


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

IMAGE_SIZE = (64, 64)
BATCH_SIZE = 64

# Options shared by the training and validation directory iterators
flow_options = dict(
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

# Training images are rescaled and lightly augmented (flip + small rotation);
# validation images are only rescaled
train_datagen = ImageDataGenerator(rescale=1.0 / 255.0, horizontal_flip=True, rotation_range=10)
val_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

train_generator = train_datagen.flow_from_directory(
    os.path.join(DATA_ROOT, "train"),
    **flow_options,
)

# shuffle=False keeps the validation iteration order fixed
val_generator = val_datagen.flow_from_directory(
    os.path.join(DATA_ROOT, "val"),
    shuffle=False,
    **flow_options,
)


In [None]:
# Pull the class names out of each generator once
train_keys = list(train_generator.class_indices.keys())
val_keys = list(val_generator.class_indices.keys())

print("TRAIN KEYS:", train_keys)
print("VAL KEYS:", val_keys)

print("TRAIN LEN:", len(train_generator.class_indices))
print("VAL LEN:", len(val_generator.class_indices))

# Names present in exactly one of the two splits (empty set when they agree)
print("Mapping difference:", set(train_keys).symmetric_difference(val_keys))


In [None]:
x_batch, y_batch = next(train_generator)

for caption, batch in (("Batch X shape:", x_batch), ("Batch Y shape:", y_batch)):
    print(caption, batch.shape)

# One-hot targets have exactly one 1 per row, so every row should sum to 1
row_sums = y_batch.sum(axis=1)
print("Row sums (should all be 1):", row_sums[:20])

# Distinct class indices that occur in this batch
labels_in_batch = y_batch.argmax(axis=1)
print("Unique labels:", sorted(set(labels_in_batch)))


In [None]:
batch_x, batch_y = next(train_generator)

print("Batch X shape:", batch_x.shape)
print("Batch Y shape:", batch_y.shape)

# Pixel range of the batch after the generator's preprocessing
print("Min pixel:", batch_x.min())
print("Max pixel:", batch_x.max())

# Look at the first sample of the batch: its target vector and label index
first_image, first_target = batch_x[0], batch_y[0]
first_label = np.argmax(first_target)
print("Sample one hot vector:", first_target)
print("Label index:", first_label)

plt.imshow(first_image)
plt.title(f"Label index: {first_label}")
plt.show()


In [None]:
# Show the directory each generator reads from
for split_generator in (train_generator, val_generator):
    print(split_generator.directory)


## Sample Image Visualization

This cell displays a grid of sample images drawn from the training generator.  
This is part of the data exploration requirement for the assignment.


In [None]:
x_batch, y_batch = next(train_generator)

# 4 x 4 grid showing the first sixteen images of the batch, without axes
fig, axes = plt.subplots(4, 4, figsize=(10, 10))
for i, ax in enumerate(axes.flat):
    ax.imshow(x_batch[i])
    ax.axis("off")

fig.tight_layout()
plt.show()


### Image Quality

The images look grainy and low resolution, which is expected given the small image size (64 x 64).

## Class Distribution

This cell computes the number of training examples for each class.  
Tiny ImageNet is balanced by design, but reporting class distribution is required for the assignment.


In [None]:
# class_indices maps class name -> integer label, and train_generator.classes
# holds one integer label per training image. Count all labels in a single
# vectorized pass instead of rebuilding the list of class names for every image.
class_indices = train_generator.class_indices
per_label_counts = np.bincount(train_generator.classes, minlength=len(class_indices))

# Look counts up through the explicit name -> label mapping so the result does
# not depend on the dictionary's key order; int() keeps plain Python integers.
class_counts = {cls: int(per_label_counts[idx]) for cls, idx in class_indices.items()}

#bar plot
plt.figure(figsize=(12, 4))
plt.bar(range(len(class_counts)), list(class_counts.values()))
plt.title("Class Distribution")
plt.xlabel("Class Index")
plt.ylabel("Number of Samples")
plt.show()

# Display a sample of the dictionary
list(class_counts.items())[:10]

### ResNet18 Architecture (from scratch)

For Tiny ImageNet we use a ResNet18 style convolutional neural network trained entirely from scratch. ResNet18 is a widely used deep residual architecture that improves optimization by adding shortcut (skip) connections around small stacks of convolutional layers.

Key ideas:

1. Residual blocks  
   - Each block has two 3x3 Conv2D layers with Batch Normalization and ReLU.  
   - A skip connection adds the input of the block to its output.  
   - If the spatial size or channel count changes, a 1x1 convolution is used on the skip path to match shapes.  
   - This helps gradients flow through many layers and avoids vanishing gradient problems.

2. Overall structure for 64x64 images  
   - Initial 3x3 convolution with 64 filters and BatchNorm + ReLU.  
   - Four residual stages with 2 blocks each:  
     - Stage 1: 64 channels, stride 1.  
     - Stage 2: 128 channels, first block stride 2.  
     - Stage 3: 256 channels, first block stride 2.  
     - Stage 4: 512 channels, first block stride 2.  
   - Global average pooling to reduce each 2D feature map to a single value.  
   - Final Dense layer with 200 outputs and softmax for Tiny ImageNet classes.

3. Why this is appropriate for Tiny ImageNet  
   - Tiny ImageNet has 200 classes with fairly complex appearance variation.  
   - Shallow CNNs struggle to learn good features for this dataset.  
   - ResNet18 adds depth while keeping optimization stable, which allows the model to learn more expressive hierarchical features.  
   - We train with categorical cross entropy loss and report accuracy on the validation set.

This model is implemented using `tf.keras` with the functional API, without any pretrained weights. All weights are initialized randomly and learned directly from the Tiny ImageNet data.



In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

def residual_block(x, filters, stride=1, use_projection=False, name=None):
    """
    Two-convolution residual block: conv-BN-ReLU, conv-BN, add skip, ReLU.

    x              - input tensor
    filters        - number of channels produced by both 3x3 convolutions
    stride         - stride of the first 3x3 conv (and of the projection conv)
    use_projection - force a 1x1 conv + BatchNorm on the skip path; the
                     projection is also applied whenever the input channel
                     count differs from `filters` or `stride` is not 1
    name           - optional prefix for layer names; layers are left
                     auto-named when it is None

    Returns the output tensor of the block.
    """
    def scoped(suffix):
        # Prefix the suffix with the block name, or defer to Keras auto-naming
        return None if name is None else name + suffix

    identity = x

    # Main path, first 3x3 conv (carries the stride)
    main = layers.Conv2D(filters, (3, 3), strides=stride, padding="same",
                         use_bias=False, name=scoped("_conv1"))(x)
    main = layers.BatchNormalization(axis=-1, name=scoped("_bn1"))(main)
    main = layers.ReLU(name=scoped("_relu1"))(main)

    # Main path, second 3x3 conv (always stride 1, no activation before the add)
    main = layers.Conv2D(filters, (3, 3), strides=1, padding="same",
                         use_bias=False, name=scoped("_conv2"))(main)
    main = layers.BatchNormalization(axis=-1, name=scoped("_bn2"))(main)

    # Skip path: project with a strided 1x1 conv when requested or when the
    # identity would not match the main path
    needs_projection = use_projection or identity.shape[-1] != filters or stride != 1
    if needs_projection:
        identity = layers.Conv2D(filters, (1, 1), strides=stride, padding="same",
                                 use_bias=False, name=scoped("_proj_conv"))(identity)
        identity = layers.BatchNormalization(axis=-1, name=scoped("_proj_bn"))(identity)

    # Merge both paths, then apply the closing ReLU
    merged = layers.Add(name=scoped("_add"))([main, identity])
    return layers.ReLU(name=scoped("_out"))(merged)


def build_resnet18(input_shape=(64, 64, 3), num_classes=200):
    """
    Build a ResNet18 style classifier with the 2-2-2-2 residual block layout.

    input_shape - shape of one input image (default 64x64x3)
    num_classes - size of the softmax output layer (default 200)

    Returns an uncompiled Keras Model named "ResNet18_TinyImageNet".
    """
    inputs = layers.Input(shape=input_shape)

    # Stem: a single stride-1 3x3 conv (rather than a 7x7 conv) for small inputs
    x = layers.Conv2D(64, (3, 3), strides=1, padding="same",
                      use_bias=False, name="conv1")(inputs)
    x = layers.BatchNormalization(axis=-1, name="bn1")(x)
    x = layers.ReLU(name="relu1")(x)

    # (filters, stride of the first block) for stages 1-4; every stage has two
    # blocks, and from stage 2 onward the first block downsamples and projects
    stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
    for stage_no, (width, first_stride) in enumerate(stage_plan, start=1):
        x = residual_block(x, width, stride=first_stride,
                           use_projection=stage_no > 1,
                           name=f"stage{stage_no}_block1")
        x = residual_block(x, width, stride=1,
                           use_projection=False,
                           name=f"stage{stage_no}_block2")

    # Head: global average pooling followed by the softmax classifier
    x = layers.GlobalAveragePooling2D(name="avg_pool")(x)
    outputs = layers.Dense(num_classes, activation="softmax", name="fc")(x)

    return models.Model(inputs=inputs, outputs=outputs, name="ResNet18_TinyImageNet")


# Instantiate the network for 64x64 RGB inputs and 200 output classes
resnet_model = build_resnet18(input_shape=(64, 64, 3), num_classes=200)

# Adam at 1e-3 with categorical cross entropy; accuracy is tracked as the metric
initial_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
resnet_model.compile(
    optimizer=initial_optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

resnet_model.summary()




## Model Compilation and Training

This cell compiles and trains the CNN model.

### Compilation
The Adam optimizer is used because it adapts the learning rate during training and performs well across a wide range of deep learning tasks.  
The loss function is categorical crossentropy, which is standard for multi class classification problems.  
Accuracy is used as the evaluation metric.

### Training Strategy
The model is trained for a fixed number of epochs using the training generator.  
The validation generator is used to measure generalization performance across epochs.  
This allows us to observe potential overfitting and adjust the model or regularization if needed.

The training history is stored so that accuracy and loss curves can be plotted in the next cell.


In [None]:
# Build fresh directory iterators from the existing ImageDataGenerator objects
# so training starts from newly created generators
IMAGE_SIZE = (64, 64)
BATCH_SIZE = 64

loader_options = {
    "target_size": IMAGE_SIZE,
    "batch_size": BATCH_SIZE,
    "class_mode": "categorical",
}

train_generator = train_datagen.flow_from_directory(
    os.path.join(DATA_ROOT, "train"),
    **loader_options,
)

# Validation order stays fixed (shuffle=False)
val_generator = val_datagen.flow_from_directory(
    os.path.join(DATA_ROOT, "val"),
    shuffle=False,
    **loader_options,
)

# Compile again with a new Adam optimizer at the baseline learning rate
baseline_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
resnet_model.compile(
    optimizer=baseline_optimizer,
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

EPOCHS = 15  # lower to 3 for a quick check that training runs
history = resnet_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
)


## Training Curves

This cell plots the training and validation accuracy and loss across epochs.  
These plots help evaluate whether the model is learning effectively and whether overfitting is occurring.  
A healthy model typically shows increasing accuracy and decreasing loss on both training and validation sets.


In [None]:
# Two side-by-side panels: accuracy on the left, loss on the right
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

panels = (
    (acc_ax, "accuracy", "Accuracy"),
    (loss_ax, "loss", "Loss"),
)
for ax, key, label in panels:
    # Training curve and its validation counterpart ("val_" + key)
    ax.plot(history.history[key], label=f"Train {label}")
    ax.plot(history.history[f"val_{key}"], label=f"Validation {label}")
    ax.set_title(f"{label} Over Epochs")
    ax.set_xlabel("Epoch")
    ax.set_ylabel(label)
    ax.legend()

fig.tight_layout()
plt.show()


## Classification Report (Precision, Recall, and F1 Score)

Accuracy alone does not fully describe model performance for a 200 class dataset.  
Precision, recall, and F1 score provide a more detailed view of how the model behaves on each class.

Macro averaged precision treats each class equally and measures how often predictions for that class are correct.  
Macro averaged recall measures how often the model correctly identifies images from each class.  
Macro averaged F1 combines precision and recall into a single balanced score.

This report is generated using scikit-learn.


In [None]:
# Get predictions for the validation set
val_generator.reset()
pred_probs = model.predict(val_generator)
pred_classes = np.argmax(pred_probs, axis=1)
true_classes = val_generator.classes

# Produce classification report
from sklearn.metrics import classification_report

class_labels = list(val_generator.class_indices.keys())
report = classification_report(true_classes, pred_classes, target_names=class_labels, zero_division=0)

print(report)


## Confusion Matrix

The confusion matrix shows how the model performs on each class.  
Diagonal entries correspond to correct predictions.  
Off diagonal entries show misclassifications.

Visualizing the confusion matrix helps reveal which classes are frequently confused.  
For a 200 class dataset, the full matrix is large, so the heatmap focuses on general patterns rather than individual labels.


In [None]:
from sklearn.metrics import confusion_matrix

# Rows are true classes, columns are predicted classes
cm = confusion_matrix(true_classes, pred_classes)

# Draw the matrix as a heatmap on an explicit axes object
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(cm, cmap="Blues", cbar=True, ax=ax)
ax.set_title("Confusion Matrix")
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
plt.show()


## Top 5 Accuracy

Top 5 accuracy measures whether the correct class appears among the five most probable predictions.  
This metric is widely used for large multiclass datasets such as ImageNet and Tiny ImageNet because a single mistake among 200 classes does not always indicate meaningful failure.


In [None]:
# Compute top 5 accuracy manually
top5_correct = 0
top5_total = len(true_classes)

top5_preds = np.argsort(pred_probs, axis=1)[:, -5:]  # last 5 entries are the highest

for i in range(top5_total):
    if true_classes[i] in top5_preds[i]:
        top5_correct += 1

top5_accuracy = top5_correct / top5_total
print("Top 5 Accuracy:", top5_accuracy)


## Evaluation Summary

This notebook reports several metrics to evaluate model performance on Tiny ImageNet.

1. Accuracy  
Measures overall correctness of the predictions.

2. Loss  
Measures prediction confidence and fit to the data.

3. Precision, Recall, and F1 Score  
Provide per class evaluation and reveal strengths and weaknesses beyond accuracy.

4. Confusion Matrix  
Shows patterns of misclassification between classes.

5. Top 5 Accuracy  
Indicates whether the true class appears among the five most likely predictions and is a standard metric for large multiclass datasets.

These metrics together provide a complete picture of model behavior and generalization ability.
