### Environment and Device Check  
Before training the model, it is important to verify that TensorFlow is installed correctly and to identify which devices are available for computation.  
This cell prints the TensorFlow version, lists all logical devices detected by TensorFlow, and reports the number of GPUs available.  
This determines whether training will run on CPU or GPU.


In [None]:
import tensorflow as tf

# Report the installed TensorFlow release
print("TensorFlow version:", tf.__version__)

# Enumerate every logical device TensorFlow exposes
print("Available devices:")
logical_devices = tf.config.list_logical_devices()
for device in logical_devices:
    print(" ", device)

# Count the physical GPUs TensorFlow can see
gpu_devices = tf.config.list_physical_devices("GPU")
print("Num GPUs:", len(gpu_devices))


### Install kagglehub  
This project uses Tiny-ImageNet downloaded through **kagglehub**, so we need to install the package before accessing the dataset.  
This cell installs kagglehub inside the current environment.


In [None]:
# IPython shell escape (`!`): runs `pip install kagglehub` as a shell command.
# NOTE(review): `%pip install kagglehub` is the magic that targets the running
# kernel's environment — confirm which one is wanted here.
!pip install kagglehub


### Download Tiny-ImageNet  
We use `kagglehub` to download the Tiny-ImageNet dataset.  
This cell downloads the dataset, copies it once into `/content/tiny-imagenet-200`, and stores that path in `root` so later cells can load images from it.


In [None]:
import kagglehub
import shutil
import os

# Fetch the dataset via kagglehub; `src` is used below as the local download directory
src = kagglehub.dataset_download("akash2sharma/tiny-imagenet")

# Working copy that every later cell reads from
root = "/content/tiny-imagenet-200"

# Copy only when the working copy is missing, so re-running this cell skips the copy
already_copied = os.path.exists(root)
if not already_copied:
    extracted_dir = os.path.join(src, "tiny-imagenet-200")
    shutil.copytree(extracted_dir, root)

print("Dataset ready at:", root)
print("Contents:", os.listdir(root))



### Reorganize Validation Set  
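The validation images all live in a single `val/images` folder, with their labels listed in `val/val_annotations.txt`. Instead of moving images into per-class folders, this cell loads the annotations into a DataFrame and records the full path of every image.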

In [None]:
import pandas as pd
import os

# Locations inside the validation split
val_dir = os.path.join(root, "val")
val_images_dir = os.path.join(val_dir, "images")
val_annotations = os.path.join(val_dir, "val_annotations.txt")

# The annotation file is tab-separated and has no header row
annotation_columns = ["filename", "class", "x1", "y1", "x2", "y2"]
df_val = pd.read_csv(val_annotations, sep="\t", header=None)
df_val.columns = annotation_columns

# Resolve every filename to its full path under val/images
df_val["filepath"] = [
    os.path.join(val_images_dir, image_name) for image_name in df_val["filename"]
]

print(df_val.head())
print("Validation samples:", len(df_val))



### Load training images normally

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image side length and batch size shared by the training and validation loaders
IMG_SIZE = 64
BATCH_SIZE = 64

# Only a 1/255 rescale is configured; no augmentation options are set
train_gen = ImageDataGenerator(rescale=1.0 / 255)

# One sub-directory of `train` per class; labels are emitted one-hot
train_dir = os.path.join(root, "train")
train_generator = train_gen.flow_from_directory(
    directory=train_dir,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=True,
)

num_classes = train_generator.num_classes
print("Classes:", num_classes)



### Validation Set Generator

Building a custom generator that loads images directly from file paths

In [None]:
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np

# Reuse the training generator's class-name -> index mapping so labels line up
class_to_idx = train_generator.class_indices

# Drop annotation rows whose class is absent from the training mapping
known_class_mask = df_val["class"].isin(list(class_to_idx))
df_val = df_val.loc[known_class_mask].reset_index(drop=True)
print("Validation filtered:", len(df_val))


class ValSequence(Sequence):
    """Batch loader for the validation images listed in a DataFrame.

    Each batch is read from disk on demand: every image is resized to
    ``img_size`` x ``img_size``, divided by 255, and paired with a one-hot
    label built from the class-name -> index mapping.

    Args:
        df: DataFrame with a "filepath" column (image path) and a "class"
            column (class name that is a key of the mapping).
        batch_size: Number of samples per batch.
        img_size: Side length, in pixels, that images are resized to.
        class_indices: Optional class-name -> index mapping. When omitted,
            the notebook-level ``class_to_idx`` is used, as before.
        **kwargs: Forwarded to the ``Sequence`` base-class constructor.
    """

    def __init__(self, df, batch_size, img_size, class_indices=None, **kwargs):
        # Newer Keras versions expect data-loader subclasses to run the base
        # constructor; older ones accept the call as a no-op.
        super().__init__(**kwargs)
        self.df = df
        self.batch_size = batch_size
        self.img_size = img_size
        self.class_to_idx = class_to_idx if class_indices is None else class_indices

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return int(np.ceil(len(self.df) / self.batch_size))

    def __getitem__(self, idx):
        """Load batch ``idx`` and return ``(images, one_hot_labels)``."""
        start = idx * self.batch_size
        batch_df = self.df.iloc[start:start + self.batch_size]
        target_size = (self.img_size, self.img_size)

        # Iterate the two needed columns directly instead of building a
        # Series per row with iterrows().
        images = np.array([
            img_to_array(load_img(path, target_size=target_size)) / 255.0
            for path in batch_df["filepath"]
        ])

        # One-hot encode in a single vectorised assignment; float32 matches
        # the dtype of the image arrays.
        class_ids = [self.class_to_idx[class_name] for class_name in batch_df["class"]]
        labels = np.zeros((len(class_ids), len(self.class_to_idx)), dtype="float32")
        labels[np.arange(len(class_ids)), class_ids] = 1.0

        return images, labels


# Validation loader built with the same batch and image size as the training loader
val_generator = ValSequence(df=df_val, batch_size=BATCH_SIZE, img_size=IMG_SIZE)

num_val_batches = len(val_generator)
print("Validation batches:", num_val_batches)



### Sanity Check

In [None]:
# Pull one batch from the training generator and report its array shapes
x_train, y_train = next(train_generator)
for tag, batch in (("Train X:", x_train), ("Train Y:", y_train)):
    print(tag, batch.shape)

# Index the first validation batch and report its array shapes
x_val, y_val = val_generator[0]
for tag, batch in (("Val X:", x_val), ("Val Y:", y_val)):
    print(tag, batch.shape)

# Label width of each split, printed side by side for comparison
print("Train num classes:", y_train.shape[1])
print("Val num classes:", y_val.shape[1])

### Define the Custom ResNet-18 Model  
This cell implements a lightweight ResNet-18 suitable for Tiny-ImageNet.  
It includes:  
- A residual block with optional projection  
- Stride-2 downsampling at the start of stages 2–4 (the first stage keeps the input resolution)  
- Global average pooling  
- Final softmax classification head  

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

def residual_block(x, filters, stride=1, use_projection=False, name=None):
    """Apply a two-convolution residual block to ``x``.

    Main path: 3x3 conv (with ``stride``) -> BatchNorm -> ReLU -> 3x3 conv ->
    BatchNorm. The result is added to the shortcut and passed through a
    final ReLU.

    Args:
        x: Input feature tensor.
        filters: Number of output channels of both convolutions.
        stride: Stride of the first convolution and of the projection.
        use_projection: Force a 1x1 conv + BatchNorm on the shortcut. The
            projection is also applied automatically whenever the stride or
            channel count would make the shortcut's shape differ from the
            main path, since the addition would otherwise fail.
        name: Optional prefix for the layer names in this block. When
            omitted, Keras assigns its automatic names.

    Returns:
        The output tensor of the block.
    """
    def layer_name(suffix):
        # Only name layers when a prefix was supplied; None keeps auto-naming.
        return f"{name}_{suffix}" if name else None

    shortcut = x

    x = layers.Conv2D(filters, 3, strides=stride, padding="same", use_bias=False,
                      name=layer_name("conv1"))(x)
    x = layers.BatchNormalization(name=layer_name("bn1"))(x)
    x = layers.ReLU(name=layer_name("relu1"))(x)

    x = layers.Conv2D(filters, 3, padding="same", use_bias=False,
                      name=layer_name("conv2"))(x)
    x = layers.BatchNormalization(name=layer_name("bn2"))(x)

    # The identity shortcut is only valid when the main path keeps the spatial
    # size and channel count.
    # NOTE(review): the channel check reads the last axis, i.e. assumes
    # channels-last tensors — confirm if another data format is used.
    shape_changes = (
        stride not in (1, (1, 1), [1, 1]) or shortcut.shape[-1] != filters
    )
    if use_projection or shape_changes:
        shortcut = layers.Conv2D(filters, 1, strides=stride, padding="same",
                                 use_bias=False, name=layer_name("proj_conv"))(shortcut)
        shortcut = layers.BatchNormalization(name=layer_name("proj_bn"))(shortcut)

    x = layers.Add(name=layer_name("add"))([x, shortcut])
    x = layers.ReLU(name=layer_name("out"))(x)
    return x

def build_resnet18(input_shape=(64, 64, 3), num_classes=200):
    """Assemble the ResNet-18 style classifier as a Keras functional model.

    Layout: a 3x3 convolutional stem with 64 filters, four stages of two
    residual blocks each (64, 128, 256, 512 filters), global average
    pooling, and a softmax dense layer with ``num_classes`` units.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The uncompiled Keras model.
    """
    inputs = layers.Input(shape=input_shape)

    # Stem: single 3x3 convolution followed by BatchNorm and ReLU
    stem = layers.Conv2D(64, 3, padding="same", use_bias=False)(inputs)
    stem = layers.BatchNormalization()(stem)
    features = layers.ReLU()(stem)

    # (filters, stride of the first block) for each stage; a stage whose first
    # block is strided also uses a projection shortcut in that block
    stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
    for width, first_stride in stage_specs:
        features = residual_block(
            features, width, stride=first_stride, use_projection=first_stride != 1
        )
        features = residual_block(features, width, stride=1, use_projection=False)

    # Classification head
    pooled = layers.GlobalAveragePooling2D()(features)
    outputs = layers.Dense(num_classes, activation="softmax")(pooled)

    return models.Model(inputs, outputs)


### Compile the ResNet-18 Model  
We compile with the Adam optimizer at a learning rate of 1e-3.  
The loss is categorical cross-entropy because the data generators yield one-hot labels.  
Top-1 accuracy and top-5 accuracy are both tracked as metrics.


In [None]:
# Build the network from the data pipeline's own settings rather than the
# function defaults, so the input size and the width of the softmax layer
# always match what the generators produce.
model = build_resnet18(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    num_classes=num_classes,
)

# Categorical cross-entropy pairs with the one-hot labels from both generators.
# Metric order matters: evaluate() reports the loss first, then these metrics.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="categorical_crossentropy",
    metrics=[
        "accuracy",
        tf.keras.metrics.TopKCategoricalAccuracy(k=5, name="top5_accuracy")
    ]
)

model.summary()


### Add Training Callbacks  
We include two callbacks to stabilize training:

1. **EarlyStopping**  
   Stops training when validation accuracy stops improving.  
   This prevents overfitting and wasted compute.

2. **ModelCheckpoint**  
   Saves the best performing model during training.  
   We will reload this model later for final evaluation.


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# File the best model is written to during training
checkpoint_path = "best_resnet18.h5"

# Stop once validation accuracy has gone 3 epochs without improving, and roll
# the model back to the weights of its best epoch
early_stopping = EarlyStopping(
    monitor="val_accuracy",
    patience=3,
    restore_best_weights=True,
)

# Save the full model (not just weights) whenever validation accuracy improves
best_checkpoint = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_accuracy",
    save_best_only=True,
    save_weights_only=False,
)

callbacks = [early_stopping, best_checkpoint]

### Train the ResNet-18 Model  
We now begin full training (on the GPU, if the device check above found one).  
The model will run for up to 20 epochs, but **EarlyStopping** may stop it earlier if validation accuracy plateaus.

Training includes:
- Feedforward and backprop on the training set  
- Validation tracking every epoch  
- Checkpoint saving for the best model  
- Early stopping to avoid overfitting


In [None]:
# Upper bound on epochs; the EarlyStopping entry in `callbacks` may end the run sooner
EPOCHS = 20

# Train on the directory generator and validate on the custom Sequence each epoch
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    callbacks=callbacks,
    epochs=EPOCHS,
)


### Plot Training Curves  
To understand learning behavior, we plot both accuracy and loss for training and validation.  
This helps visualize:
- Whether the model is overfitting  
- Whether accuracy is improving across epochs  
- If loss is decreasing or plateauing  


In [None]:
import matplotlib.pyplot as plt

# One panel per tracked quantity: (history key, panel title)
curve_panels = (("accuracy", "Accuracy"), ("loss", "Loss"))

plt.figure(figsize=(12, 5))
for position, (metric_key, panel_title) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, position)
    # Training curve and its validation counterpart ("val_" + key) share a panel
    plt.plot(history.history[metric_key], label="train")
    plt.plot(history.history["val_" + metric_key], label="val")
    plt.title(panel_title)
    plt.legend()

plt.show()


### Collect Model Predictions for Confusion Matrix  
We gather predictions for the entire validation set by iterating over the validation generator.  
Steps:  
1. Reverse the `class_indices` mapping to convert integer predictions back to class names.  
2. Loop through all validation batches.  
3. Collect ground truth labels (`y_true`) and predicted labels (`y_pred`).  
4. Convert them to numpy arrays for confusion matrix computation.  


In [None]:
import numpy as np
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Class-name -> index mapping used for training, and its inverse for turning
# predicted indices back into class names
class_indices = train_generator.class_indices
idx_to_class = {v: k for k, v in class_indices.items()}

# Accumulators for integer class ids over the whole validation set
y_true = []
y_pred = []

# val_generator is a ValSequence: it is addressed by batch index, defines no
# reset() method and is not an iterator, so reset()/next() would raise here.
# Indexing also guarantees every batch is visited exactly once, in order.
for batch_idx in range(len(val_generator)):
    batch_x, batch_y = val_generator[batch_idx]
    preds = model.predict(batch_x, verbose=0)
    # Labels are one-hot and predictions are per-class scores: argmax gives ids
    y_true.extend(np.argmax(batch_y, axis=1))
    y_pred.extend(np.argmax(preds, axis=1))

y_true = np.array(y_true)
y_pred = np.array(y_pred)

print("Prediction collection complete.")


### Confusion Matrix Visualization and Export  
To evaluate how well the model distinguishes between different categories, we compute the confusion matrix across all 200 classes.  
Since plotting all 200 categories at once becomes unreadable, we visualize only the first 20 classes as a heatmap.  

Steps:  
1. Compute the full confusion matrix using all validation predictions.  
2. Extract a smaller 20x20 subset for readable visualization.  
3. Plot a heatmap with seaborn.  
4. Save the full confusion matrix as a CSV file for further analysis or reporting.  


In [None]:
import pandas as pd

# Pin the label set so row/column i always corresponds to class index i, even
# when some class never appears in y_true or y_pred (otherwise the matrix
# shrinks and its indices no longer match the class indices).
all_class_ids = np.arange(num_classes)
cm = confusion_matrix(y_true, y_pred, labels=all_class_ids)
print("Confusion matrix shape:", cm.shape)

# The full matrix is unreadable as a plot; show the first 20 classes only
subset_classes = list(range(20))
cm_subset = cm[np.ix_(subset_classes, subset_classes)]

# Label the axes with class names instead of bare positions
subset_names = [idx_to_class[i] for i in subset_classes]

plt.figure(figsize=(12,10))
sns.heatmap(
    cm_subset,
    annot=False,
    cmap="Blues",
    xticklabels=subset_names,
    yticklabels=subset_names,
)
plt.title("Confusion Matrix (first 20 classes)")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

# Export the full matrix; rows are true classes, columns are predicted classes
df_cm = pd.DataFrame(cm)
df_cm.to_csv("tiny_imagenet_confusion_matrix.csv", index=False)
print("Confusion matrix saved to tiny_imagenet_confusion_matrix.csv")


### Evaluating the Best Saved Model  
After training with early stopping and checkpointing, the model with the highest validation accuracy is loaded from disk.  
To measure performance, we compute:  

- Final loss on the validation set  
- Top 1 accuracy  
- Top 5 accuracy  

Top 1 accuracy indicates whether the highest softmax probability matches the true class.  
Top 5 accuracy checks whether the correct label appears within the five most likely predictions.  


In [None]:
# Reload the file written by ModelCheckpoint, reusing the same path variable
# so the two cells cannot drift apart if the filename changes.
best_model = tf.keras.models.load_model(checkpoint_path)

# evaluate() returns the loss followed by the compiled metrics in the order
# they were passed to compile(): accuracy first, then top-5 accuracy.
results = best_model.evaluate(val_generator)
print("Best model results:", results)
print("Top 1 accuracy:", results[1])
print("Top 5 accuracy:", results[2])
