In [17]:
# Horse Image Classification with PyTorch
# =======================================

# Step 1: Import Required Libraries
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Step 2: Define a Custom Dataset Class

In [18]:
class HorseDataset(Dataset):
    def __init__(self, metadata_df, transform=None):
        self.metadata_df = metadata_df
        self.transform = transform

    def __len__(self):
        return len(self.metadata_df)

    def __getitem__(self, idx):
        row = self.metadata_df.iloc[idx]
        image_path = row['image_path']
        label = row['horse_id'] - 1  # Subtract 1 to convert to 0-based index

        # Load and preprocess the image
        image = Image.open(image_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        return image, label

# Step 3: Prepare the Data

In [19]:
def load_and_prepare_data(metadata_df, test_size=0.2, random_state=42):
    # Define transformations for images
    transform = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    # Split metadata into training and testing sets
    train_df, test_df = train_test_split(metadata_df, test_size=test_size, random_state=random_state, stratify=metadata_df['horse_id'])

    # Create PyTorch datasets
    train_dataset = HorseDataset(train_df, transform=transform)
    test_dataset = HorseDataset(test_df, transform=transform)

    return train_dataset, test_dataset

# Step 5: Define the Training Loop


In [42]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=10, device='cuda'):
    model = model.to(device)
    model.train()  # Ensure model is in training mode

    # Add the parameter check here
    print("Checking model parameters for requires_grad:")
    for name, param in model.named_parameters():
        print(f"Layer: {name}, requires_grad: {param.requires_grad}")

    for epoch in range(num_epochs):
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Ensure requires_grad is enabled for the input
            images.requires_grad_(True)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Check for gradient flow
            assert loss.requires_grad, "Loss tensor does not require gradient."
            
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader):.4f}")

# 2. Prepare data for model training:

In [43]:

# Base path to the database
base_dir = '../data/THoDBRL2015'

metadata = []

# Consolidate images into a training directory
output_train_dir = os.path.join(base_dir, 'training_data')

os.makedirs(output_train_dir, exist_ok=True)

parts = [
    'Part1', 
    # 'Part2', 
    # 'Part3', 
    # 'Part4', 
    # 'Part5'
]

# Iterate through each Part folder
for part in parts:
    videos_dir = os.path.join(base_dir, part, 'videos')

    for horse_id_folder in os.listdir(videos_dir):
        horse_path = os.path.join(videos_dir, horse_id_folder)

        if os.path.isdir(horse_path):  # Ensure it's a directory
            # Find all stills folders (e.g., images, images1, images2, etc.)
            for stills_folder in os.listdir(horse_path):
                stills_path = os.path.join(horse_path, stills_folder)

                if os.path.isdir(stills_path) and stills_folder.startswith('images'):  # Check for folders named 'images*'
                    target_dir = os.path.join(output_train_dir, f'horse_{horse_id_folder}')

                    os.makedirs(target_dir, exist_ok=True)
                    
                    # Copy all image files from the stills folder to the target directory
                    for img_file in os.listdir(stills_path):
                        img_path = os.path.join(stills_path, img_file)

                        if img_file.endswith(('.jpg', '.jpeg', '.png')):  # Ensure it's an image file
                            metadata.append({
                                'horse_id': horse_id_folder,
                                'image_path': img_path,
                                'width': Image.open(img_path).size[0],
                                'height': Image.open(img_path).size[1],
                            })
                        
                            # shutil.copy(img_path, target_dir)

                    # print(f"Copied images from {stills_path} to {target_dir}")

# Create DataFrame with specified dtypes
metadata_df = pd.DataFrame(metadata, dtype='object').astype({
    'horse_id': 'int64',          # Assuming it's an integer
    'image_path': 'string',
    'width': 'int64',
    'height': 'int64',
})




# Step 6: Load Metadata and Prepare the Dataset

In [44]:
# Replace with your actual metadata DataFrame
# metadata_df = pd.DataFrame({
#     'image_path': ['path_to_image_1.jpg', 'path_to_image_2.jpg'],  # Replace with actual paths
#     'horse_id': [0, 1]
# })

# Prepare training and testing datasets
train_dataset, test_dataset = load_and_prepare_data(metadata_df)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Step 7: Initialize the Model, Loss Function, and Optimizer


In [None]:

from torchvision.models import resnet18

num_classes = metadata_df['horse_id'].nunique()
print(metadata_df['horse_id'].unique())
model = resnet18(pretrained=False).eval()
# model = CNNModel(num_classes)

# Use Cross-Entropy Loss for classification
criterion = nn.CrossEntropyLoss()

# Use Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 8: Train the Model


In [None]:
# Set the device to GPU if available
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Train the model
train_model(model, train_loader, criterion, optimizer, num_epochs=1, device=device)

# Stats
1 epoch takes 5 minutes on Macbook M1

# Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Generate predictions and ground truth labels (modify according to your model/data)
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for images, labels in test_loader:  # Replace 'test_loader' with your test DataLoader
        images = images.to(device)
        labels = labels.to(device)
        
        outputs = model(images)
        print(f"Outputs requires_grad: {outputs.requires_grad}")  # Debugging
        _, preds = torch.max(outputs, 1)
        
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Create confusion matrix
cm = confusion_matrix(all_labels, all_preds)
classes = range(num_classes)  # Adjust with class names if available

# Display confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=classes, yticklabels=classes)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()

# GradCAM

In [None]:
# model.eval() # Already done while creating Confusion Matrix

# Set your CAM extractor
from torchcam.methods import SmoothGradCAMpp
cam_extractor = SmoothGradCAMpp(model)

WARNING:root:no value was provided for `target_layer`, thus set to 'layer4'.

In [None]:
from torchvision.io.image import read_image
from torchvision.transforms.functional import normalize, resize, to_pil_image
from torchvision.models import resnet18
from torchcam.methods import SmoothGradCAMpp

# model = resnet18(pretrained=True).eval()

# Get your input
img = read_image("../data/internet/test_horse_internet.jpg")


# Preprocess it for your chosen model
input_tensor = normalize(resize(img, (224, 224)) / 255., [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

input_tensor.requires_grad_(True)

print(input_tensor.shape)

with SmoothGradCAMpp(model) as cam_extractor:
  # Preprocess your data and feed it to the model
  out = model(input_tensor.unsqueeze(0))
  # Retrieve the CAM by passing the class index and the model output
  activation_map = cam_extractor(out.squeeze(0).argmax().item(), out)


RuntimeError: cannot register a hook on a tensor that doesn't require gradient


It seems that there are no explicit .register_hook or related hook operations in the notebook. However, the error can also arise indirectly during gradient computations or when calling .backward().

To debug further:
	1.	Ensure inputs and outputs: Confirm that tensors involved in .backward() have requires_grad=True.
	2.	Verify model parameters: All model parameters should have requires_grad=True unless explicitly frozen.