In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from efficientnet_pytorch import EfficientNet
from torch.utils.data import DataLoader
import torchvision.transforms as T
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image

import sys
import os
# Add the directory to sys.path
datasets_dir = '/dtu/blackhole/16/155094/Video_classification'
sys.path.append(datasets_dir)
from datasets import FrameImageDataset, FrameVideoDataset

In [2]:
# Define device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [3]:
transform = T.Compose([
    T.Resize((224, 224)),
    T.RandomHorizontalFlip(),
    T.RandomRotation(10),
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

In [4]:
batch_size = 64

# Dataset directories
root_dir = '/dtu/blackhole/16/155094/ufc101'

# Initialize datasets
train_dataset = FrameImageDataset(root_dir=root_dir, split='train', transform=transform)
val_dataset = FrameImageDataset(root_dir=root_dir, split='val', transform=transform)
test_dataset = FrameImageDataset(root_dir=root_dir, split='test', transform=transform)
# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [9]:
# Hent en batch fra train_loader
for images, labels in train_loader:
    print("Image batch shape:", images.shape)
    print("Label batch shape:", labels.shape)
    break  # Stop efter den fÃ¸rste batch


Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64])


## The CNN ##

In [12]:
class Simple_CNN(nn.Module):
    def __init__(self, dropOutVal=0.2, num_classes=10):
        super(Simple_CNN, self).__init__()

        # Convolutional Feature Extraction
        self.network = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 128x128x3 -> 64x64x64

            nn.Dropout2d(dropOutVal),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 64x64x128 -> 32x32x128

            nn.Dropout2d(dropOutVal),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            
            nn.Dropout2d(dropOutVal),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            
            nn.Dropout2d(dropOutVal),
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 32x32x128 -> 16x16x128

            nn.Dropout2d(dropOutVal),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            
            nn.Dropout2d(dropOutVal),
            nn.Conv2d(256, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 16x16x64 -> 8x8x64
        )
        
        # Calculate flattened size of the feature map after the last conv layer
        test_input = torch.rand(1, 3, 224, 224)  # Sample input image
        flattened_size = self.network(test_input).view(1, -1).size(1)
        # Fully Connected Layer for shared features
        self.shared_fc = nn.Linear(flattened_size , 256)
        
        # Classification Head
        self.classifier = nn.Sequential(
            nn.ReLU(),
            nn.Linear(256, num_classes)  # Output layer for classification
        )
        

    def forward(self, x):
        # Pass through the convolutional feature extractor 
        features = self.network(x).view(x.size(0), -1)  # Flatten
        shared_features = self.shared_fc(features)  # Shared FC layer
        
        # Classification and Regression outputs
        class_out = self.classifier(shared_features)
        # bbox_out = self.regressor(shared_features)
        
        return class_out

In [15]:
# Initialize the model
model = Simple_CNN(num_classes = 10,dropOutVal=0.2)
model = model.to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Metrics storage
train_loss_history = []
val_loss_history = []
train_acc_history = []
val_acc_history = []

# Training and Validation Loop
epochs = 10
for epoch in range(epochs):
    # Training
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        print(images)
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Metrics
        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct / total
    train_loss_history.append(train_loss)
    train_acc_history.append(train_acc)

    # Validation
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            val_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_loss /= len(val_loader)
    val_acc = 100 * correct / total
    val_loss_history.append(val_loss)
    val_acc_history.append(val_acc)

    # Print metrics
    print(f"Epoch [{epoch+1}/{epochs}]")
    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.2f}%")


tensor([[[[-1.8097, -1.8097, -1.8097,  ..., -1.8097, -1.8097, -1.8097],
          [-1.8097, -1.8097, -1.8097,  ..., -1.8097, -1.8097, -1.8097],
          [-1.8097, -1.8097, -1.8097,  ..., -1.8097, -1.8097, -1.8097],
          ...,
          [-1.8097, -1.8097, -1.8097,  ..., -1.8097, -1.8097, -1.8097],
          [-1.8097, -1.8097, -1.8097,  ..., -1.8097, -1.8097, -1.8097],
          [-1.8097, -1.8097, -1.8097,  ..., -1.8097, -1.8097, -1.8097]],

         [[-1.7206, -1.7206, -1.7206,  ..., -1.7206, -1.7206, -1.7206],
          [-1.7206, -1.7206, -1.7206,  ..., -1.7206, -1.7206, -1.7206],
          [-1.7206, -1.7206, -1.7206,  ..., -1.7206, -1.7206, -1.7206],
          ...,
          [-1.7206, -1.7206, -1.7206,  ..., -1.7206, -1.7206, -1.7206],
          [-1.7206, -1.7206, -1.7206,  ..., -1.7206, -1.7206, -1.7206],
          [-1.7206, -1.7206, -1.7206,  ..., -1.7206, -1.7206, -1.7206]],

         [[-1.4907, -1.4907, -1.4907,  ..., -1.4907, -1.4907, -1.4907],
          [-1.4907, -1.4907, -

OutOfMemoryError: CUDA out of memory. Tried to allocate 784.00 MiB. GPU 0 has a total capacity of 15.77 GiB of which 4.19 MiB is free. Process 2718799 has 308.00 MiB memory in use. Including non-PyTorch memory, this process has 13.37 GiB memory in use. Process 2827344 has 2.08 GiB memory in use. Of the allocated memory 12.52 GiB is allocated by PyTorch, and 491.26 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Plotting Loss over Epochs
epochs_range = range(1, epochs + 1)

plt.figure(figsize=(12, 5))

# Loss Plot
plt.subplot(1, 2, 1)
plt.plot(epochs_range, train_loss_history, label='Training Loss')
plt.plot(epochs_range, val_loss_history, label='Validation Loss')
plt.title('Loss over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

# Accuracy Plot
plt.subplot(1, 2, 2)
plt.plot(epochs_range, train_acc_history, label='Training Accuracy')
plt.plot(epochs_range, val_acc_history, label='Validation Accuracy')
plt.title('Accuracy over Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy (%)')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()