In [38]:
import import_ipynb  # enables importing .ipynb files as modules; keep before `preprocessing`
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models
from torchvision.models import ResNet50_Weights
from tqdm import tqdm

from preprocessing import AlbumDataset, train_data, validation_data, test_data, transform

# Load the dataset CSV; its `genre` column is read later to derive the number of classes.
df = pd.read_csv("album_covers_dataset.csv")

In [32]:
# Wrap every split in an AlbumDataset; all three share the same `transform`.
train_dataset, validation_dataset, test_dataset = (
    AlbumDataset(split, transform=transform)
    for split in (train_data, validation_data, test_data)
)

In [36]:
# Build the classifier: a pretrained ResNet-50 backbone with a new trainable head.
# NOTE: `models` must be in scope (`from torchvision import models`); without that
# import this cell raises NameError on a fresh kernel.

# Number of genres (expected to be 7, but computed in case the df changes).
num_classes = len(df['genre'].unique())

# Load ResNet-50 with the default pretrained weights.
model = models.resnet50(weights=ResNet50_Weights.DEFAULT)

# Freeze every existing parameter so the pretrained weights are not updated.
for param in model.parameters():
    param.requires_grad = False

# Replace the final fully connected layer. The new layers are created after the
# freeze loop, so their parameters keep the default requires_grad=True.
model.fc = nn.Sequential(
    nn.Linear(model.fc.in_features, 512),  # first fully connected layer
    nn.ReLU(),
    nn.Dropout(0.3),                       # dropout for regularization
    nn.Linear(512, num_classes)            # output layer: one logit per genre
)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [34]:
# Batch the splits: training batches are reshuffled, validation order stays fixed.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
validation_loader = DataLoader(
    dataset=validation_dataset,
    batch_size=32,
    shuffle=False,
)

In [39]:
# Train the classification head and evaluate on the validation split after each epoch.
criterion = nn.CrossEntropyLoss()
# Only the parameters of `model.fc` are handed to the optimizer.
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

# Training loop
num_epochs = 10
train_loss_history = []  # mean training loss per batch, one entry per epoch
val_loss_history = []    # mean validation loss per batch, one entry per epoch

# NOTE(review): in the recorded run the validation loss rises after epoch 2 while the
# training loss keeps falling; consider early stopping or stronger regularization.
for epoch in range(num_epochs):
    # Training phase
    model.train()  # Set the model to training mode
    running_loss = 0.0
    for images, labels in tqdm(train_loader):
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    train_loss_history.append(running_loss / len(train_loader))
    print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {train_loss_history[-1]:.4f}")

    # Validation phase
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        # Iterate over `validation_loader` (the name defined with the DataLoaders);
        # the previous `val_loader` was never defined and raised NameError.
        for images, labels in validation_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # Accuracy calculation: predicted class is the index of the largest logit
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss_history.append(val_loss / len(validation_loader))
    val_accuracy = 100 * correct / total
    print(f"Validation Loss: {val_loss_history[-1]:.4f}, Accuracy: {val_accuracy:.2f}%")

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [03:30<00:00,  5.13s/it]


Epoch 1/10, Training Loss: 1.3973
Validation Loss: 1.3291, Accuracy: 46.58%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [03:59<00:00,  5.84s/it]


Epoch 2/10, Training Loss: 1.1001
Validation Loss: 1.2537, Accuracy: 49.69%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [04:15<00:00,  6.24s/it]


Epoch 3/10, Training Loss: 0.8908
Validation Loss: 1.3021, Accuracy: 48.45%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [04:43<00:00,  6.92s/it]


Epoch 4/10, Training Loss: 0.7373
Validation Loss: 1.3575, Accuracy: 46.58%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [04:52<00:00,  7.12s/it]


Epoch 5/10, Training Loss: 0.6111
Validation Loss: 1.4248, Accuracy: 47.20%


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [04:44<00:00,  6.93s/it]


Epoch 6/10, Training Loss: 0.4871
Validation Loss: 1.5209, Accuracy: 46.58%


 51%|███████████████████████████████████████████████████████████████████████████████▍                                                                           | 21/41 [02:45<02:37,  7.89s/it]


KeyboardInterrupt: 