In [22]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import pytorch_lightning as pl
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, random_split
import wandb
from pytorch_lightning.loggers import WandbLogger
from mlops_finalproject.models import model
from pytorch_lightning import Callback, Trainer
from torchvision import transforms
import pandas as pd
from PIL import Image
from torchvision import transforms, datasets
from tqdm.notebook import tqdm


In [36]:
images = torch.load("../data/processed/32/images.pt")
labels = torch.load("../data/processed/32/labels.pt")

train_dataset = TensorDataset(images, labels)  # create your datset
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False, num_workers=8)

In [42]:
# for _, l in train_loader:
#     print(l)

In [38]:
import torchvision.models as models
import torch.nn as nn
def build_model(pretrained=True, fine_tune=False, num_classes=10):
    if pretrained:
        print('[INFO]: Loading pre-trained weights')
    else:
        print('[INFO]: Not loading pre-trained weights')
    model = models.mobilenet_v3_large(pretrained=pretrained)
    if fine_tune:
        print('[INFO]: Fine-tuning all layers...')
        for params in model.parameters():
            params.requires_grad = True
    elif not fine_tune:
        print('[INFO]: Freezing hidden layers...')
        for params in model.parameters():
            params.requires_grad = False
    # Change the final classification head.
    model.classifier[3] = nn.Linear(in_features=1280, out_features=num_classes)
    return model

    

In [39]:
# Training function.
def train(
    model, trainloader, optimizer, 
    criterion, scheduler=None, epoch=None
):
    model.train()
    print('Training')
    train_running_loss = 0.0
    train_running_correct = 0
    counter = 0
    iters = len(trainloader)
    for i, data in enumerate(trainloader):
        counter += 1
        image, labels = data
        image = image.to("cpu")
        labels = labels.to("cpu")
        optimizer.zero_grad()
        # Forward pass.
        outputs = model(image)
        # Calculate the loss.
        loss = criterion(outputs, labels)
        train_running_loss += loss.item()
        # Calculate the accuracy.
        _, preds = torch.max(outputs.data, 1)
        train_running_correct += (preds == labels).sum().item()
        # Backpropagation.
        loss.backward()
        # Update the weights.
        optimizer.step()
        if scheduler is not None:
            scheduler.step(epoch + i / iters)
    
    # Loss and accuracy for the complete epoch.
    epoch_loss = train_running_loss / counter
    epoch_acc = 100. * (train_running_correct / len(trainloader.dataset))
    return epoch_loss, epoch_acc

In [40]:
# Validation function.
def validate(model, testloader, criterion, class_names):
    model.eval()
    print('Validation')
    valid_running_loss = 0.0
    valid_running_correct = 0
    counter = 0
    # We need two lists to keep track of class-wise accuracy.
    class_correct = list(0. for i in range(len(class_names)))
    class_total = list(0. for i in range(len(class_names)))
    with torch.no_grad():
        for i, data in enumerate(testloader):
            counter += 1
            
            image, labels = data
            image = image.to("cpu")
            labels = labels.to("cpu")
            # Forward pass.
            outputs = model(image)
            # Calculate the loss.
            loss = criterion(outputs, labels)
            valid_running_loss += loss.item()
            # Calculate the accuracy.
            _, preds = torch.max(outputs.data, 1)
            valid_running_correct += (preds == labels).sum().item()
            # Calculate the accuracy for each class.
            correct  = (preds == labels).squeeze()
            for i in range(len(preds)):
                label = labels[i]
                class_correct[label] += correct[i].item()
                class_total[label] += 1
        
    # Loss and accuracy for the complete epoch.
    epoch_loss = valid_running_loss / counter
    epoch_acc = 100. * (valid_running_correct / len(testloader.dataset))
    # Print the accuracy for each class after every epoch.
    print('\n')
    for i in range(len(class_names)):
        print(f"Accuracy of class {class_names[i]}: {100*class_correct[i]/class_total[i]}")
    print('\n')
    return epoch_loss, epoch_acc

In [43]:
from torch import nn, optim
import time

pretrained=True
fine_tune=True

epochs = 4

# Load the model.
model = build_model(
    pretrained=pretrained,
    fine_tune=fine_tune, 
    num_classes=43
).to('cpu')

# Total parameters and trainable parameters.
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")
# Optimizer.
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Loss function.
criterion = nn.CrossEntropyLoss()
scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, 
    T_0=10, 
    T_mult=1,
    verbose=True
)
# Lists to keep track of losses and accuracies.
train_loss, valid_loss = [], []
train_acc, valid_acc = [], []
# Start the training.
for epoch in range(epochs):
    print(f"[INFO]: Epoch {epoch+1} of {epochs}")
    train_epoch_loss, train_epoch_acc = train(
        model, train_loader, 
        optimizer, criterion,
        scheduler=scheduler, epoch=epoch
    )
    valid_epoch_loss, valid_epoch_acc = validate(model, train_loader,  
                                                criterion, [i for i in range(43)])
    train_loss.append(train_epoch_loss)
    valid_loss.append(valid_epoch_loss)
    train_acc.append(train_epoch_acc)
    valid_acc.append(valid_epoch_acc)
    print(f"Training loss: {train_epoch_loss:.3f}, training acc: {train_epoch_acc:.3f}")
    print(f"Validation loss: {valid_epoch_loss:.3f}, validation acc: {valid_epoch_acc:.3f}")
    print('-'*50)
    time.sleep(5)
    
# # Save the trained model weights.
# save_model(epochs, model, optimizer, criterion)
# # Save the loss and accuracy plots.
# save_plots(train_acc, valid_acc, train_loss, valid_loss)
print('TRAINING COMPLETE')

[INFO]: Loading pre-trained weights
[INFO]: Fine-tuning all layers...
4,257,115 total parameters.
4,257,115 training parameters.
Epoch 00000: adjusting learning rate of group 0 to 1.0000e-03.
[INFO]: Epoch 1 of 4
Training




Epoch 0.00: adjusting learning rate of group 0 to 1.0000e-03.
Epoch 0.00: adjusting learning rate of group 0 to 1.0000e-03.
Epoch 0.00: adjusting learning rate of group 0 to 1.0000e-03.
Epoch 0.00: adjusting learning rate of group 0 to 1.0000e-03.
Epoch 0.01: adjusting learning rate of group 0 to 1.0000e-03.
Epoch 0.01: adjusting learning rate of group 0 to 1.0000e-03.
Epoch 0.01: adjusting learning rate of group 0 to 1.0000e-03.
Epoch 0.01: adjusting learning rate of group 0 to 1.0000e-03.
Epoch 0.01: adjusting learning rate of group 0 to 1.0000e-03.
Epoch 0.01: adjusting learning rate of group 0 to 9.9999e-04.
Epoch 0.02: adjusting learning rate of group 0 to 9.9999e-04.
Epoch 0.02: adjusting learning rate of group 0 to 9.9999e-04.
Epoch 0.02: adjusting learning rate of group 0 to 9.9999e-04.
Epoch 0.02: adjusting learning rate of group 0 to 9.9999e-04.
Epoch 0.02: adjusting learning rate of group 0 to 9.9999e-04.
Epoch 0.02: adjusting learning rate of group 0 to 9.9999e-04.
Epoch 0.