In [1]:
#Note: this code is written as an assignment in a Helsinki Uni course on Deep Learning and is heavily influenced by
#starter code provided by the lecturers Hande Celikkanat and Roman Yangarber

In [2]:
#Got a working training session from start to finish.
#Good result, but this used a pre-trained model.

In [3]:
import pandas as pd
import numpy as np
import time

In [4]:
#Project helper that fetches our data from the internet.
#Per the message it prints, the fetch is skipped when the 'annotations' and 'images' folders already exist.

from src import data_download
data_download.fetch_data()

Folder already has folders 'annotations' and 'images'.
Assuming you already have the data and skipping fetch.


In [5]:
import torch
from torchvision import transforms, datasets
import torch.nn as nn
import torch.optim as optim

In [6]:
#Image folder that is handed to CustomImageDataset when the data loaders are built
DATA_DIR = 'data/images'

In [7]:
#Project helper (src/data_handling) that splits our data into train / test / validation sets.
#The keyword arguments are presumably the fraction of the data given to each split (0.6 / 0.2 / 0.2) - TODO confirm.
#NOTE(review): the unpacking assumes the helper returns the splits in the order (train, test, val) - verify in src/data_handling.

from src import data_handling
train, test, val = data_handling.get_target_dfs(train=0.6, test=0.2, val=0.2)

In [8]:
from src.dataset import CustomImageDataset

In [9]:
#Augmentations passed to the training dataset only; the validation and test datasets get transform=None.
#NOTE(review): this is a plain list, not transforms.Compose - how the list is applied (all in sequence
#vs. one picked at random) is decided inside CustomImageDataset; confirm there.
transform_mix = [
    transforms.ColorJitter(brightness=.5, hue=.3),
    transforms.RandomPerspective(distortion_scale=0.6, p=1.0),
    transforms.RandomAdjustSharpness(sharpness_factor=2),
]

#Single place to tune the batch size used by all three loaders
BATCH_SIZE = 50

#Training batches are reshuffled every epoch
train_loader = torch.utils.data.DataLoader(dataset=CustomImageDataset(train, DATA_DIR, transform=transform_mix), batch_size=BATCH_SIZE, shuffle=True)

#Evaluation loaders keep a fixed order. The loss/accuracy reported later are unweighted means of
#per-batch means, so if the last batch is smaller, reshuffling would make identical weights score
#slightly differently between epochs - and early stopping compares exactly those scores.
test_loader = torch.utils.data.DataLoader(dataset=CustomImageDataset(test, DATA_DIR, transform=None), batch_size=BATCH_SIZE, shuffle=False)
val_loader = torch.utils.data.DataLoader(dataset=CustomImageDataset(val, DATA_DIR, transform=None), batch_size=BATCH_SIZE, shuffle=False)

In [10]:
#The rest of the code is a pretty standard simple Pytorch setup

In [11]:
#Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print("Found cuda device")

Found cuda device


In [12]:
#Loading our models
from src.models import MultiLabelResnet, MultiLabelCNN

In [13]:
#comment out the model you don't want to use
#Pick the architecture to train: keep exactly one of the two lines below active.
#The chosen model is moved to `device` right after construction.
#model = MultiLabelResnet().to(device)
model = MultiLabelCNN().to(device)

In [19]:
#Multi-label objective. BCEWithLogitsLoss applies a sigmoid internally, so it expects raw logits.
#NOTE(review): the comments in the test cell describe the model output as probabilities - confirm the
#models in src/models do not already apply a sigmoid; if they do, nn.BCELoss would be the matching loss.
loss_function = nn.BCEWithLogitsLoss()
#SGD with momentum over all model parameters
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [20]:
#This utility function is taken from Deep Learning course Programming Assignment 3
def epoch_time(start_time, end_time):
    """Split the duration between two timestamps into whole minutes and seconds.

    Args:
        start_time: Timestamp in seconds (e.g. from ``time.time()``) taken at the start.
        end_time: Timestamp in seconds taken at the end.

    Returns:
        Tuple ``(minutes, seconds)`` of ints; both parts are truncated towards zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    # Whatever is left after removing the whole minutes, with the fractional part cut off
    leftover_seconds = int(duration - 60 * whole_minutes)
    return whole_minutes, leftover_seconds

In [21]:
#Record the run configuration (model, optimizer, loss) before training starts
from src.log_results import Logger

logger = Logger()
run_metadata = {
    "model_name": type(model).__name__,
    "model_str": str(model),
    "optimizer": str(optimizer),
    "loss_function": str(loss_function),
}
#Entries are logged in the order they are listed above
for field, value in run_metadata.items():
    logger.log(field, value)

In [22]:
#Training with validation after every epoch and early stopping on validation accuracy.
#The state of the best epoch is checkpointed to disk and restored once training ends.
epochs = 20
early_stop_patience = 1 # How many epochs to go without improvement
best_state_path = 'temp_best_model_state.pt' # Checkpoint of the best epoch seen so far

best_val_accuracy = 0.0
postpone_early_stop = early_stop_patience
best_state_saved = False # Set once this run has written a checkpoint; guards the restore below

for epoch in range(epochs):
    ### Training
    start_time = time.time()
    model.train() #Training mode (enables dropout, if the model has any)
    print(f'Starting Epoch {epoch+1}...')
    train_loss = 0
    train_accuracy = 0
    for inputs, labels in train_loader:
        #Keep the batch on the same device as the model (no-op if it is already there)
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

        #NOTE(review): the 0.5 cut-off is applied directly to `outputs`, which is only right if the model
        #emits probabilities; loss_function is BCEWithLogitsLoss, which expects logits (equivalent
        #cut-off there is 0) - confirm against src/models.
        predicted_labels = (outputs > 0.5).int()
        train_accuracy += (predicted_labels == labels).float().mean().item()

    #Epoch metrics are the mean of the per-batch means
    train_loss = train_loss / len(train_loader)
    train_accuracy = train_accuracy / len(train_loader)

    ### VALIDATION
    model.eval() #Evaluation mode (disables dropout)
    val_loss = 0
    val_accuracy = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = loss_function(outputs, labels)
            val_loss += loss.item()
            predicted_labels = (outputs > 0.5).int()
            val_accuracy += (predicted_labels == labels).float().mean().item()

    val_loss = val_loss / len(val_loader)
    val_accuracy = val_accuracy / len(val_loader)

    logger.append("train_loss", train_loss)
    logger.append("train_accuracy", train_accuracy)
    logger.append("val_loss", val_loss)
    logger.append("val_accuracy", val_accuracy)

    ### PRINTOUT
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    logger.append("epoch_time", f"{epoch_mins}m {epoch_secs}s")

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_accuracy*100:.2f}%')
    print(f'\t Val. Loss: {val_loss:.3f} |  Val. Acc: {val_accuracy*100:.2f}%')

    ### EARLY STOP
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        postpone_early_stop = early_stop_patience #Reset patience
        torch.save(model.state_dict(), best_state_path) #Save best model state
        best_state_saved = True
        logger.log("best_epoch", epoch+1)
    #allowing for some epoch to have worse accuracy than the one before
    elif postpone_early_stop > 0:
        postpone_early_stop -= 1
        print("Postponing early-stopping")
    else:
        print("Breaking loop due to early-stopping")
        logger.log("early_stop", True)
        break

### RESTORE BEST STATE
#Done whether training stopped early or used up all epochs, so that the later evaluation (and any
#saved model) uses the weights of the epoch logged as "best_epoch" rather than the last epoch's.
if best_state_saved:
    model.load_state_dict(torch.load(best_state_path)) #Load model state from best epoch

Starting Epoch 1...
Epoch: 01 | Epoch Time: 0m 32s
	Train Loss: 0.806 | Train Acc: 85.39%
	 Val. Loss: 0.709 |  Val. Acc: 92.83%
Starting Epoch 2...
Epoch: 02 | Epoch Time: 0m 25s
	Train Loss: 0.701 | Train Acc: 92.69%
	 Val. Loss: 0.696 |  Val. Acc: 92.84%
Starting Epoch 3...
Epoch: 03 | Epoch Time: 0m 26s
	Train Loss: 0.696 | Train Acc: 92.71%
	 Val. Loss: 0.695 |  Val. Acc: 92.85%
Starting Epoch 4...
Epoch: 04 | Epoch Time: 0m 25s
	Train Loss: 0.695 | Train Acc: 92.71%
	 Val. Loss: 0.694 |  Val. Acc: 92.85%
Starting Epoch 5...
Epoch: 05 | Epoch Time: 0m 26s
	Train Loss: 0.694 | Train Acc: 92.71%
	 Val. Loss: 0.694 |  Val. Acc: 92.85%
Starting Epoch 6...
Epoch: 06 | Epoch Time: 0m 26s
	Train Loss: 0.694 | Train Acc: 92.71%
	 Val. Loss: 0.694 |  Val. Acc: 92.86%
Starting Epoch 7...
Epoch: 07 | Epoch Time: 0m 26s
	Train Loss: 0.694 | Train Acc: 92.71%
	 Val. Loss: 0.694 |  Val. Acc: 92.85%
Postponing early-stopping
Starting Epoch 8...
Epoch: 08 | Epoch Time: 0m 26s
	Train Loss: 0.694 |

In [23]:
#Final evaluation on the held-out test set.
#The model returns one score per label; a label is assigned when its score is above 0.5:
#    predicted_labels = (outputs > 0.5).int()
#If the scores are probabilities of label=1, this means "more likely than not that the image has the label".
#NOTE(review): loss_function is BCEWithLogitsLoss, which expects raw logits - confirm that the model
#really outputs probabilities; for logits the equivalent cut-off would be 0.

with torch.no_grad():
    model.eval() #Evaluation mode (disables dropout)
    test_accuracy = 0
    test_loss = 0
    for inputs, labels in test_loader:
        #Keep the batch on the same device as the model (no-op if it is already there)
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        test_loss += loss.item()
        predicted_labels = (outputs > 0.5).int()
        test_accuracy += (predicted_labels == labels).float().mean().item()

#Reported metrics are the mean of the per-batch means
test_loss = test_loss / len(test_loader)
test_accuracy = test_accuracy / len(test_loader)

logger.log("test_loss", test_loss)
logger.log("test_accuracy", test_accuracy)

print(f"Test Loss: {test_loss:.3f}")
print(f"Test Accuracy: {test_accuracy*100:.2f}%")

Test Loss: 0.694
Test Accuracy: 92.92%


In [21]:
#If good results, save model state and metadata:
#Note: Overwrites results for same model
#logger.save_model(model)