# training.ipynb

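Training implementation: fine-tunes a pre-trained ResNet-18 (loaded through timm) on a random sample of 2,048 images from the CCMT training set, then saves the trained weights to `model.pth`.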

Author: Connacher Murphy

In [1]:
# Libraries
import pest_classification as pest

import numpy as np
import os
import random
from sklearn.model_selection import StratifiedKFold
import timm
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
from types import SimpleNamespace

In [2]:
def set_seed(seed=1234):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator, and PyTorch
    (the default generator, the current CUDA device, and all CUDA devices),
    exports ``PYTHONHASHSEED``, and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to every generator. Defaults to 1234.
    """
    random.seed(seed)

    # Read by Python at interpreter start-up, so this affects processes
    # launched after this point rather than the already-running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # NumPy, then PyTorch on CPU, the current GPU, and every GPU
    for seed_fn in (
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed)

    # Ask cuDNN for deterministic kernels and turn off its auto-tuner
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [3]:
# Fix all random number generators before any data is sampled or weights are initialised
set_seed(seed=456)

In [4]:
# Grab training observations from images df
# CM: sampling to speed up execution
df_all = pest.df
train_rows = df_all[df_all["set"] == "train_set"]

# Cap the sample at the number of available rows: sampling without
# replacement raises ValueError when n exceeds the population size.
# A fixed random_state makes the draw independent of the global NumPy RNG,
# so re-running this cell on its own returns the same subset.
df = train_rows.sample(n=min(2048, len(train_rows)), random_state=456)
df = df.reset_index(drop=True)

In [5]:
# Dataset and dataloader settings: batch size, image folder, and image size
config = SimpleNamespace(
    batch_size=64,
    image_dir=os.path.expanduser("~/data/ccmt/CCMT Dataset-Augmented"),
    image_size=256,
)

In [6]:
# Specify architecture parameters
# One output class per entry in pest.crop_descriptions["Maize"]
# NOTE(review): presumably these are the Maize class labels; confirm the
# sampled training rows are restricted to Maize as well
config.num_classes = len(pest.crop_descriptions["Maize"])
# Backbone name handed to timm.create_model when the model is built
config.backbone = "resnet18"

# Specify optimizer parameters
# Learning rate passed to the Adam optimizer
config.lr = 1e-4
# Number of iterations of the epoch loop in train()
config.num_epochs = 3

In [7]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU
cuda_available = torch.cuda.is_available()
print(cuda_available)
config.device = torch.device("cuda" if cuda_available else "cpu")

False


In [8]:
# Training function
def train(train_dataloader, model, optimizer, config):
    """Run ``config.num_epochs`` training epochs and return the resulting model.

    Each epoch is delegated to ``pest.train_epoch``; its returned model is fed
    into the next epoch, and the loss and accuracy it reports are printed.

    Args:
        train_dataloader: Passed through unchanged to ``pest.train_epoch``.
        model: Model handed to the first epoch.
        optimizer: Passed through unchanged to ``pest.train_epoch``.
        config: Namespace providing ``num_epochs``; also forwarded to
            ``pest.train_epoch``.

    Returns:
        The model returned by the last epoch, or the input ``model`` when
        ``config.num_epochs`` is zero.
    """
    for epoch_number in range(1, config.num_epochs + 1):
        print(f"Epoch {epoch_number}")

        epoch_result = pest.train_epoch(train_dataloader, model, optimizer, config)
        model, epoch_loss, epoch_accuracy = epoch_result

        print(f"Training: loss = {epoch_loss}, accuracy = {epoch_accuracy}")

    return model

In [9]:
# Wrap the sampled training rows in the CCMT dataset
train_df = df.reset_index(drop=True)
train_dataset = pest.AugmentedCCMT(config, train_df)

# Shuffled batches, loaded in the main process (no worker subprocesses)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=config.batch_size,
    shuffle=True,
    num_workers=0,
)

# Pre-trained backbone with config.num_classes outputs, moved to the chosen device
model = timm.create_model(
    config.backbone,
    pretrained=True,
    num_classes=config.num_classes,
)
model.to(config.device)

# Loss function is stored on config (CM: move this to outer loop?)
config.criterion = nn.CrossEntropyLoss()

# Adam over all model parameters, without weight decay
optimizer = optim.Adam(
    model.parameters(),
    lr=config.lr,
    weight_decay=0.0,
)

# Run the epoch loop and keep the trained model
model = train(train_dataloader, model, optimizer, config)

print("\n")

Epoch 1


100%|██████████| 32/32 [03:19<00:00,  6.22s/it]


Training: loss = 1.8295834138989449, accuracy = 0.31298828125
Epoch 2


100%|██████████| 32/32 [03:14<00:00,  6.08s/it]


Training: loss = 1.6284552477300167, accuracy = 0.48583984375
Epoch 3


100%|██████████| 32/32 [03:18<00:00,  6.19s/it]

Training: loss = 1.3925710543990135, accuracy = 0.5947265625







In [10]:
# Write the trained weights (state dict only) to model.pth in the working directory
torch.save(obj=model.state_dict(), f="model.pth")