# validation.ipynb

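Stratified k-fold cross-validation of a pre-trained `timm` image classifier (ResNet-18) on a random sample of the CCMT training images.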

Author: Connacher Murphy

In [1]:
# Libraries
import pest_classification as pest

import numpy as np
import os
import random
from sklearn.model_selection import StratifiedKFold
import timm
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader
from types import SimpleNamespace

In [2]:
def set_seed(seed=1234):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (including the current and all CUDA devices), writes the seed to the
    ``PYTHONHASHSEED`` environment variable, and sets the cuDNN flags to
    deterministic mode with the auto-tuner disabled.

    Args:
        seed: Seed value passed to every generator. Defaults to 1234.
    """
    # Expose the seed through the environment as a string
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Apply the same seed to each generator in turn
    seeders = (
        random.seed,  # Python standard library
        np.random.seed,  # NumPy global generator
        torch.manual_seed,  # PyTorch
        torch.cuda.manual_seed,  # CUDA, single GPU
        torch.cuda.manual_seed_all,  # CUDA, multi-GPU
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # cuDNN: disable the auto-tuner and force deterministic operations
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [3]:
# Seed every RNG before any sampling or model initialization takes place
set_seed(seed=123)

In [4]:
# Grab training observations from images df
# CM: sampling to speed up execution
df_all = pest.df
train_rows = df_all[df_all["set"] == "train_set"]

# Cap the sample size at the number of available training rows:
# DataFrame.sample (without replacement) raises a ValueError when asked for
# more rows than exist. No random_state is passed, so the draw depends on
# NumPy's global RNG state (seeded via set_seed above).
n_samples = min(2048, len(train_rows))
df = train_rows.sample(n_samples).reset_index(drop=True)

In [5]:
# Shared configuration object; starts with the dataset / dataloader settings
config = SimpleNamespace(
    # Number of images per batch
    batch_size=64,
    # Root directory of the augmented CCMT images (user home expanded)
    image_dir=os.path.expanduser("~/data/ccmt/CCMT Dataset-Augmented"),
    # Image size setting
    image_size=256,
)

In [6]:
# Cross-validation setup: number of folds and the stratified splitter
config.n_folds = 4

# CM: should I add a shuffle here? Currently off; shuffle=False is the
# scikit-learn default and is only spelled out here for visibility.
skf = StratifiedKFold(n_splits=config.n_folds, shuffle=False)

In [7]:
# Tag each row with the fold in which it is used for validation
for fold, split in enumerate(skf.split(df, df["label"])):
    # Each split is a (train indices, validation indices) pair; only the
    # validation half is needed to label the rows
    val_index = split[1]
    df.loc[val_index, "fold"] = fold

In [8]:
# Architecture: one output class per entry in the Maize crop descriptions
maize_descriptions = pest.crop_descriptions["Maize"]
config.num_classes = len(maize_descriptions)
config.backbone = "resnet18"

# Optimizer: number of training epochs and learning rate
config.num_epochs = 3
config.lr = 1e-4

In [9]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
cuda_available = torch.cuda.is_available()
print(cuda_available)
config.device = torch.device("cuda" if cuda_available else "cpu")

False


In [10]:
# Training function
def train(train_dataloader, valid_dataloader, model, optimizer, config):
    """Run the train/validate cycle for ``config.num_epochs`` epochs.

    Every epoch first calls ``pest.train_epoch`` on the training dataloader
    and then ``pest.validate_epoch`` on the validation dataloader, printing
    the (loss, accuracy) pair each of them returns. Nothing is returned.

    Args:
        train_dataloader: Dataloader passed to ``pest.train_epoch``.
        valid_dataloader: Dataloader passed to ``pest.validate_epoch``.
        model: Model passed to both epoch functions.
        optimizer: Optimizer passed to ``pest.train_epoch``.
        config: Configuration object; ``num_epochs`` is read here and the
            object is forwarded to both epoch functions.
    """
    for epoch in range(config.num_epochs):
        # Epochs are reported 1-based
        print(f"Epoch {epoch + 1}")

        # Training pass
        print("Training...")
        train_result = pest.train_epoch(train_dataloader, model, optimizer, config)
        train_loss, train_accuracy = train_result
        print(f"Training: loss = {train_loss}, accuracy = {train_accuracy}")

        # Validation pass
        print("Validating...")
        valid_result = pest.validate_epoch(valid_dataloader, model, config)
        valid_loss, valid_accuracy = valid_result
        print(f"Validation: loss = {valid_loss}, accuracy = {valid_accuracy}")

In [11]:
# Loss function: the same criterion is used for every fold, so it is built
# once here instead of being re-created and reassigned on each iteration.
config.criterion = nn.CrossEntropyLoss()

# Cross-validation: each fold serves once as the validation set while the
# remaining folds are used for training.
for fold in range(config.n_folds):
    print(f"Fold {fold}")

    # Split into training and validation sets
    train_df = df[df["fold"] != fold].reset_index(drop=True)
    valid_df = df[df["fold"] == fold].reset_index(drop=True)

    train_dataset = pest.AugmentedCCMT(config, train_df)
    valid_dataset = pest.AugmentedCCMT(config, valid_df)

    # Dataloaders: only the training data is shuffled
    train_dataloader = DataLoader(
        train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=0
    )
    valid_dataloader = DataLoader(
        valid_dataset, batch_size=config.batch_size, shuffle=False, num_workers=0
    )

    # Initialize a fresh (pre-trained) model for every fold so that no
    # weights carry over from the previous fold
    model = timm.create_model(
        config.backbone, pretrained=True, num_classes=config.num_classes
    )
    model.to(config.device)

    # Initialize a new optimizer bound to this fold's model parameters
    optimizer = optim.Adam(model.parameters(), lr=config.lr, weight_decay=0.0)

    # Call training function
    train(train_dataloader, valid_dataloader, model, optimizer, config)

    print("\n")
    

Fold 0
Epoch 1
Training...


100%|██████████| 24/24 [02:17<00:00,  5.73s/it]


Training: loss = 1.8868432343006134, accuracy = 0.2623697916666667
Validating...


100%|██████████| 8/8 [00:20<00:00,  2.57s/it]


Validation: loss = 1.8210224360227585, accuracy = 0.369140625
Epoch 2
Training...


100%|██████████| 24/24 [02:24<00:00,  6.01s/it]


Training: loss = 1.7134468307097752, accuracy = 0.4733072916666667
Validating...


100%|██████████| 8/8 [00:21<00:00,  2.70s/it]


Validation: loss = 1.673723429441452, accuracy = 0.490234375
Epoch 3
Training...


100%|██████████| 24/24 [02:22<00:00,  5.92s/it]


Training: loss = 1.5364590187867482, accuracy = 0.6145833333333334
Validating...


100%|██████████| 8/8 [00:20<00:00,  2.51s/it]


Validation: loss = 1.5050919651985168, accuracy = 0.59375


Fold 1
Epoch 1
Training...


100%|██████████| 24/24 [02:20<00:00,  5.87s/it]


Training: loss = 1.8774902919928234, accuracy = 0.21744791666666666
Validating...


100%|██████████| 8/8 [00:21<00:00,  2.67s/it]


Validation: loss = 1.8276771903038025, accuracy = 0.318359375
Epoch 2
Training...


100%|██████████| 24/24 [02:25<00:00,  6.05s/it]


Training: loss = 1.7094217538833618, accuracy = 0.5182291666666666
Validating...


100%|██████████| 8/8 [00:20<00:00,  2.56s/it]


Validation: loss = 1.666611298918724, accuracy = 0.537109375
Epoch 3
Training...


100%|██████████| 24/24 [02:21<00:00,  5.88s/it]


Training: loss = 1.5361276715993881, accuracy = 0.6178385416666666
Validating...


100%|██████████| 8/8 [00:21<00:00,  2.69s/it]


Validation: loss = 1.4800017476081848, accuracy = 0.607421875


Fold 2
Epoch 1
Training...


100%|██████████| 24/24 [02:23<00:00,  5.98s/it]


Training: loss = 1.871143102645874, accuracy = 0.23372395833333334
Validating...


100%|██████████| 8/8 [00:22<00:00,  2.76s/it]


Validation: loss = 1.8166229575872421, accuracy = 0.296875
Epoch 2
Training...


100%|██████████| 24/24 [02:31<00:00,  6.32s/it]


Training: loss = 1.7121340582768123, accuracy = 0.4055989583333333
Validating...


100%|██████████| 8/8 [00:21<00:00,  2.74s/it]


Validation: loss = 1.682903677225113, accuracy = 0.44140625
Epoch 3
Training...


100%|██████████| 24/24 [02:19<00:00,  5.83s/it]


Training: loss = 1.552189568678538, accuracy = 0.5078125
Validating...


100%|██████████| 8/8 [00:21<00:00,  2.75s/it]


Validation: loss = 1.5244200676679611, accuracy = 0.521484375


Fold 3
Epoch 1
Training...


100%|██████████| 24/24 [02:36<00:00,  6.52s/it]


Training: loss = 1.8569064438343048, accuracy = 0.2923177083333333
Validating...


100%|██████████| 8/8 [00:22<00:00,  2.79s/it]


Validation: loss = 1.7934908866882324, accuracy = 0.341796875
Epoch 2
Training...


100%|██████████| 24/24 [02:31<00:00,  6.32s/it]


Training: loss = 1.6922105153401692, accuracy = 0.482421875
Validating...


100%|██████████| 8/8 [00:23<00:00,  2.91s/it]


Validation: loss = 1.6438549309968948, accuracy = 0.53125
Epoch 3
Training...


100%|██████████| 24/24 [02:30<00:00,  6.26s/it]


Training: loss = 1.5218107501665752, accuracy = 0.6061197916666666
Validating...


100%|██████████| 8/8 [00:21<00:00,  2.75s/it]

Validation: loss = 1.4486368894577026, accuracy = 0.619140625





