# Early Stopping

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
sys.path.insert(0, "../src")

In [3]:
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import model_selection
import albumentations as A

import torch

import callbacks
import config
import dataset
import models
import engine

In [4]:
import numpy as np
import torch


class EarlyStopping:
    """Track a validation score across epochs and flag when to stop training.

    Call the instance once per epoch with the latest score. When the score
    beats the best value seen so far by at least ``delta``, the model's
    ``state_dict`` is written to ``model_path`` and the patience counter is
    reset. Otherwise the counter is incremented, and ``early_stop`` becomes
    ``True`` once it reaches ``patience``.

    Non-finite scores (NaN, +inf, -inf) never count as an improvement and are
    never checkpointed.

    Args:
        patience: Number of consecutive non-improving epochs tolerated before
            ``early_stop`` is set.
        mode: ``"max"`` if a larger score is better, ``"min"`` if a smaller
            score is better.
        delta: Minimum gain over ``best_score`` that counts as an improvement.

    Attributes:
        best_score: Best score so far in "higher is better" form (the score is
            negated in ``"min"`` mode); ``None`` until a finite score is seen.
        counter: Consecutive non-improving calls since the last improvement.
        early_stop: ``True`` once ``counter`` has reached ``patience``.
        val_score: Raw score passed to the most recent ``save_checkpoint``.

    Raises:
        ValueError: If ``mode`` is neither ``"max"`` nor ``"min"``.
    """

    def __init__(self, patience=7, mode="max", delta=0.0001):
        # Fail fast: an unknown mode would otherwise only surface later as an
        # UnboundLocalError inside __call__.
        if mode not in ("max", "min"):
            raise ValueError(f'mode must be "max" or "min", got {mode!r}')

        self.patience = patience
        self.mode = mode
        self.delta = delta

        self.best_score = None
        self.counter = 0
        self.early_stop = False

        # Start from the worst possible raw score for the chosen direction so
        # the first "improved" message has a sensible left-hand side.
        self.val_score = -np.inf if mode == "max" else np.inf

    def __call__(self, epoch_score, model, model_path):
        """Record one epoch's score, checkpointing the model if it improved.

        Args:
            epoch_score: Raw validation score for the epoch just finished.
            model: Object exposing ``state_dict()``; saved on improvement.
            model_path: Destination passed to ``torch.save``.
        """
        # Internally everything is "higher is better": negate in min mode.
        if self.mode == "max":
            score = np.copy(epoch_score)
        else:
            score = -1.0 * epoch_score

        # NaN compares False against everything, so without the finiteness
        # guard a NaN epoch would be treated as an improvement and would
        # overwrite the real best score.
        improved = np.isfinite(score) and (
            self.best_score is None or score >= self.best_score + self.delta
        )

        if improved:
            self.best_score = score
            self.counter = 0
            self.save_checkpoint(epoch_score, model, model_path)
        else:
            self.counter += 1
            print(f"EarlyStopping counter: {self.counter} out of {self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True

    def save_checkpoint(self, epoch_score, model, model_path):
        """Save ``model.state_dict()`` to ``model_path`` if the score is finite.

        ``val_score`` is updated to ``epoch_score`` whether or not a file was
        written.
        """
        # np.isfinite rejects every NaN/inf value; a tuple-membership test
        # only catches NaN when it is the very same object as np.nan.
        if np.isfinite(epoch_score):
            print(f"Validation score improved ({self.val_score} --> {epoch_score}). Saving model!")
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [5]:
# Fixed seed so the train/validation split (and therefore every validation
# score computed on it) is identical on each Restart & Run All.
SPLIT_SEED = 42

df = pd.read_csv(config.TRAIN_CSV)
# Split row positions rather than the frame itself: 90% train / 10% validation,
# stratified on the `digit` column.
train_idx, valid_idx = model_selection.train_test_split(
    np.arange(len(df)),
    test_size=0.1,
    stratify=df.digit,
    random_state=SPLIT_SEED,
)
# Both datasets receive the full frame plus the positions they should use.
train_dataset = dataset.EMNISTDataset(df, train_idx)
valid_dataset = dataset.EMNISTDataset(df, valid_idx)
# Only the training loader shuffles; the validation loader keeps dataset order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config.TRAIN_BATCH_SIZE, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=config.TEST_BATCH_SIZE)

In [6]:
EPOCHS = 200
# Checkpoint path handed to the early-stopping callback every epoch. It does
# not change between epochs, so it is defined once here.
# Presumably the callback stores the best weights there (the recorded output
# reports "Saving model...") — verify against callbacks.EarlyStopping.
model_path = "./test.pt"

device = torch.device(config.DEVICE)
model = models.SpinalVGG()
# model = models.Model()
model.to(device)

optimizer = torch.optim.Adam(model.parameters())
# mode='max' because the monitored value is validation accuracy: the learning
# rate is halved after 10 epochs without improvement.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm
# against the installed version.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', verbose=True, patience=10, factor=0.5
)
# Early-stopping patience (15) is longer than the scheduler's (10), so the
# learning rate gets reduced at least once before training is abandoned.
early_stop = callbacks.EarlyStopping(patience=15, mode="max")


for epoch in range(EPOCHS):
    engine.train(train_loader, model, optimizer, device)
    predictions, targets = engine.evaluate(valid_loader, model, device)

    # Reduce each row of model outputs to the index of its largest value
    # before scoring against the targets.
    predictions = np.array(predictions)
    predictions = np.argmax(predictions, axis=1)
    accuracy = metrics.accuracy_score(targets, predictions)
    print(f"Epoch: {epoch}, Valid accuracy={accuracy}")

    early_stop(accuracy, model, model_path)
    if early_stop.early_stop:
        print(f"Early stopping. Best score {early_stop.best_score}. Loading weights...")
        break

    scheduler.step(accuracy)

# Restore the checkpoint whether the loop ended via early stopping or by
# exhausting EPOCHS. Reloading only on early stop would leave `model` with
# last-epoch weights when all epochs run.
model.load_state_dict(torch.load(model_path))

Epoch: 0, Valid accuracy=0.2634146341463415
Validation score improved (-inf -> 0.2634146341463415). Saving model...
Epoch: 1, Valid accuracy=0.5463414634146342
Validation score improved (0.2634146341463415 -> 0.5463414634146342). Saving model...
Epoch: 2, Valid accuracy=0.5902439024390244
Validation score improved (0.5463414634146342 -> 0.5902439024390244). Saving model...
Epoch: 3, Valid accuracy=0.7170731707317073
Validation score improved (0.5902439024390244 -> 0.7170731707317073). Saving model...
Epoch: 4, Valid accuracy=0.6926829268292682
EarlyStopping counter: 1 out of 15
Epoch: 5, Valid accuracy=0.7658536585365854
Validation score improved (0.7170731707317073 -> 0.7658536585365854). Saving model...
Epoch: 6, Valid accuracy=0.7658536585365854
EarlyStopping counter: 1 out of 15
Epoch: 7, Valid accuracy=0.7658536585365854
EarlyStopping counter: 2 out of 15
Epoch: 8, Valid accuracy=0.7024390243902439
EarlyStopping counter: 3 out of 15
Epoch: 9, Valid accuracy=0.7951219512195122
Vali

In [14]:
# Load the weights stored in ./test.pt (the checkpoint path used by the
# training loop) into the current `model`.
# NOTE(review): the output recorded for this cell is
# "AttributeError: 'function' object has no attribute 'copy'", which suggests
# ./test.pt did not contain a state dict when the cell last ran — re-run on a
# fresh kernel to confirm.
model.load_state_dict(torch.load("./test.pt"))

AttributeError: 'function' object has no attribute 'copy'

In [11]:
# Write the current model's state_dict (weights only, not the model object)
# to ../models/spinalvgg.pt.
torch.save(model.state_dict(), "../models/spinalvgg.pt")

In [12]:
# Rebuild a fresh SpinalVGG and load the weights saved in
# ../models/spinalvgg.pt into it. This rebinds `model`; the new instance is
# not moved to `device` in this cell.
model = models.SpinalVGG()
# Last expression of the cell, so the load result is shown as the cell output.
model.load_state_dict(torch.load("../models/spinalvgg.pt"))

<All keys matched successfully>

In [13]:
# Move the model onto the configured device before running inference.
model.to(device)
# Read the test CSV and wrap it in the project's test dataset.
df_test = pd.read_csv(config.TEST_CSV)
test_dataset = dataset.EMNISTTestDataset(df_test)
# No `shuffle` argument is passed, so batches follow the dataset's own order.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=config.TEST_BATCH_SIZE)

In [14]:
# Run inference over the test loader, stack the returned outputs into an
# array, and keep the index of the largest value in each row (axis 1).
predictions = np.argmax(
    np.array(engine.infer(test_loader, model, device)),
    axis=1,
)

In [15]:
# Pair each test-row id with its predicted digit.
# Assumes `predictions` is in the same row order as df_test — TODO confirm.
submission = pd.DataFrame({"id": df_test.id, "digit": predictions})
# index=False keeps the DataFrame index out of the written CSV.
submission.to_csv("../output/spinalvgg.csv", index=False)
# Last expression of the cell: preview the first rows as the cell output.
submission.head()

Unnamed: 0,id,digit
0,2049,6
1,2050,9
2,2051,8
3,2052,0
4,2053,3
