# Pseudo Labeling

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import sys
sys.path.insert(0, "../src")

In [25]:
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn import model_selection
from scipy.special import softmax

import torch

import config
import dataset
import engine
import models
import utils

In [4]:
# All models and batches are placed on the GPU; the training cells in this
# notebook also create a torch.cuda.amp.GradScaler.
device = torch.device("cuda")
# Seed value handed to the project's seeding helper.
SEED = 42
# NOTE(review): presumably seeds the Python/NumPy/PyTorch RNGs — confirm in utils.
utils.seed_everything(SEED)

In [5]:
# Labelled training data; its `digit` column is the stratification label used
# when the train/validation split is made.
df = pd.read_csv("../input/train.csv")

In [6]:
# Shuffle every row. reset_index() is called without drop=True, so the previous
# index is kept as an extra "index" column in the frame.
# NOTE(review): no random_state is passed here or to train_test_split, so the
# split is reproducible only if utils.seed_everything seeds NumPy's global RNG
# — confirm.
df = df.sample(frac=1.0).reset_index()
# Hold out 10% of the row positions for validation, stratified by digit.
train_indices, valid_indices = model_selection.train_test_split(
    np.arange(len(df)), test_size=0.1, stratify=df.digit
)
# Both datasets wrap the same frame and differ only in the row positions given.
train_dataset = dataset.EMNISTDataset(df, train_indices)
valid_dataset = dataset.EMNISTDataset(df, valid_indices)
# Only the training loader shuffles; validation uses a larger batch size.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=256)

## Baseline

In [8]:
# Fixed number of training epochs; this cell has no early stopping.
EPOCHS = 200

model = models.Model().to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# mode="max" because the monitored value is validation accuracy: once it stops
# improving for `patience` epochs the learning rate is multiplied by 0.75.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="max", verbose=True, patience=15, factor=0.75
)

# Gradient scaler handed to engine.train
# (presumably for mixed-precision training — confirm in engine).
scaler = torch.cuda.amp.GradScaler()

for epoch in range(EPOCHS):
    engine.train(train_loader, model, optimizer, device, scaler)
    # evaluate returns per-class scores plus targets; argmax over axis 1 picks
    # the predicted class for each validation row.
    preds, targs = engine.evaluate(valid_loader, model, device)
    preds = np.argmax(preds, axis=1)
    accuracy = metrics.accuracy_score(targs, preds)
    scheduler.step(accuracy)
    
    # Log only every 10th epoch to keep the output short.
    if epoch % 10 == 0:
        print(f"Epoch={epoch}, Accuracy={accuracy}")
        
# Score the final weights once more on the validation split. No checkpoint is
# saved in this cell, so this is the last epoch's model, not the best one.
preds, targs = engine.evaluate(valid_loader, model, device)
preds = np.argmax(preds, axis=1)
accuracy = metrics.accuracy_score(targs, preds)
print(f"Validation Accuracy={accuracy}")

Epoch=0, Accuracy=0.3024390243902439
Epoch=10, Accuracy=0.7121951219512195
Epoch=20, Accuracy=0.7804878048780488
Epoch=30, Accuracy=0.7707317073170732
Epoch=40, Accuracy=0.7951219512195122
Epoch    44: reducing learning rate of group 0 to 7.5000e-04.
Epoch=50, Accuracy=0.7707317073170732
Epoch    60: reducing learning rate of group 0 to 5.6250e-04.
Epoch=60, Accuracy=0.8146341463414634
Epoch=70, Accuracy=0.8195121951219512
Epoch    76: reducing learning rate of group 0 to 4.2188e-04.
Epoch=80, Accuracy=0.8341463414634146
Epoch=90, Accuracy=0.8439024390243902
Epoch=100, Accuracy=0.848780487804878
Epoch=110, Accuracy=0.8390243902439024
Epoch   114: reducing learning rate of group 0 to 3.1641e-04.
Epoch=120, Accuracy=0.8146341463414634
Epoch   130: reducing learning rate of group 0 to 2.3730e-04.
Epoch=130, Accuracy=0.8585365853658536
Epoch=140, Accuracy=0.848780487804878
Epoch   146: reducing learning rate of group 0 to 1.7798e-04.
Epoch=150, Accuracy=0.848780487804878
Epoch=160, Accurac

In [9]:
# Load the test set and wrap every row of it (indices 0..len-1).
# NOTE(review): label=False presumably tells the dataset not to return targets
# — confirm in dataset.EMNISTDataset.
df_test = pd.read_csv("../input/test.csv")
test_dataset = dataset.EMNISTDataset(df_test, np.arange(len(df_test)), label=False)
# No shuffling, so prediction rows stay aligned with df_test rows.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=256)

In [10]:
# Baseline model outputs for the test set (presumably raw logits — confirm in
# engine.evaluate); target=False means only predictions are returned here.
preds = engine.evaluate(test_loader, model, device, target=False)
# Normalise each row (axis 1) so the class scores sum to 1.
probs = softmax(preds, axis=1)

In [20]:
# Copy the test frame and append, per row, the most likely class ("digit")
# and the probability the baseline model assigned to it ("prob").
pl = df_test.assign(
    digit=np.argmax(probs, axis=1),
    prob=np.max(probs, axis=1),
)

In [21]:
# Keep only pseudo-labels whose top-class probability is strictly above the
# cut-off, then show how many rows survive.
threshold = 0.995
pl = pl.loc[pl["prob"].gt(threshold)]
pl.shape

(17918, 788)

In [12]:
# Persist the confident baseline pseudo-labels. to_csv does not create missing
# directories, so make sure the target folder exists first.
pl_csv = Path("../input/pl/train_pl.csv")
pl_csv.parent.mkdir(parents=True, exist_ok=True)
# index=False keeps the (filtered, non-contiguous) frame index out of the file.
pl.to_csv(pl_csv, index=False)
pl.head()

Unnamed: 0,id,letter,0,1,2,3,4,5,6,7,...,776,777,778,779,780,781,782,783,digit,prob
0,2049,L,0,4,0,2,4,2,3,1,...,4,2,2,4,3,4,1,4,6,1.0
1,2050,C,4,1,4,0,1,1,0,2,...,2,4,2,4,2,2,1,2,9,0.999917
2,2051,S,0,4,0,1,3,2,3,0,...,2,0,3,2,3,0,1,4,8,0.999662
3,2052,K,2,1,3,3,3,4,3,0,...,3,2,4,1,0,4,4,4,0,1.0
4,2053,W,1,0,1,1,2,2,1,4,...,1,4,0,2,1,2,3,4,3,1.0


## SpinalVGG

In [13]:
# Fixed number of training epochs; this cell has no early stopping.
EPOCHS = 200

model2 = models.SpinalVGG().to(device)

# Fresh optimizer/scheduler/scaler for model2; these rebind the names used by
# the baseline training cell.
optimizer = torch.optim.Adam(model2.parameters(), lr=1e-3)
# mode="max" because the monitored value is validation accuracy: once it stops
# improving for `patience` epochs the learning rate is multiplied by 0.75.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="max", verbose=True, patience=15, factor=0.75
)

# Gradient scaler handed to engine.train
# (presumably for mixed-precision training — confirm in engine).
scaler = torch.cuda.amp.GradScaler()

for epoch in range(EPOCHS):
    engine.train(train_loader, model2, optimizer, device, scaler)
    # evaluate returns per-class scores plus targets; argmax over axis 1 picks
    # the predicted class for each validation row.
    preds, targs = engine.evaluate(valid_loader, model2, device)
    preds = np.argmax(preds, axis=1)
    accuracy = metrics.accuracy_score(targs, preds)
    scheduler.step(accuracy)
    
    # Log only every 10th epoch to keep the output short.
    if epoch % 10 == 0:
        print(f"Epoch={epoch}, Accuracy={accuracy}")
        
# Score the final weights once more on the validation split. No checkpoint is
# saved in this cell, so this is the last epoch's model, not the best one.
preds, targs = engine.evaluate(valid_loader, model2, device)
preds = np.argmax(preds, axis=1)
accuracy = metrics.accuracy_score(targs, preds)
print(f"Validation Accuracy={accuracy}")

Epoch=0, Accuracy=0.2780487804878049
Epoch=10, Accuracy=0.8048780487804879
Epoch=20, Accuracy=0.8585365853658536
Epoch=30, Accuracy=0.848780487804878
Epoch=40, Accuracy=0.848780487804878
Epoch    48: reducing learning rate of group 0 to 7.5000e-04.
Epoch=50, Accuracy=0.848780487804878
Epoch=60, Accuracy=0.8682926829268293
Epoch    69: reducing learning rate of group 0 to 5.6250e-04.
Epoch=70, Accuracy=0.848780487804878
Epoch=80, Accuracy=0.8780487804878049
Epoch    85: reducing learning rate of group 0 to 4.2188e-04.
Epoch=90, Accuracy=0.8878048780487805
Epoch   101: reducing learning rate of group 0 to 3.1641e-04.
Epoch=100, Accuracy=0.8829268292682927
Epoch=110, Accuracy=0.8780487804878049
Epoch   117: reducing learning rate of group 0 to 2.3730e-04.
Epoch=120, Accuracy=0.8731707317073171
Epoch=130, Accuracy=0.8829268292682927
Epoch   133: reducing learning rate of group 0 to 1.7798e-04.
Epoch=140, Accuracy=0.8780487804878049
Epoch   149: reducing learning rate of group 0 to 1.3348e-

In [14]:
# SpinalVGG outputs for the test set (presumably raw logits — confirm in
# engine.evaluate); target=False means only predictions are returned here.
preds2 = engine.evaluate(test_loader, model2, device, target=False)
# Normalise each row (axis 1) so the class scores sum to 1.
probs2 = softmax(preds2, axis=1)

In [15]:
# Copy the test frame and append, per row, the most likely class ("digit")
# and the probability the SpinalVGG model assigned to it ("prob").
pl2 = df_test.assign(
    digit=np.argmax(probs2, axis=1),
    prob=np.max(probs2, axis=1),
)

In [16]:
# Keep only pseudo-labels whose top-class probability is strictly above the
# cut-off, then show how many rows survive.
threshold = 0.995
pl2 = pl2.loc[pl2["prob"].gt(threshold)]
pl2.shape

(17748, 788)

In [17]:
# Persist the confident SpinalVGG pseudo-labels. to_csv does not create missing
# directories, so make sure the target folder exists first.
pl2_csv = Path("../input/pl2/train_pl2.csv")
pl2_csv.parent.mkdir(parents=True, exist_ok=True)
# index=False keeps the (filtered, non-contiguous) frame index out of the file.
pl2.to_csv(pl2_csv, index=False)
pl2.head()

Unnamed: 0,id,letter,0,1,2,3,4,5,6,7,...,776,777,778,779,780,781,782,783,digit,prob
0,2049,L,0,4,0,2,4,2,3,1,...,4,2,2,4,3,4,1,4,6,1.0
1,2050,C,4,1,4,0,1,1,0,2,...,2,4,2,4,2,2,1,2,9,0.999999
2,2051,S,0,4,0,1,3,2,3,0,...,2,0,3,2,3,0,1,4,8,0.99904
3,2052,K,2,1,3,3,3,4,3,0,...,3,2,4,1,0,4,4,4,0,1.0
4,2053,W,1,0,1,1,2,2,1,4,...,1,4,0,2,1,2,3,4,3,1.0


## Blend

In [18]:
# Blend the two models by averaging their class probabilities element-wise.
probs3 = (probs + probs2) / 2
# Copy the test frame and append, per row, the blended top class ("digit")
# and its blended probability ("prob").
pl3 = df_test.assign(
    digit=np.argmax(probs3, axis=1),
    prob=np.max(probs3, axis=1),
)

In [19]:
# Keep only pseudo-labels whose blended top-class probability is strictly
# above the cut-off, then show how many rows survive.
threshold = 0.995
pl3 = pl3.loc[pl3["prob"].gt(threshold)]
pl3.shape

(15884, 788)

In [24]:
# Persist the confident blended pseudo-labels. to_csv does not create missing
# directories, so make sure the target folder exists first.
pl3_csv = Path("../input/pl3/train_pl3.csv")
pl3_csv.parent.mkdir(parents=True, exist_ok=True)
# index=False keeps the (filtered, non-contiguous) frame index out of the file.
pl3.to_csv(pl3_csv, index=False)
pl3.head()

Unnamed: 0,id,letter,0,1,2,3,4,5,6,7,...,776,777,778,779,780,781,782,783,digit,prob
0,2049,L,0,4,0,2,4,2,3,1,...,4,2,2,4,3,4,1,4,6,1.0
1,2050,C,4,1,4,0,1,1,0,2,...,2,4,2,4,2,2,1,2,9,0.999958
2,2051,S,0,4,0,1,3,2,3,0,...,2,0,3,2,3,0,1,4,8,0.999351
3,2052,K,2,1,3,3,3,4,3,0,...,3,2,4,1,0,4,4,4,0,1.0
4,2053,W,1,0,1,1,2,2,1,4,...,1,4,0,2,1,2,3,4,3,1.0


## Make Folds

In [28]:
def make_folds(df, path, n_splits=5, seed=42):
    """Write stratified K-fold index arrays for ``df`` into ``path``.

    For every fold ``k`` two files are saved in ``path``:
    ``train_idx-fold{k}.npy`` and ``valid_idx-fold{k}.npy``. The arrays hold
    positional row indices into ``df`` exactly as it was passed in, so they
    can be applied (e.g. with ``.iloc``) to the same CSV the frame came from.

    Args:
        df: DataFrame with a ``digit`` column, used as the stratification label.
        path: Directory (str or Path) that receives the ``.npy`` files; it is
            created if it does not exist.
        n_splits: Number of folds to generate.
        seed: Seed for the fold shuffling, so repeated runs give the same folds.
    """
    path = Path(path)
    path.mkdir(parents=True, exist_ok=True)
    # Shuffle inside the splitter instead of shuffling a local copy of the
    # frame: indices computed on a shuffled copy that is then thrown away do
    # not line up with the rows of the saved CSV, which silently breaks the
    # stratification for whoever loads the index files.
    kf = model_selection.StratifiedKFold(
        n_splits=n_splits, shuffle=True, random_state=seed
    )
    for fold, (train_indices, valid_indices) in enumerate(kf.split(df, y=df.digit)):
        # np.save appends the ".npy" extension to these file names.
        np.save(path / f"train_idx-fold{fold}", train_indices)
        np.save(path / f"valid_idx-fold{fold}", valid_indices)

In [29]:
# Reload the saved baseline pseudo-labels and write the fold index files into
# the same directory. Note: this rebinds `df`, replacing the frame it held before.
df = pd.read_csv("../input/pl/train_pl.csv")
make_folds(df, "../input/pl")

In [30]:
# Reload the saved SpinalVGG pseudo-labels and write the fold index files into
# the same directory. Note: this rebinds `df`, replacing the frame it held before.
df = pd.read_csv("../input/pl2/train_pl2.csv")
make_folds(df, "../input/pl2")

In [31]:
# Reload the saved blended pseudo-labels and write the fold index files into
# the same directory. Note: this rebinds `df`, replacing the frame it held before.
df = pd.read_csv("../input/pl3/train_pl3.csv")
make_folds(df, "../input/pl3")