# Ensemble

In [15]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [16]:
import sys
sys.path.insert(0, "../src")

In [17]:
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

from sklearn import metrics
from sklearn import model_selection

import albumentations as A

import config
import dataset
import engine
import models

In [18]:
# Load the labelled training table from the path configured in `config`.
# Its `digit` column is what the training cell stratifies the
# train/validation splits on.
df = pd.read_csv(config.TRAIN_CSV)

In [19]:
# Disabled alternative: a lighter augmentation pipeline (shift/scale/rotate
# followed by normalisation only). It is left commented out; the active
# `augs` definition is in the following cell.
# augs = A.Compose(
#     [
#         A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=10, p=0.9),
#         A.Normalize(config.MEAN, config.STD, max_pixel_value=255.0, always_apply=True),
#     ]
# )

In [20]:
# Training-time augmentation pipeline, built stage by stage and then composed
# in the same order as listed below.

# Stage 1: small random shift / scale / rotation, applied with p=0.9.
shift_scale_rotate = A.ShiftScaleRotate(
    shift_limit=0.0625, scale_limit=0.1, rotate_limit=10, p=0.9
)

# Stage 2: a group of three spatial distortions wrapped in OneOf (group p=0.5).
spatial_distortion = A.OneOf(
    [
        A.ElasticTransform(alpha=120, sigma=120 * 0.05, alpha_affine=120 * 0.03),
        A.GridDistortion(),
        A.OpticalDistortion(distort_limit=2, shift_limit=0.5),
    ],
    p=0.5,
)

# Stage 3: a group of three intensity transforms wrapped in OneOf (group p=0.5).
intensity_jitter = A.OneOf(
    [A.RandomContrast(), A.RandomGamma(), A.RandomBrightness()],
    p=0.5,
)

# Stage 4: up to 4 dropped-out patches of at most 4x4, applied with p=0.25.
coarse_dropout = A.CoarseDropout(max_holes=4, max_height=4, max_width=4, p=0.25)

# Stage 5: always normalise with the statistics configured in `config`.
normalize = A.Normalize(
    config.MEAN, config.STD, max_pixel_value=255.0, always_apply=True
)

augs = A.Compose(
    [
        shift_scale_rotate,
        spatial_distortion,
        intensity_jitter,
        coarse_dropout,
        normalize,
    ]
)

In [None]:
# Train an ensemble of `nets` independent CNNs.
#
# Each member is trained for EPOCHS epochs on its own stratified 90/10
# train/validation split of `df`, with Adam and a ReduceLROnPlateau schedule
# driven by validation accuracy. The trained models stay in `cnns` (on
# `device`) for the inference cell further down.
nets = 15
device = torch.device(config.DEVICE)
EPOCHS = 200
SEED = 42  # base seed; ensemble member i is split with SEED + i

# Seed the RNGs this notebook controls directly so that weight initialisation
# and the per-member splits repeat across "Restart & Run All".
# NOTE(review): the augmentation library may also draw from Python's `random`
# module, which is not seeded here -- confirm if bit-exact reruns are required.
torch.manual_seed(SEED)
np.random.seed(SEED)


cnns = [models.SpinalVGG() for _ in range(nets)]
# cnns = [models.Model() for _ in range(nets)]


for i, model in enumerate(cnns):
    # A distinct but reproducible split per member: different members see
    # different validation folds, yet a rerun reproduces the same folds.
    df_train, df_valid = model_selection.train_test_split(
        df, test_size=0.1, stratify=df.digit, random_state=SEED + i
    )
    # Augmentations are applied to the training split only.
    train_dataset = dataset.EMNISTDataset(df_train, augs=augs)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, shuffle=True
    )
    valid_dataset = dataset.EMNISTDataset(df_valid)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=config.TEST_BATCH_SIZE
    )

    # Move the model to its device *before* building the optimizer, as the
    # torch.optim documentation recommends, so the optimizer is constructed
    # over parameters that already live where they will be trained.
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters())
    # mode='max' because the monitored quantity (accuracy) should increase;
    # the learning rate is halved after 10 epochs without improvement.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', verbose=True, patience=10, factor=0.5
    )

    # Defined up front so the report below cannot hit an unbound (or stale,
    # from the previous member) value when EPOCHS == 0.
    accuracy = float("nan")

    for epoch in range(EPOCHS):
        engine.train(train_loader, model, optimizer, device)
        predictions, targets = engine.evaluate(valid_loader, model, device)

        # argmax over axis 1 turns each row of scores into a class index.
        predictions = np.array(predictions)
        predictions = np.argmax(predictions, axis=1)
        accuracy = metrics.accuracy_score(targets, predictions)

        scheduler.step(accuracy)

    # Reports the accuracy of the final epoch (not the best epoch seen).
    print(f"CNN {i}, Validation accuracy={accuracy}")

Epoch    42: reducing learning rate of group 0 to 5.0000e-04.
Epoch    59: reducing learning rate of group 0 to 2.5000e-04.
Epoch    70: reducing learning rate of group 0 to 1.2500e-04.
Epoch    81: reducing learning rate of group 0 to 6.2500e-05.
Epoch    92: reducing learning rate of group 0 to 3.1250e-05.
Epoch   103: reducing learning rate of group 0 to 1.5625e-05.
Epoch   114: reducing learning rate of group 0 to 7.8125e-06.
Epoch   125: reducing learning rate of group 0 to 3.9063e-06.
Epoch   136: reducing learning rate of group 0 to 1.9531e-06.
Epoch   147: reducing learning rate of group 0 to 9.7656e-07.
Epoch   158: reducing learning rate of group 0 to 4.8828e-07.
Epoch   169: reducing learning rate of group 0 to 2.4414e-07.
Epoch   180: reducing learning rate of group 0 to 1.2207e-07.
Epoch   191: reducing learning rate of group 0 to 6.1035e-08.
CNN 0, Validation accuracy=0.8829268292682927
Epoch    61: reducing learning rate of group 0 to 5.0000e-04.
Epoch    72: reducing le

Epoch    38: reducing learning rate of group 0 to 5.0000e-04.
Epoch    56: reducing learning rate of group 0 to 2.5000e-04.
Epoch    67: reducing learning rate of group 0 to 1.2500e-04.
Epoch    78: reducing learning rate of group 0 to 6.2500e-05.
Epoch    89: reducing learning rate of group 0 to 3.1250e-05.
Epoch   100: reducing learning rate of group 0 to 1.5625e-05.
Epoch   111: reducing learning rate of group 0 to 7.8125e-06.
Epoch   122: reducing learning rate of group 0 to 3.9063e-06.
Epoch   133: reducing learning rate of group 0 to 1.9531e-06.
Epoch   144: reducing learning rate of group 0 to 9.7656e-07.
Epoch   155: reducing learning rate of group 0 to 4.8828e-07.
Epoch   166: reducing learning rate of group 0 to 2.4414e-07.
Epoch   177: reducing learning rate of group 0 to 1.2207e-07.
Epoch   188: reducing learning rate of group 0 to 6.1035e-08.
Epoch   199: reducing learning rate of group 0 to 3.0518e-08.
CNN 9, Validation accuracy=0.8878048780487805
Epoch    34: reducing le

In [None]:
# Read the unlabelled test table and wrap it in a DataLoader for inference.
# No `shuffle` argument is passed to the loader.
df_test = pd.read_csv(config.TEST_CSV)
test_dataset = dataset.EMNISTTestDataset(df_test)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=config.TEST_BATCH_SIZE,
)

In [None]:
# Ensemble by summing every model's per-class outputs and taking the
# arg-max class for each test row.
ensemble_scores = np.zeros((len(df_test), 10))  # one row per test sample, 10 columns
for member in cnns:
    ensemble_scores += np.array(engine.infer(test_loader, member, device))

# Final predicted class index per test row.
result = ensemble_scores.argmax(axis=1)

In [None]:
# Pair each test id with its ensemble prediction, write the submission CSV
# (without the index column), and preview the first rows.
submission_columns = {"id": df_test.id, "digit": result}
submission = pd.DataFrame(submission_columns)
submission.to_csv("../output/ensemble_more_augs.csv", index=False)
submission.head()