# Ensemble

In [14]:
# Re-import edited modules automatically before each cell runs, so changes to
# the project's helper modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [15]:
import sys
from pathlib import Path

# Make the project's source directory importable (config, dataset, engine,
# models are imported from it in the next cell).
# The path is resolved once so later working-directory changes cannot break
# imports, and the membership check keeps the cell idempotent: re-running it
# does not stack duplicate entries on sys.path.
SRC_DIR = str(Path("../src").resolve())
if SRC_DIR not in sys.path:
    sys.path.insert(0, SRC_DIR)

In [16]:
import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

from sklearn import metrics
from sklearn import model_selection

import albumentations

import config
import dataset
import engine
import models

In [17]:
# Size of the ensemble: this many separately constructed networks are trained,
# and their test-time scores are summed in the inference cell.
nets = 15

# Seed the global RNGs before building the models so the random weight
# initialisation (and anything else drawing from these generators) is
# repeatable after Restart Kernel -> Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

cnns = [models.SpinalVGG() for _ in range(nets)]
# Alternative architecture, kept for quick comparison runs:
# cnns = [models.Model() for _ in range(nets)]

In [18]:
# Load the training table from the path configured in `config.TRAIN_CSV`.
# The training loop reads its `digit` column as the stratification target.
df = pd.read_csv(config.TRAIN_CSV)

In [19]:
# Preprocessing pipeline handed to the training dataset.
augs = albumentations.Compose(
    [
        # Geometric augmentation, currently disabled; uncomment to re-enable.
        # albumentations.ShiftScaleRotate(
        #     shift_limit=0.0625,
        #     scale_limit=0.1,
        #     rotate_limit=10,
        #     p=0.9,
        # ),
        # Always normalise with the configured statistics. `p=1.0` replaces
        # the deprecated `always_apply=True` flag and applies the transform on
        # every call, exactly as before.
        albumentations.Normalize(
            mean=config.MEAN,
            std=config.STD,
            max_pixel_value=255.0,
            p=1.0,
        ),
    ]
)

In [20]:
device = torch.device(config.DEVICE)
EPOCHS = 25

# Train every ensemble member on its own stratified 90/10 split and report the
# validation accuracy measured after the final epoch.
for i, model in enumerate(cnns):
    # Seeding the split with the model index keeps each run reproducible while
    # still giving every network a different train/validation partition.
    df_train, df_valid = model_selection.train_test_split(
        df, test_size=0.1, stratify=df.digit, random_state=i
    )
    train_dataset = dataset.EMNISTDataset(df_train, augs=augs)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=config.TRAIN_BATCH_SIZE, shuffle=True
    )
    # NOTE(review): the validation set is built without `augs`, so it skips the
    # Normalize step applied to the training data unless EMNISTDataset
    # normalises on its own -- confirm against dataset.py.
    valid_dataset = dataset.EMNISTDataset(df_valid)
    valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=config.TEST_BATCH_SIZE)

    # Move the model to the target device before building the optimizer, as
    # the torch.optim documentation recommends.
    model.to(device)

    optimizer = torch.optim.Adam(model.parameters())
    # mode='max' because the monitored quantity is accuracy: the learning rate
    # is halved once accuracy has stopped improving for more than `patience`
    # epochs.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', verbose=True, patience=7, factor=0.5
    )

    for epoch in range(EPOCHS):
        engine.train(train_loader, model, optimizer, device)
        predictions, targets = engine.evaluate(valid_loader, model, device)

        # Collapse the per-class outputs to predicted labels.
        predictions = np.argmax(np.array(predictions), axis=1)
        accuracy = metrics.accuracy_score(targets, predictions)

        scheduler.step(accuracy)

    # `accuracy` is the value from the last epoch, not the best one seen.
    print(f"CNN {i}, Validation accuracy={accuracy}")

Epoch    22: reducing learning rate of group 0 to 5.0000e-04.
CNN 0, Validation accuracy=0.6146341463414634
Epoch    17: reducing learning rate of group 0 to 5.0000e-04.
CNN 1, Validation accuracy=0.5951219512195122
Epoch    18: reducing learning rate of group 0 to 5.0000e-04.
CNN 2, Validation accuracy=0.5853658536585366


KeyboardInterrupt: 

In [8]:
# Build the test-set pipeline. Batches are read in file order (shuffle=False,
# the DataLoader default, spelled out here) because the submission cell pairs
# the predictions with `df_test.id` row by row.
# NOTE(review): no transform is passed to the test dataset; confirm that
# EMNISTTestDataset applies the same normalisation used for training.
df_test = pd.read_csv(config.TEST_CSV)
test_dataset = dataset.EMNISTTestDataset(df_test)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=config.TEST_BATCH_SIZE,
    shuffle=False,
)

In [13]:
# Sum the per-class outputs of every network and predict, for each test row,
# the class with the highest total.
# Every model in `cnns` contributes, including any that the training cell did
# not reach if it was interrupted part-way through.
# NOTE(review): outputs are summed exactly as engine.infer returns them; if
# those are raw logits rather than probabilities, confirm that is intended.
ensemble_scores = None
for model in tqdm(cnns):
    preds = np.asarray(engine.infer(test_loader, model, device), dtype=np.float64)
    # Start from the first prediction array so the number of classes comes
    # from the model output instead of a hard-coded 10.
    ensemble_scores = preds if ensemble_scores is None else ensemble_scores + preds

if ensemble_scores is None:
    raise ValueError("cnns is empty: there is nothing to ensemble")
assert len(ensemble_scores) == len(df_test), "expected one score row per test sample"

# Keep the score matrix under its own name so this cell can be re-run safely;
# `result` holds only the final predicted labels.
result = np.argmax(ensemble_scores, axis=1)

100%|██████████| 20/20 [00:45<00:00,  2.26s/it]


In [12]:
# Pair each test id with its ensembled prediction, write the submission file
# without the index column, and preview the first rows.
submission = pd.DataFrame(
    {
        "id": df_test["id"],
        "digit": result,
    }
)
submission.to_csv("../output/ensemble_spinal_2.csv", index=False)
submission.head()

Unnamed: 0,id,digit
0,2049,6
1,2050,9
2,2051,8
3,2052,0
4,2053,3
