In [1]:
import os
import pandas as pd
import numpy as np

import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader 
import timm
from tqdm import tqdm

In [20]:
# Inference configuration.
# model_name: timm architecture id handed to build_model().
# model_path: directory holding the per-fold checkpoints (fold_epoch_<k>.pth).
# bs:         batch size used by the test DataLoader.
# device:     first CUDA device when one is available, otherwise CPU, so the
#             notebook still runs (slowly) on a machine without a GPU.
model_name = 'tf_efficientnet_b5_ns'
model_path = '../models/b5_best'
bs = 20
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [3]:
# The sample submission supplies the test ids (column 'index') and doubles as
# the frame the predictions are written into later on.
test = pd.read_csv('../data/sample_submission.csv')

# One PNG per id, in the same order as the submission rows.
image_path = np.array([
    os.path.join('../data/test_dirty_mnist_2nd/', f'{image_id}.png')
    for image_id in test['index']
])

In [4]:
class PathDataset(Dataset):
    """In-memory image dataset built from a sequence of file paths.

    Every image is decoded once in ``__init__`` (converted BGR -> RGB) and kept
    in ``self.imgs``. ``__getitem__`` applies the optional transform, normalises
    each channel and returns a channel-first ``float32`` tensor.

    Args:
        image_paths: sequence of image file paths.
        labels: indexable per-sample targets; only read when ``is_test`` is False.
        transforms: optional callable invoked as ``transforms(image=img)`` whose
            result is indexed with ``['image']`` (looks albumentations-style —
            confirm against the training code).
        is_test: when True, ``__getitem__`` returns only the image tensor;
            otherwise it returns ``(image, label)``.

    Raises:
        FileNotFoundError: if ``cv2.imread`` cannot read one of the paths.
    """

    # Per-channel (RGB) mean / std on the 0-255 scale; numerically these are
    # 255 * (0.485, 0.456, 0.406) and 255 * (0.229, 0.224, 0.225).
    _MEAN = (123.675, 116.28, 103.53)
    _STD = (58.395, 57.120, 57.375)

    def __init__(self, image_paths, labels=None, transforms=None, is_test=False):
        self.image_paths = image_paths
        self.labels = labels
        self.transforms = transforms
        self.is_test = is_test

        self.imgs = []
        for img_path in tqdm(self.image_paths):
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with the offending path rather than
                # inside cvtColor with an opaque assertion error.
                raise FileNotFoundError(
                    f'cv2.imread could not read image (missing or undecodable): {img_path}'
                )
            self.imgs.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    def __getitem__(self, index):
        """Return the normalised CHW tensor for ``index`` (plus its label unless ``is_test``)."""
        img = self.imgs[index]
        if self.transforms:
            img = self.transforms(image=img)['image'].astype(np.float32)
        img = self.normalize_img(img)
        img = self.to_torch_tensor(img)

        if self.is_test:
            return img
        return img, torch.tensor(self.labels[index], dtype=torch.float32)

    def __len__(self):
        """Number of samples, taken from the path list."""
        return len(self.image_paths)

    def normalize_img(self, img):
        """Return a float32 copy of ``img`` with ``(img - mean) / std`` applied on the last axis."""
        mean = np.array(self._MEAN, dtype=np.float32)
        std = np.array(self._STD, dtype=np.float32)
        img = img.astype(np.float32)  # astype copies, so the cached image is never modified
        img -= mean
        img *= np.reciprocal(std, dtype=np.float32)
        return img

    def to_torch_tensor(self, img):
        """Convert an HWC numpy array into a CHW torch tensor."""
        return torch.from_numpy(img.transpose((2, 0, 1)))


In [5]:
# Build the test dataset (no labels, no augmentation); constructing it decodes
# every image into memory, which is what the progress bar below reports.
test_dataset = PathDataset(image_path, labels=None, transforms=None, is_test=True)

# shuffle=False keeps batches in submission order so predictions line up with `test`.
test_loader = DataLoader(
    test_dataset,
    batch_size=bs,
    shuffle=False,
    num_workers=0,
    pin_memory=True,
)

100%|██████████| 5000/5000 [00:03<00:00, 1333.39it/s]


In [21]:
def build_model(model_name, device):
    """Create a ``timm_models`` network for ``model_name`` and move it to ``device``."""
    return timm_models(model_name).to(device)

class timm_models(nn.Module):
    """timm backbone followed by a five-way dropout-averaged linear head.

    The backbone is created with a 512-wide output; that vector is passed
    through five independent ``Dropout(0.5)`` layers, each result goes through
    the same ``Linear(512, 26)`` layer, the five outputs are averaged and a
    sigmoid is applied.
    """

    def __init__(self, model_name):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=True, num_classes=512)
        self.dropouts = nn.ModuleList(nn.Dropout(0.5) for _ in range(5))
        self.output_layer = nn.Linear(512, 26)

    def forward(self, img):
        """Return per-class sigmoid outputs for a batch of images."""
        embedding = self.model(img)

        # Running sum of the head output over the dropout branches.
        total = None
        for drop in self.dropouts:
            branch = self.output_layer(drop(embedding))
            total = branch if total is None else total + branch

        averaged = total / len(self.dropouts)
        return torch.sigmoid(averaged)

def get_trans(img, I):
    """Return the ``I``-th test-time view of a batch tensor.

    ``I >= 4`` first swaps dims 2 and 3; ``I % 4`` then selects the flips:
    0 -> none, 1 -> flip dim 2, 2 -> flip dim 3, 3 -> flip dim 2 then dim 3.
    """
    view = img.transpose(2, 3) if I >= 4 else img

    # Axes to flip, in order, for each residue of I modulo 4.
    flip_axes = {0: (), 1: (2,), 2: (3,), 3: (2, 3)}.get(I % 4)
    if flip_axes is None:
        return None

    for axis in flip_axes:
        view = view.flip(axis)
    return view

In [22]:
# Instantiate the network once on `device`; the inference loop below overwrites
# its weights with each fold's checkpoint via load_state_dict.
model = build_model(model_name, device)

In [23]:
# Fold x TTA inference.
# For every fold checkpoint, run the whole test set through the model under
# `n_test` views from get_trans() and average them; one (n_samples, n_classes)
# array per fold is collected in `fold_predictions`.
fold_predictions = []
n_test = 4   # number of test-time views per batch: get_trans modes 0 .. n_test-1
n_folds = 2  # checkpoints fold_epoch_0.pth .. fold_epoch_{n_folds-1}.pth

for fold in range(n_folds):
    m_path = os.path.join(model_path, f'fold_epoch_{fold}.pth')
    # map_location makes loading independent of the device the checkpoint was
    # saved from (e.g. another GPU index, or a CPU-only machine).
    model.load_state_dict(torch.load(m_path, map_location=device))
    model.eval()
    predictions = []

    bar = tqdm(test_loader)
    with torch.no_grad():
        for images in bar:
            images = images.to(device)

            # Mean of the model outputs over the test-time views.
            outputs = sum(model(get_trans(images, I)) for I in range(n_test)) / n_test

            predictions.append(outputs.cpu().numpy())
    predictions = np.concatenate(predictions)
    fold_predictions.append(predictions)

100%|██████████| 250/250 [00:50<00:00,  4.92it/s]
100%|██████████| 250/250 [00:51<00:00,  4.90it/s]


In [11]:
# Mean over folds of the most recent inference run, stored under the b7 name.
# NOTE(review): the b7 / b6 / b5 cells all read the same `fold_predictions`, and
# the config cell currently names a b5 model, so these were presumably produced
# by editing the config and re-running inference between cells (execution
# counts 11, 17, 24). A fresh Restart & Run All would make all three identical —
# confirm, and consider one inference function called once per model.
predictions_b7 = np.mean(fold_predictions, 0)

In [17]:
# Mean over folds of the most recent inference run, stored under the b6 name.
# NOTE(review): relies on `fold_predictions` having been recomputed for a b6
# model before this cell ran; nothing in the notebook as saved does that on a
# top-to-bottom run — confirm.
predictions_b6 = np.mean(fold_predictions, 0)

In [24]:
# Mean over folds of the most recent inference run, stored under the b5 name
# (matches the model named in the config cell as saved).
# NOTE(review): `predictions_b5` is not used by any of the blends below.
predictions_b5 = np.mean(fold_predictions, 0)

In [None]:
# Preview of the 0.3 * b6 + 0.7 * b7 blend; the value is only displayed, not
# stored (this cell has no execution count), and the same expression is
# assigned to `predictions` in the next cell.
predictions_b6*0.3 + predictions_b7*0.7

In [27]:
# Weighted blend of the two models' averaged outputs (b6: 0.3, b7: 0.7) ...
predictions = 0.3 * predictions_b6 + 0.7 * predictions_b7
# ... binarised at 0.5 and written into every column from 'a' onward.
test.loc[:, 'a':] = (predictions >= 0.5).astype(int)

In [28]:
# Write the current contents of `test` (ids plus thresholded labels) as a
# submission file, without the DataFrame index.
test.to_csv('../submit/final_ensemble.csv', index=False)

In [33]:
# Sanity check: smallest number of positive labels assigned to any test row.
test.loc[:, 'a':].sum(axis=1).min()

6

In [11]:
# Single-model submission: average the per-fold outputs, then binarise at 0.5
# and write the result into every column from 'a' onward.
predictions = np.mean(fold_predictions, axis=0)
test.loc[:, 'a':] = (predictions >= 0.5).astype(int)

In [20]:
# Write the current contents of `test` as the single-model submission file,
# without the DataFrame index.
test.to_csv('../submit/aug_fold_baseline_b7.csv', index=False)

In [25]:
# Blend the b6 and b7 averaged outputs with weights 0.3 / 0.7, binarise at 0.5
# and write the labels into every column from 'a' onward.
predictions = 0.3 * predictions_b6 + 0.7 * predictions_b7
test.loc[:, 'a':] = (predictions >= 0.5).astype(int)

In [26]:
# Write the current contents of `test` as the b7+b6 blend submission file,
# without the DataFrame index.
test.to_csv('../submit/final_b7+b6.csv', index=False)