In [1]:
from glob import glob
from sklearn.model_selection import GroupKFold
import cv2
from skimage import io
import torch
from torch import nn
import os
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
import sklearn

SEED = 42

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True

seed_everything(SEED)

DATA_ROOT_PATH = '/home/heye0507/alask2/'

In [2]:
from efficientnet_pytorch import EfficientNet

def get_net():
    net = EfficientNet.from_pretrained('efficientnet-b3',num_classes=4)
    return net

net = get_net().cuda()

Loaded pretrained weights for efficientnet-b3


In [3]:
!ls {DATA_ROOT_PATH}/models/effb3_fold2/best-checkpoint-044epoch.bin

/home/heye0507/alask2//models/effb3_fold2/best-checkpoint-044epoch.bin


In [4]:
checkpoint = torch.load(f'{DATA_ROOT_PATH}/models/effb3_fold2/best-checkpoint-044epoch.bin')
net.load_state_dict(checkpoint['model_state_dict']);
net.eval();

In [5]:
class DatasetSubmissionRetriever(Dataset):

    def __init__(self, image_names, transforms=None):
        super().__init__()
        self.image_names = image_names
        self.transforms = transforms

    def __getitem__(self, index: int):
        image_name = self.image_names[index]
        image = cv2.imread(f'{DATA_ROOT_PATH}/data/Test/{image_name}', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        if self.transforms:
            sample = {'image': image}
            sample = self.transforms(**sample)
            image = sample['image']

        return image_name, image

    def __len__(self) -> int:
        return self.image_names.shape[0]

In [6]:
def get_valid_transforms():
    return A.Compose([
            A.Resize(height=512, width=512, p=1.0),
            A.Normalize(p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)

In [21]:
!ls {DATA_ROOT_PATH}/data

alaska2-image-steganalysis.zip	group_split.csv  JUNIWARD  UERD
Cover				JMiPOD		 Test


In [7]:
dataset = DatasetSubmissionRetriever(
    image_names=np.array([path.split('/')[-1] for path in glob('/home/heye0507/alask2/data/Test/*.jpg')]),
    transforms=get_valid_transforms(),
)


data_loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=False,
    num_workers=2,
    drop_last=False,
)

In [8]:
%%time

result = {'Id': [], 'Label': []}
for step, (image_names, images) in enumerate(data_loader):
    print(step, end='\r')
    
    y_pred = net(images.cuda())
    y_pred = 1 - nn.functional.softmax(y_pred, dim=1).data.cpu().numpy()[:,0]
    
    result['Id'].extend(image_names)
    result['Label'].extend(y_pred)

CPU times: user 1min 11s, sys: 21.5 s, total: 1min 32s
Wall time: 1min 33s


In [9]:
submission = pd.DataFrame(result)
submission.to_csv('submission.csv', index=False)
submission.head()

Unnamed: 0,Id,Label
0,0997.jpg,0.67494
1,3341.jpg,0.630643
2,2005.jpg,0.146035
3,4807.jpg,0.709994
4,0727.jpg,0.894155


In [10]:
!ls /home/heye0507/alask2/nbs/submission.csv

baseline.ipynb	effb2_inference.ipynb  effb5_base.py
effb2_base.py	effb4_base.py	       submission.csv


In [4]:
!kaggle competitions submit -c alaska2-image-steganalysis -f /home/heye0507/alask2/nbs/submission.csv -m effb3_fold4_tta

100%|████████████████████████████████████████| 137k/137k [00:01<00:00, 99.9kB/s]
Successfully submitted to ALASKA2 Image Steganalysis

# TTA

In [10]:
class AlaskTTA:
    """ author: @shonenkov """
    image_size = 512

    def augment(self, image):
        raise NotImplementedError
    
    def batch_augment(self, images):
        raise NotImplementedError
    
    def deaugment_boxes(self, boxes):
        raise NotImplementedError

class TTABypass(AlaskTTA):
    '''author: @dreamdragon'''
    
    def augment(self,image):
        return image
    
    def batch_augment(self,images):
        return images
    
    def deaugment_boxes(self,boxes):
        return boxes

class TTAHorizontalFlip(AlaskTTA):
    """ author: @shonenkov """

    def augment(self, image):
        return image.flip(1)
    
    def batch_augment(self, images):
        return images.flip(2)
    
    def deaugment_boxes(self, boxes):
        boxes[:, [1,3]] = self.image_size - boxes[:, [3,1]]
        return boxes

class TTAVerticalFlip(AlaskTTA):
    """ author: @shonenkov """
    
    def augment(self, image):
        return image.flip(2)
    
    def batch_augment(self, images):
        return images.flip(3)
    
    def deaugment_boxes(self, boxes):
        boxes[:, [0,2]] = self.image_size - boxes[:, [2,0]]
        return boxes
    
class TTARotate90(AlaskTTA):
    """ author: @shonenkov """
    
    def augment(self, image):
        return torch.rot90(image, 1, (1, 2))

    def batch_augment(self, images):
        return torch.rot90(images, 1, (2, 3))
    
    def deaugment_boxes(self, boxes):
        res_boxes = boxes.copy()
        res_boxes[:, [0,2]] = self.image_size - boxes[:, [1,3]]
        res_boxes[:, [1,3]] = boxes[:, [2,0]]
        return res_boxes

class TTACompose(AlaskTTA):
    """ author: @shonenkov """
    def __init__(self, transforms):
        self.transforms = transforms
        
    def augment(self, image):
        for transform in self.transforms:
            image = transform.augment(image)
        return image
    
    def batch_augment(self, images):
        for transform in self.transforms:
            images = transform.batch_augment(images)
        return images
    
    def prepare_boxes(self, boxes):
        result_boxes = boxes.copy()
        result_boxes[:,0] = np.min(boxes[:, [0,2]], axis=1)
        result_boxes[:,2] = np.max(boxes[:, [0,2]], axis=1)
        result_boxes[:,1] = np.min(boxes[:, [1,3]], axis=1)
        result_boxes[:,3] = np.max(boxes[:, [1,3]], axis=1)
        return result_boxes
    
    def deaugment_boxes(self, boxes):
        for transform in self.transforms[::-1]:
            boxes = transform.deaugment_boxes(boxes)
        return self.prepare_boxes(boxes)

In [11]:
tta_transforms = [
    TTABypass(),
    TTAHorizontalFlip(),
    TTAVerticalFlip(),
    TTAHorizontalFlip(),TTAVerticalFlip(),
]

In [12]:
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [13]:
result = {'Id': [], 'Label': []}
for step, (image_names,images) in enumerate(data_loader):
    print(step, end='\r')
    with torch.no_grad():
        images = images.to(device)
        predictions = []
        for tta_transform in tta_transforms:

            y_pred = net(tta_transform.batch_augment(images.clone()))
            y_pred = 1 - nn.functional.softmax(y_pred, dim=1).data.cpu().numpy()[:,0]

            predictions.append(y_pred)

        result['Id'].extend(image_names)
        result['Label'].extend(np.mean(predictions,axis=0))

624

In [14]:
submission = pd.DataFrame(result)
submission.to_csv('submission_fold2_tta.csv', index=False)
submission.head()

Unnamed: 0,Id,Label
0,0997.jpg,0.688886
1,3341.jpg,0.610389
2,2005.jpg,0.198463
3,4807.jpg,0.722501
4,0727.jpg,0.808634
