In [1]:
import os
import numpy as np 
import pandas as pd
import json

In [2]:
submission = pd.read_csv('sample_submission.csv')
submission.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Test_0,-6.580763,2.988371,8.956187,-7.519885
1,Test_1,-3.65608,0.191989,6.391723,-4.056837
2,Test_2,-6.070339,-1.365192,-5.448173,10.511926
3,Test_3,5.97767,-2.91257,-1.345271,-1.67503
4,Test_4,-5.337641,0.814937,7.146433,-3.8532


In [3]:
# ====================================================
# Library
# ====================================================

import sys

import gc
import os
import random
import time
from contextlib import contextmanager
from pathlib import Path
from collections import defaultdict, Counter

import cv2
from PIL import Image
import numpy as np
import pandas as pd
import scipy as sp

import sklearn.metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

from functools import partial
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset
import torchvision.models as models

from albumentations import Compose, Normalize, Resize, RandomResizedCrop, RandomCrop, HorizontalFlip
from albumentations.pytorch import ToTensorV2
import pretrainedmodels as models
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

  data = yaml.load(f.read()) or {}


device(type='cuda')

In [4]:
# ====================================================
# Utils
# ====================================================

@contextmanager
def timer(name):
    t0 = time.time()
    LOGGER.info(f'[{name}] start')
    yield
    LOGGER.info(f'[{name}] done in {time.time() - t0:.0f} s.')

    
def init_logger(log_file='train.log'):
    from logging import getLogger, DEBUG, FileHandler,  Formatter,  StreamHandler
    
    log_format = '%(asctime)s %(levelname)s %(message)s'
    
    stream_handler = StreamHandler()
    stream_handler.setLevel(DEBUG)
    stream_handler.setFormatter(Formatter(log_format))
    
    file_handler = FileHandler(log_file)
    file_handler.setFormatter(Formatter(log_format))
    
    logger = getLogger('Herbarium')
    logger.setLevel(DEBUG)
    logger.addHandler(stream_handler)
    logger.addHandler(file_handler)
    
    return logger

LOG_FILE = 'train.log'
LOGGER = init_logger(LOG_FILE)


def seed_torch(seed=777):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

SEED = 777
seed_torch(SEED)

In [5]:
class PredictData(Dataset):
    def __init__(self,csv,transform):
        self.data = csv
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self,idx):
        image = Image.open('images/'+self.data.loc[idx]['image_id']+'.jpg')
        image = self.transform(image)
        return {'images':image}

In [6]:
test_df = pd.read_csv('test.csv')
test_df.head()

Unnamed: 0,image_id
0,Test_0
1,Test_1
2,Test_2
3,Test_3
4,Test_4


In [7]:
from torchvision import transforms, models

In [8]:
simple_transform = transforms.Compose([transforms.Resize((320,512)),transforms.ToTensor(),transforms.Normalize([0.485, 0.456, 0.406],[0.229,0.224,0.225])])

In [9]:
predictionData = PredictData(test_df,simple_transform)

In [10]:
predictloader = DataLoader(predictionData)

In [11]:
from functools import partial

import torch
from torch import nn
from torch.nn import functional as F
import torchvision.models as M

class AvgPool(nn.Module):
    def forward(self, x):
        return F.avg_pool2d(x, x.shape[2:])

def create_net(net_cls, pretrained: bool):
    if True and pretrained:
        net = net_cls()
        model_name = net_cls.__name__
        weights_path = f'{model_name}/{model_name}.pth'
        net.load_state_dict(torch.load(weights_path))
    else:
        net = net_cls(pretrained=pretrained)
    return net


class ResNet(nn.Module):
    def __init__(self, num_classes,
                 pretrained=False, net_cls=M.resnet50, dropout=False):
        super().__init__()
        self.net = create_net(net_cls, pretrained=pretrained)
        self.net.avgpool = AvgPool()
        if dropout:
            self.net.fc = nn.Sequential(
                nn.Dropout(),
                nn.Linear(self.net.fc.in_features, num_classes),
            )
        else:
            self.net.fc = nn.Linear(self.net.fc.in_features, num_classes)

    def fresh_params(self):
        return self.net.fc.parameters()

    def forward(self, x):
        return self.net(x)


class DenseNet(nn.Module):
    def __init__(self, num_classes,
                 pretrained=False, net_cls=M.densenet121):
        super().__init__()
        self.net = create_net(net_cls, pretrained=pretrained)
        self.avg_pool = AvgPool()
        self.net.classifier = nn.Linear(
            self.net.classifier.in_features, num_classes)

    def fresh_params(self):
        return self.net.classifier.parameters()

    def forward(self, x):
        out = self.net.features(x)
        out = F.relu(out, inplace=True)
        out = self.avg_pool(out).view(out.size(0), -1)
        out = self.net.classifier(out)
        return out


resnet18 = partial(ResNet, net_cls=M.resnet18)
resnet34 = partial(ResNet, net_cls=M.resnet34)
resnet50 = partial(ResNet, net_cls=M.resnet50)
resnet101 = partial(ResNet, net_cls=M.resnet101)
resnet152 = partial(ResNet, net_cls=M.resnet152)

densenet121 = partial(DenseNet, net_cls=M.densenet121)
densenet169 = partial(DenseNet, net_cls=M.densenet169)
densenet201 = partial(DenseNet, net_cls=M.densenet201)
densenet161 = partial(DenseNet, net_cls=M.densenet161)

In [12]:
N_CLASSES = 4

In [13]:
criterion = nn.BCEWithLogitsLoss(reduction='none')
#model = resnet50(num_classes=N_CLASSES, pretrained=True)

In [14]:
import pretrainedmodels as models
from torch import nn, cuda

In [15]:
model = models.se_resnext50_32x4d()
feature_dim = model.last_linear.in_features
class AvgPool(nn.Module):
    def forward(self, x):
        # print (x.size())
        return F.avg_pool2d(x, x.shape[2:])
model.avg_pool = AvgPool()
model.avgpool = AvgPool()
model.last_linear = nn.Linear(feature_dim, N_CLASSES)
use_cuda = cuda.is_available()
fresh_params = list(model.last_linear.parameters())
all_params = list(model.parameters())

In [16]:
from typing import Dict

In [17]:
def load_model(model: nn.Module, path: Path) -> Dict:
    state = torch.load(str(path))
    model.load_state_dict(state['model'])
    print('Loaded model from epoch {epoch}, step {step:,}'.format(**state))
    return state

In [18]:
load_model(model, 'best-model.pt')

Loaded model from epoch 6, step 0


{'model': OrderedDict([('layer0.conv1.weight',
               tensor([[[[ 3.0938e-01, -1.1450e+00, -1.8857e+00,  ...,  2.3302e+00,
                          -2.5942e+00,  2.4565e+00],
                         [ 1.2546e+00, -2.5060e+00, -5.1403e+00,  ...,  8.4138e+00,
                          -3.4907e+00, -1.9973e+00],
                         [ 3.7464e+00, -5.3472e+00, -9.2393e+00,  ...,  1.4150e+01,
                          -4.0582e+00, -5.3479e+00],
                         ...,
                         [ 4.0534e+00, -7.7758e+00, -1.2367e+01,  ...,  1.8687e+01,
                          -2.3814e+00, -8.4856e+00],
                         [ 3.2233e+00, -4.8134e+00, -9.1263e+00,  ...,  1.3481e+01,
                          -1.5881e+00, -5.1764e+00],
                         [ 2.5675e+00, -1.7637e+00, -4.7620e+00,  ...,  7.7239e+00,
                          -7.5109e-01, -4.7060e-01]],
               
                        [[ 2.2217e-01, -2.0809e+00, -3.0609e+00,  ...,  4.0250e+00,


In [26]:
with timer('inference'):
    
    model.to(device) 
    
    preds = []
    tk0 = tqdm(enumerate(predictloader), total=len(predictloader))

    for i, images in tk0:
            
        images = images['images'].to(device)
            
        with torch.no_grad():
            y_preds = model(images)
            
        #preds.append(torch.sigmoid(y_preds).to('cpu').numpy())
        preds.append(torch.softmax(y_preds, dim=1).to('cpu').numpy())

2020-06-02 19:22:54,258 INFO [inference] start
100%|██████████| 1821/1821 [01:21<00:00, 22.26it/s]
2020-06-02 19:24:16,073 INFO [inference] done in 82 s.


In [27]:
submission_df = pd.read_csv('sample_submission.csv')
submission_df.iloc[:, 1:] = 0

submission_df.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Test_0,0,0,0,0
1,Test_1,0,0,0,0
2,Test_2,0,0,0,0
3,Test_3,0,0,0,0
4,Test_4,0,0,0,0


In [28]:
predictions = np.concatenate(preds)

In [29]:
predictions

array([[0.00461631, 0.46502057, 0.4746719 , 0.05569125],
       [0.00345211, 0.4582207 , 0.4868876 , 0.05143964],
       [0.0060435 , 0.53424996, 0.28831434, 0.17139222],
       ...,
       [0.00461447, 0.41151178, 0.5382474 , 0.04562632],
       [0.01323953, 0.43574095, 0.46046695, 0.09055262],
       [0.00536344, 0.5884421 , 0.24534388, 0.16085055]], dtype=float32)

In [30]:
submission_df[['healthy', 'multiple_diseases', 'rust', 'scab']] = predictions

In [31]:
submission_df.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Test_0,0.004616,0.465021,0.474672,0.055691
1,Test_1,0.003452,0.458221,0.486888,0.05144
2,Test_2,0.006044,0.53425,0.288314,0.171392
3,Test_3,0.00938,0.491858,0.399262,0.0995
4,Test_4,0.005189,0.500143,0.417596,0.077072


In [32]:
submission_df.to_csv('submission.csv', index=False)