In [None]:
import os
import sys
import time
import math
import numpy as np
import pandas as pd
from shutil import copyfile
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

import cv2
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensor

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold, StratifiedKFold

from apex import amp
import pretrainedmodels
from torchcontrib.optim import SWA
# from efficientnet_pytorch import EfficientNet

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.swa_utils import AveragedModel, SWALR
from tqdm.notebook import tqdm
from torch.optim import Adam, SGD, AdamW
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau, CyclicLR
from torch.utils.data.sampler import SubsetRandomSampler, RandomSampler, SequentialSampler
from torch.utils.data import DataLoader, Dataset

In [None]:
# Directory that holds the competition data; the training CSV sits directly inside it.
root = './input/'

# Training metadata table, read from <root>/train.csv.
df = pd.read_csv(root + 'train.csv')

In [None]:
def _resize_step():
    """Build the 256x256 resize step that both pipelines start with."""
    return A.Resize(height=256, width=256, p=1.0)


# Training pipeline: resize, then random geometric and photometric augmentation.
transforms_train = A.Compose(
    [
        _resize_step(),
        A.ShiftScaleRotate(p=0.5),
        A.Flip(),
        A.RandomBrightnessContrast(),
    ]
)

# Validation pipeline: resize only, no augmentation.
transforms_valid = A.Compose([_resize_step()])

In [None]:
class LeafDataset(Dataset):
    """Dataset that yields ``(image, label)`` tensor pairs for leaf images.

    Args:
        df: DataFrame with an ``image_id`` column holding image file names.
        labels: Per-row labels, aligned *positionally* with ``df``. May be a
            pandas Series (any index) or any integer-indexable sequence.
        transform: Optional callable invoked as ``transform(image=image)`` and
            expected to return a mapping with an ``'image'`` entry.
        image_dir: Directory that contains the image files.

    Each item is a float32 tensor laid out channels-first with values scaled
    by 1/255, paired with the label converted via ``torch.tensor``.
    """

    def __init__(self, df, labels, transform=None, image_dir='./input/train_images'):
        self.df = df
        self.labels = labels
        self.transform = transform
        self.image_dir = image_dir

    def __len__(self):
        return len(self.df)

    def _load_image(self, fname):
        """Read ``fname`` from ``image_dir`` and return it as an RGB array."""
        fpath = os.path.join(self.image_dir, fname)
        image = cv2.imread(fpath, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(f'Could not read image: {fpath}')
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def _label_at(self, index):
        """Return the label at *position* ``index``."""
        labels = self.labels
        # A pandas Series indexed with [] looks up by index label, which is
        # wrong (or a KeyError) when the index was not reset; use .iloc.
        if hasattr(labels, 'iloc'):
            return labels.iloc[index]
        return labels[index]

    def __getitem__(self, index):
        fname = self.df['image_id'].values[index]
        image = self._load_image(fname)

        if self.transform:
            image = self.transform(image=image)['image']

        label = self._label_at(index)

        # Scale to float32 and move channels first: (H, W, C) -> (C, H, W).
        image = image.astype(np.float32) / 255.0
        image = image.transpose(2, 0, 1)

        return torch.tensor(image), torch.tensor(label)

In [None]:
# Preview a few randomly chosen, augmented samples from the first 1000 rows.
preview_df = df[:1000].reset_index(drop=True)
train_image = LeafDataset(preview_df, preview_df['label'], transform=transforms_train)

from pylab import rcParams
rcParams['figure.figsize'] = 20, 10
for _ in range(2):
    f, axarr = plt.subplots(1, 5)
    for p in range(5):
        idx = np.random.randint(0, len(train_image))
        img, label = train_image[idx]
        # The dataset yields channels-first tensors; imshow wants (H, W, C).
        axarr[p].imshow(img.permute(1, 2, 0).numpy())
        # Show the plain class id instead of the tensor object.
        axarr[p].set_title(str(label.item()))

In [None]:
# Name of the architecture to look up in the `pretrainedmodels` package.
model_name = 'resnet34'

# Look the constructor up by name and build the network, requesting the
# 'imagenet' pretrained weights with the 1000-class head.
model = getattr(pretrainedmodels, model_name)(num_classes=1000, pretrained='imagenet')

In [None]:
class resnet34(nn.Module):
    """Backbone with its last two child modules removed, plus a linear head.

    Args:
        model: Network whose children (all but the last two) become the
            feature extractor. Defaults to the module-level ``model``.
        num_classes: Number of output logits of the classification head.

    The head assumes the truncated backbone emits 512 channels
    (NOTE(review): true for the default backbone presumably -- confirm when
    passing a different ``model``).
    """

    def __init__(self, model=model, num_classes=5):
        super(resnet34, self).__init__()
        import copy

        # The default argument is evaluated once, at class-definition time, and
        # nn.Sequential reuses the child module objects it is given. Without a
        # copy, every instance would share -- and keep training -- the same
        # backbone weights (e.g. across cross-validation folds).
        backbone = copy.deepcopy(model)
        layers = list(backbone.children())

        # Drop the last two children of the backbone; pooling and the
        # classifier are re-created below.
        self.base_model = nn.Sequential(*layers[:-2])
        self.adaptivepooling = nn.AdaptiveAvgPool2d(1)
        self.flatten = nn.Flatten()

        self.fc1 = nn.Linear(in_features=512, out_features=num_classes)

    def forward(self, x):
        """Return class logits for the image batch ``x``."""
        x = self.base_model(x)
        x = self.adaptivepooling(x)
        x = self.flatten(x)

        x1 = self.fc1(x)

        return x1

In [None]:
def _train_one_epoch(model, loader, optimizer, loss_func, device, n_samples):
    """Run one optimisation pass over ``loader``.

    Returns ``(mean_batch_loss, accuracy)``, where accuracy is the number of
    correct argmax predictions divided by ``n_samples``.
    """
    model.train()
    avg_loss = 0.0
    correct = 0

    for data, target in tqdm(loader, total=len(loader)):
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        outputs = model(data)
        loss = loss_func(outputs, target)

        # apex AMP: backward has to be called on the scaled loss, inside the
        # scale_loss context manager.
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()

        avg_loss += loss.item() / len(loader)
        correct += (outputs.argmax(1) == target).sum().item()

    return avg_loss, correct / n_samples


def _evaluate(model, loader, loss_func, device, n_samples):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Returns ``(mean_batch_loss, accuracy)`` computed like in training.
    """
    model.eval()
    avg_loss = 0.0
    correct = 0

    with torch.no_grad():
        for data, target in tqdm(loader, total=len(loader)):
            data = data.to(device)
            target = target.to(device)

            outputs = model(data)
            # Targets are used as-is, matching the training pass.
            loss = loss_func(outputs, target)

            avg_loss += loss.item() / len(loader)
            correct += (outputs.argmax(1) == target).sum().item()

    return avg_loss, correct / n_samples


folds = df.copy()
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Settings that are identical for every fold.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
num_epochs = 10
kernel_type = 'resnet34'

for fold, (train_idx, valid_idx) in enumerate(kf.split(folds)):

    # Positional 0..n-1 index so dataset positions line up with the labels.
    train_test = folds.iloc[train_idx].reset_index(drop=True)
    valid_test = folds.iloc[valid_idx].reset_index(drop=True)

    train_dataset = LeafDataset(train_test, train_test['label'], transforms_train)
    valid_dataset = LeafDataset(valid_test, valid_test['label'], transforms_valid)

    train_loader = DataLoader(train_dataset, batch_size=64, num_workers=4, sampler=RandomSampler(train_dataset))
    valid_loader = DataLoader(valid_dataset, batch_size=64, num_workers=4, sampler=SequentialSampler(valid_dataset))

    model = resnet34().to(device)
    optimizer = Adam(model.parameters(), lr=3e-4)
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
    # One cosine half-period spans the whole run of num_epochs epochs.
    scheduler = CosineAnnealingLR(optimizer, num_epochs)

    loss_func = nn.CrossEntropyLoss().to(device)

    # Per-fold training history.
    losses = []
    accs = []
    val_losses = []
    val_accs = []
    val_preds = []
    best_file = f'{kernel_type}_best_fold{fold}.bin'
    acc_max = 0

    for epoch in range(num_epochs):

        print('epochs {}/{} '.format(epoch+1,num_epochs))
        print('  ' + ('-' * 20))

        avg_train_loss, train_running_acc = _train_one_epoch(
            model, train_loader, optimizer, loss_func, device, len(train_idx)
        )
        losses.append(avg_train_loss)
        accs.append(train_running_acc)

        # Validate after every epoch so the best checkpoint can be tracked.
        avg_val_loss, val_running_acc = _evaluate(
            model, valid_loader, loss_func, device, len(valid_idx)
        )
        val_losses.append(avg_val_loss)
        val_accs.append(val_running_acc)

        print('train_acc : {:.4f}, val_acc : {:.4f}'.format(train_running_acc, val_running_acc))
        print('train_loss : {:.4f}, val_loss : {:.4f}'.format(avg_train_loss, avg_val_loss))
        print(f'lr: {optimizer.param_groups[0]["lr"]:.7f}')

        if val_running_acc > acc_max:
            print('score2 ({:.6f} --> {:.6f}).  Saving model ...'.format(acc_max, val_running_acc))
            print('\n')
            torch.save(model.state_dict(), best_file)
            acc_max = val_running_acc

        # Advance the LR schedule once per epoch, after the optimizer steps.
        scheduler.step()

    # Keep the last-epoch weights of every fold; the fold index in the file
    # name prevents later folds from overwriting earlier ones.
    torch.save(model.state_dict(), f'{kernel_type}_final_fold{fold}.bin')