In [19]:
import os
import time
import random
from tqdm import tqdm

# data handling
import pandas as pd
import numpy as np

import cv2

# torch
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# optim, scheduler
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from warmup_scheduler import GradualWarmupScheduler

# pytorch-lightning
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor

# pre-trained models
import timm

# augmentations
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

# cross-validation
from sklearn.model_selection import GroupKFold, StratifiedKFold
from sklearn.metrics import f1_score, roc_auc_score

# logger
import wandb

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# warnings
import warnings
warnings.filterwarnings('ignore')


In [20]:
from func import *

In [21]:
class config:
    """Namespace of notebook-wide settings, read as class attributes (never instantiated here)."""

    # Root folder of the competition data.
    data_dir = '../data/'

    # Use the GPU when torch can see one, otherwise fall back to the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Training-style hyper-parameters; the cells in this notebook do not read them
    # (presumably consumed by the model code pulled in from `func` -- TODO confirm).
    img_size = 256
    epochs = 40
    lr = 1e-3 # [1e-3, 0.00025]
    batch_size = 32

    # Batch size and worker count used by every DataLoader built in this notebook.
    val_batch_size = 64
    num_workers = 0

    # Number of cross-validation folds, and the random seed.
    k = 5
    seed = 42

    # Dataset slots filled in later by the notebook cells.
    train_dataset = None
    valid_dataset = None

    # Free-form experiment tag.
    version = 'baseline_gray'

In [32]:
class PlantDataset(Dataset):
    """Dataset of (before, after) grayscale image pairs labelled with ``time_delta``.

    Args:
        config: settings object; stored on the instance but not read by this class.
        df: DataFrame with ``before_file_path``, ``after_file_path`` and
            ``time_delta`` columns.
        mode: free-form tag (e.g. 'train' / 'valid'); stored but not read by this class.
        transforms: optional callable invoked as ``transforms(image=img)['image']``
            on each of the two images independently.

    Each item is a dict with float32 tensors ``'be_img'`` / ``'af_img'`` and an
    int64 tensor ``'label'``.
    """

    def __init__(self, config, df, mode, transforms=None):
        self.config = config
        # Re-index to 0..n-1 so lookups stay valid even when `df` is a filtered
        # slice that kept its original (non-contiguous) index.
        self.before_img_path = df['before_file_path'].reset_index(drop=True)
        self.after_img_path = df['after_file_path'].reset_index(drop=True)

        self.labels = df['time_delta'].reset_index(drop=True)

        self.mode = mode
        self.transforms = transforms

        # Unused by this class; kept so existing attribute access keeps working.
        self.images = []

    def __len__(self):
        """Number of image pairs."""
        return len(self.labels)

    @staticmethod
    def _read_gray(path):
        """Read ``path`` as a single-channel image, failing loudly if it cannot be read."""
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError(f"cv2.imread could not read image: {path}")
        return img

    def __getitem__(self, idx):
        """Load pair ``idx``, apply the optional transforms and return the sample dict.

        Raises:
            FileNotFoundError: if either image file cannot be read.
        """
        before_img = self._read_gray(self.before_img_path.iloc[idx])
        after_img = self._read_gray(self.after_img_path.iloc[idx])
        # Pixel values are left unscaled here (no division by 255).
        label = self.labels.iloc[idx]

        if self.transforms is not None:
            before_img = self.transforms(image=before_img)['image']
            after_img = self.transforms(image=after_img)['image']

        # as_tensor accepts both numpy arrays and tensors already produced by the
        # transforms; torch.tensor() warns when handed an existing tensor.
        return {
            'be_img': torch.as_tensor(before_img, dtype=torch.float32),
            'af_img': torch.as_tensor(after_img, dtype=torch.float32),
            'label': torch.tensor(label).long(),
        }

In [33]:
def transforms():
    """Build the albumentations pipeline applied to every image.

    Only the tensor conversion is active; the resize step is left commented out.
    """
    steps = [
        # A.Resize(config.img_size, config.img_size),
        ToTensorV2(),
    ]
    return A.Compose(steps)

In [34]:
# Folder holding the test CSV and the test images, derived from the shared data root.
TEST_DIR = config.data_dir + 'test_dataset/'


def _test_image_path(file_id):
    """Turn a test-set file id into the path of its PNG under TEST_DIR.

    The id is split on '_': the second token (plus '_adjust') names the first
    sub-folder and the third token names the second sub-folder.
    """
    tokens = file_id.split('_')
    return TEST_DIR + tokens[1] + '_adjust/' + tokens[2] + '/' + file_id + '.png'


test = pd.read_csv(TEST_DIR + 'test_data.csv')
for path_col in ('before_file_path', 'after_file_path'):
    test[path_col] = test[path_col].apply(_test_image_path)

# test['before_file_path'] = test['before_file_path'].apply(lambda x: x.replace('.png', '_resize256.png'))
# test['after_file_path'] = test['after_file_path'].apply(lambda x: x.replace('.png', '_resize256.png'))

# Placeholder label: PlantDataset reads df['time_delta'], which the test CSV is given here.
test['time_delta'] = 0

# Load the training pairs and derive, from the before-image path, the plant
# type ('BC' / 'LT') and the group key used for cross-validation.
df_train = pd.read_csv('../data/train.csv')
before_paths = df_train['before_file_path']
df_train['type'] = before_paths.map(lambda p: 'BC' if 'BC' in p else 'LT')
# Group key = first 5 characters after 'adjust/' in the path.
df_train['splits'] = before_paths.map(lambda p: p.split('adjust/')[-1][:5])# + '_' + df_train['time_delta'].astype(str)

# One frame per type, each re-indexed from 0 so split indices can be used with .loc.
train1 = df_train.loc[df_train['type'] == 'BC'].reset_index(drop=True)
train2 = df_train.loc[df_train['type'] == 'LT'].reset_index(drop=True)

# Split each type separately, grouping rows by their `splits` key.
gk = GroupKFold(n_splits=config.k)
n_splits = list(gk.split(train1, y=train1['time_delta'], groups=train1['splits']))
n_splits2 = list(gk.split(train2, y=train2['time_delta'], groups=train2['splits']))

# Record, for every row, the fold in which it is held out.
for frame, folds in ((train1, n_splits), (train2, n_splits2)):
    frame['n_fold'] = -1
    for fold_id, (_, holdout_idx) in enumerate(folds):
        frame.loc[holdout_idx, 'n_fold'] = fold_id

In [35]:
# Tag each test pair 'BC' when 'BC' occurs in its before-image path, else 'LT'.
test['type'] = test['before_file_path'].map(lambda path: 'BC' if 'BC' in path else 'LT')

In [36]:
# Wrap the test frame in a dataset and a fixed-order loader for inference.
# NOTE(review): the test set is stored in `config.valid_dataset` and built with
# mode='train'; PlantDataset only stores `mode`, so this does not change the data.
config.valid_dataset = PlantDataset(config, test, mode='train', transforms=transforms())

loader_kwargs = dict(
    batch_size=config.val_batch_size,
    num_workers=config.num_workers,
    shuffle=False,      # keep row order so predictions line up with the frame
    drop_last=False,    # every row needs a prediction
    pin_memory=True,
)
loader = DataLoader(config.valid_dataset, **loader_kwargs)

In [37]:
def tta(image1, image2, n):
    """Apply the n-th flip variant to both images identically.

    n == 0: unchanged; 1: flip along dim 2; 2: flip along dim 3;
    3: flip along dim 2 then dim 3. Any other n leaves the inputs unchanged.
    Presumably dims 2/3 are height/width of a batched tensor -- TODO confirm.

    Returns:
        The (image1, image2) pair after the selected flips.
    """
    flip_dim2 = n in (1, 3)
    flip_dim3 = n in (2, 3)

    if flip_dim2:
        image1 = image1.flip(2)
        image2 = image2.flip(2)
    if flip_dim3:
        image1 = image1.flip(3)
        image2 = image2.flip(3)
    return image1, image2

In [43]:
def _predict_loader(model, data_loader, device):
    """Run ``model`` over every batch of ``data_loader`` and return the predictions.

    Each batch must provide 'be_img' and 'af_img'. The first element of the
    model's output is taken as the prediction (presumably the model returns a
    tuple/list whose first entry is the time-delta estimate -- TODO confirm).

    Returns:
        numpy array: the per-batch predictions concatenated in loader order.
    """
    chunks = []
    with torch.no_grad():
        for batch in data_loader:
            # Flip-based TTA was tried here and is currently disabled:
            # tta_preds = []
            # for n in range(4):
            #     img1, img2 = tta(batch['be_img'], batch['af_img'], n)
            #     pred = model(img1.to(device), img2.to(device))
            #     tta_preds += [pred.clone().detach().cpu().numpy()]
            # chunks += [np.median(tta_preds, 0)]
            pred = model(batch['be_img'].to(device), batch['af_img'].to(device))[0]
            chunks.append(pred.detach().cpu().numpy())
    return np.concatenate(chunks)


oof = {}
test_preds = []
model_path = 'model/cc5__b0__relu__aug7/'
# One checkpoint per fold; the list position must match the `n_fold` value it was validated on.
models = ['5fold_0__epoch=5_total_val_loss=1.6163_total_val_mse=8.0251.ckpt',
          '5fold_1__epoch=22_total_val_loss=1.0144_total_val_mse=3.0232.ckpt',
          '5fold_2__epoch=9_total_val_loss=3.6898_total_val_mse=30.4583.ckpt',
          '5fold_3__epoch=33_total_val_loss=1.0335_total_val_mse=2.7372.ckpt',
          '5fold_4__epoch=15_total_val_loss=1.7424_total_val_mse=8.3490.ckpt']

# Wrap the list (not the enumerate object) so tqdm knows the total and can show progress.
for fold, m in enumerate(tqdm(models, desc='folds')):

    # Held-out rows of this fold from both type-specific frames.
    vv = pd.concat([train1[train1['n_fold']==fold], train2[train2['n_fold']==fold]]).reset_index(drop=True)
    valid_loader = DataLoader(
                                PlantDataset(config, vv, mode='valid', transforms=transforms()),
                                batch_size=config.val_batch_size,
                                num_workers=config.num_workers,
                                shuffle=False,
                                drop_last=False,
                                pin_memory=True,
                            )

    model = plModel(config)
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only machine.
    checkpoint = torch.load(os.path.join(model_path, m), map_location=config.device)
    model.load_state_dict(checkpoint['state_dict'])
    model.to(config.device).eval()

    # Out-of-fold predictions for this fold's held-out rows.
    vv['oof'] = _predict_loader(model, valid_loader, config.device)
    oof[fold] = vv

    # Test-set predictions from this fold's model; combined across folds later.
    test_preds += [_predict_loader(model, loader, config.device)]

5it [02:55, 35.19s/it]


In [44]:
# Show held-out rows whose prediction misses the label by more than 4.
# `vv` is whatever the fold loop left behind, i.e. the last fold processed.
abs_err = (vv['time_delta'] - vv['oof']).abs()
vv[abs_err > 4]

Unnamed: 0,before_file_path,after_file_path,time_delta,type,splits,n_fold,oof
127,../data/train_dataset/BC_adjust/BC_02/DAT04.png,../data/train_dataset/BC_adjust/BC_02/DAT18.png,14,BC,BC_02,4,18.578541
128,../data/train_dataset/BC_adjust/BC_02/DAT04.png,../data/train_dataset/BC_adjust/BC_02/DAT19.png,15,BC,BC_02,4,19.240488
161,../data/train_dataset/BC_adjust/BC_02/DAT05.png,../data/train_dataset/BC_adjust/BC_02/DAT17.png,12,BC,BC_02,4,16.458447
162,../data/train_dataset/BC_adjust/BC_02/DAT05.png,../data/train_dataset/BC_adjust/BC_02/DAT18.png,13,BC,BC_02,4,18.048851
163,../data/train_dataset/BC_adjust/BC_02/DAT05.png,../data/train_dataset/BC_adjust/BC_02/DAT19.png,14,BC,BC_02,4,18.699745
...,...,...,...,...,...,...,...
2859,../data/train_dataset/LT_adjust/LT_05/DAT24.png,../data/train_dataset/LT_adjust/LT_05/DAT34.png,10,LT,LT_05,4,5.768867
2860,../data/train_dataset/LT_adjust/LT_05/DAT24.png,../data/train_dataset/LT_adjust/LT_05/DAT35.png,11,LT,LT_05,4,6.960283
2862,../data/train_dataset/LT_adjust/LT_05/DAT24.png,../data/train_dataset/LT_adjust/LT_05/DAT37.png,13,LT,LT_05,4,8.690209
2863,../data/train_dataset/LT_adjust/LT_05/DAT24.png,../data/train_dataset/LT_adjust/LT_05/DAT38.png,14,LT,LT_05,4,9.154720


In [45]:
# Fill the sample submission with the per-row median over the fold models,
# limited to the range [1, 42].
sub = pd.read_csv('../data/sample_submission.csv')
fold_median = np.median(test_preds, axis=0)
sub['time_delta'] = np.clip(fold_median, 1, 42)
sub

Unnamed: 0,idx,time_delta
0,0,25.085854
1,1,31.721918
2,2,2.947308
3,3,3.892324
4,4,23.394672
...,...,...
3955,3955,1.000000
3956,3956,25.894575
3957,3957,20.753315
3958,3958,4.258999


In [46]:
# Persist the fold-median submission without the DataFrame index column.
submission_path = '../submit/cc5__b0__relu__aug7__5folds.csv'
sub.to_csv(submission_path, index=False)

In [22]:
# Load two previously written submissions to blend.
# NOTE(review): this rebinds `sub`, replacing the frame built earlier in the notebook.
sub, sub2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../submit/cc__b0__each__aug5__5folds.csv',
        '../submit/cc5__seresnext1__each__fc_expand__aug5__5folds.csv',
    )
)

In [23]:
# Blend the two submissions with an equal-weight average, row by row.
# Rows are matched by DataFrame index, so both files must list rows in the same order.
en_sub = pd.read_csv('../data/sample_submission.csv')
en_sub['time_delta'] = sub['time_delta'].add(sub2['time_delta']).div(2)

In [24]:
# Persist the blended submission without the DataFrame index column.
ensemble_path = '../submit/b0_seresnext_ensembles.csv'
en_sub.to_csv(ensemble_path, index=False)