In [1]:
import os
import time
import random
from tqdm import tqdm

# handing
import pandas as pd
import numpy as np

import cv2

# torch
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# optim, scheduler
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, CosineAnnealingLR, ReduceLROnPlateau
from warmup_scheduler import GradualWarmupScheduler

# pytorch-lightning
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor

# pre-trained models
import timm

# augmentations
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

# cross-validation
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, roc_auc_score

# logger
import wandb

# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
class config:
    data_dir = '../data/'
    
    device = device = "cuda" if torch.cuda.is_available() else "cpu"

    img_size = 256
    epochs = 20
    lr = 1e-3 # [1e-3, 0.00025]
    batch_size = 64
    val_batch_size = 64
    
    num_workers = 0
    
    k = 5
    seed = 42

    train_dataset = None
    valid_dataset = None

    version = 'baseline'

In [3]:
def seed_everything(seed=42):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [4]:
def train_get_transforms():
    return A.Compose([
            A.Resize(config.img_size, config.img_size),
            ToTensorV2()
    ])


def valid_get_transforms():
    return A.Compose([
            A.Resize(config.img_size, config.img_size),
            ToTensorV2()
    ])

In [5]:
class PlantDataset(Dataset):
    def __init__(self, config, df, mode, transforms=None):
        self.config = config
        self.before_img_path = df['before_file_path']
        self.after_img_path = df['after_file_path']
        
        self.labels = df['time_delta']
        
        self.mode = mode
        self.transforms = transforms
        
        self.images = []
        
        # print(f'########################### {mode} dataset loader')
        # for image_path in tqdm(self.image_paths):
        #     image = cv2.imread(image_path, cv2.COLOR_BGR2RGB)
        #     self.images.append(image)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        before_img = cv2.imread(self.before_img_path[idx], cv2.COLOR_BGR2RGB)
        after_img = cv2.imread(self.after_img_path[idx], cv2.COLOR_BGR2RGB)
        label = self.labels[idx]

        if self.transforms!=None:
            before_img = self.transforms(image=before_img)['image']
            after_img = self.transforms(image=after_img)['image']

        data = {
                    'be_img':torch.tensor(before_img, dtype=torch.float32),
                    'af_img':torch.tensor(after_img, dtype=torch.float32),
                    'label':torch.tensor(label).long(),
                }
        return data

In [6]:
class plModel(pl.LightningModule):
    def __init__(self, config):
        super(plModel, self).__init__()

        self.config = config
        chans = config.valid_dataset[0]['be_img'].shape[0]

        self.before_model = timm.create_model(model_name='tf_efficientnet_b0_ns', pretrained=True, in_chans=chans) 
        self.after_model = timm.create_model(model_name='tf_efficientnet_b0_ns', pretrained=True, in_chans=chans) 
        # [efficientnet_b1_pruned, efficientnet_lite0, resnet34, tf_efficientnet_b0_ns, densenet121]
        num_classes = 1

        if hasattr(self.before_model, "fc"):
            nb_ft = self.before_model.fc.in_features
            nb_ft = self.after_model.fc.in_features
            self.before_model.fc = nn.Linear(nb_ft, num_classes)
            self.after_model.fc = nn.Linear(nb_ft, num_classes)
        elif hasattr(self.before_model, "_fc"):
            nb_ft = self.before_model._fc.in_features
            nb_ft = self.after_model._fc.in_features
            self.before_model._fc = nn.Linear(nb_ft, num_classes)
            self.after_model._fc = nn.Linear(nb_ft, num_classes)
        elif hasattr(self.before_model, "classifier"):
            nb_ft = self.before_model.classifier.in_features
            nb_ft = self.after_model.classifier.in_features
            self.before_model.classifier = nn.Linear(nb_ft, num_classes)
            self.after_model.classifier = nn.Linear(nb_ft, num_classes)
        elif hasattr(self.before_model, "last_linear"):
            nb_ft = self.before_model.last_linear.in_features
            nb_ft = self.after_model.last_linear.in_features
            self.before_model.last_linear = nn.Linear(nb_ft, num_classes)
            self.after_model.last_linear = nn.Linear(nb_ft, num_classes)
        
        ############################################## Loss 
        self.criterion = nn.L1Loss()
        
    def forward(self, x1, x2):
        out1 = self.before_model(x1)
        out2 = self.after_model(x2)
        out = out2-out1
        return out

    def train_dataloader(self):
        loader = DataLoader(
                            self.config.train_dataset,
                            batch_size=self.config.batch_size,
                            num_workers=self.config.num_workers,
                            shuffle=True,
                            drop_last=True,
                            pin_memory=True,
                        )
        return loader

    def val_dataloader(self):
        loader = DataLoader(
                            self.config.valid_dataset,
                            batch_size=self.config.val_batch_size,
                            num_workers=self.config.num_workers,
                            shuffle=False,
                            drop_last=True,
                            pin_memory=True,
                        )
        return loader

    def training_step(self, train_batch, batch_idx):
        pred = self.forward(train_batch['be_img'], train_batch['af_img'])
        loss = self.criterion(pred, train_batch['label'])
        
        pred = torch.sigmoid(pred)
        self.log("train_loss", loss, on_step=True, prog_bar=True, logger=True)
        return {'loss':loss, 'pred':pred.clone().detach().cpu(), 'label':train_batch['label'].clone().detach().cpu()}
    
    def training_epoch_end(self, outputs):
        avg_loss = torch.stack([x['loss'] for x in outputs]).mean()
        
        preds = torch.cat([x['pred'] for x in outputs])
        labels = torch.cat([x['label'] for x in outputs])
        mse = torch.sum((labels-preds.squeeze())**2) / len(labels)
        
        self.log("total_train_loss", avg_loss, logger=True)
        self.log("total_train_mse", mse, logger=True)

    def validation_step(self, val_batch, batch_idx):
        pred = self.forward(val_batch['be_img'], val_batch['af_img'])
        loss = self.criterion(pred, val_batch['label'])
        
        pred = torch.sigmoid(pred)
        self.log("val_loss", loss, on_step=True, prog_bar=True, logger=True)
        return {"val_loss": loss, 'pred':pred.clone().detach().cpu(), 'label':val_batch['label'].clone().detach().cpu()}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        
        preds = torch.cat([x['pred'] for x in outputs])
        labels = torch.cat([x['label'] for x in outputs])
        mse = torch.sum((labels-preds.squeeze())**2) / len(labels)
        
        self.log("total_val_loss", avg_loss, logger=True)
        self.log("total_val_mse", mse, logger=True)

    def configure_optimizers(self):
        optimizer = AdamW(self.parameters(), lr=self.config.lr, weight_decay=1e-3)
        
        # scheduler_plateau = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=10, verbose=1)
        scheduler_cosine = CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6, last_epoch=-1)
        # scheduler = GradualWarmupSchedulerV2(optimizer, multiplier=1, total_epoch=5, after_scheduler=scheduler_cosine)
        
        return [optimizer], [scheduler_cosine]

In [7]:
def main():
    
    df_train = pd.read_csv('../data/train.csv')
    df_train['before_file_path'] = df_train['before_file_path'].apply(lambda x: x.replace('.png', '_resize256.png'))
    df_train['after_file_path'] = df_train['after_file_path'].apply(lambda x: x.replace('.png', '_resize256.png'))
    df_train['split'] = df_train['before_file_path'].apply(lambda x: x.split('adjust/')[-1][:5]) + '_' + df_train['time_delta'].astype(str)
    
    skf = StratifiedKFold(n_splits=config.k, random_state=config.seed, shuffle=True)
    n_splits = list(skf.split(df_train, df_train['split']))
    
    df_train['n_fold'] = -1
    for i in range(config.k):
        df_train.loc[n_splits[i][1], 'n_fold'] = i
    print(df_train['n_fold'].value_counts())
    
    for fold in range(config.k):
        config.start_time = time.strftime('%Y-%m-%d %H:%M', time.localtime(time.time())).replace(' ', '_')
        
        
        logger = WandbLogger(name=f"{config.start_time}_{config.version}_{config.k}fold_{fold}", 
                                     project='dacon-plant', 
                                     config={key:config.__dict__[key] for key in config.__dict__.keys() if '__' not in key},
                                    )
    
    
        tt = df_train.loc[df_train['n_fold']!=fold].reset_index(drop=True).iloc[:1000]
        vv = df_train.loc[df_train['n_fold']==fold].reset_index(drop=True)
        
        train_transforms = train_get_transforms()
        valid_transforms = valid_get_transforms()
        
        config.train_dataset = PlantDataset(config, tt, mode='train', transforms=train_transforms)
        config.valid_dataset = PlantDataset(config, vv, mode='valid', transforms=valid_transforms)
        
        print('train_dataset input shape, label : ', config.train_dataset[0]['be_img'].shape, config.train_dataset[0]['af_img'].shape, config.train_dataset[0]['label'])
        print('valid_dataset input shape, label : ', config.valid_dataset[0]['be_img'].shape, config.valid_dataset[0]['af_img'].shape, config.valid_dataset[0]['label'])
        
        lr_monitor = LearningRateMonitor(logging_interval='epoch') # ['epoch', 'step']
        checkpoints = ModelCheckpoint('model/'+config.version, monitor='total_val_loss', mode='min', filename=f'{config.k}fold_{fold}__' + '{epoch}_{total_val_loss:.4f}')
        
        model = plModel(config)
        trainer = pl.Trainer(
                            max_epochs=config.epochs, 
                            gpus=1, 
                            log_every_n_steps=50,
                            # gradient_clip_val=1000, gradient_clip_algorithm='value', # defalut : [norm, value]
                            amp_backend='native', precision=16, # amp_backend default : native
                            callbacks=[checkpoints, lr_monitor], 
                            logger=logger
                            ) 
        
        trainer.fit(model)
        del model, trainer
        wandb.finish()
        break

In [8]:
if __name__ == '__main__':
    seed_everything()
    main()

2    2767
0    2767
1    2767
4    2766
3    2766
Name: n_fold, dtype: int64
train_dataset input shape, label :  torch.Size([3, 256, 256]) torch.Size([3, 256, 256]) tensor(1)
valid_dataset input shape, label :  torch.Size([3, 256, 256]) torch.Size([3, 256, 256]) tensor(3)


Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Failed to query for notebook name, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable
[34m[1mwandb[0m: Currently logged in as: [33mhho1028[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.7 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade



  | Name         | Type         | Params
----------------------------------------------
0 | before_model | EfficientNet | 4.0 M 
1 | after_model  | EfficientNet | 4.0 M 
2 | criterion    | L1Loss       | 0     
----------------------------------------------
8.0 M     Trainable params
0         Non-trainable params
8.0 M     Total params
16.035    Total estimated model params size (MB)


Epoch 7:   7%|▋         | 4/58 [00:04<01:06,  1.24s/it, loss=8.12, v_num=omnr, train_loss=8.270, val_loss_step=7.030, val_loss_epoch=8.320]