In [None]:
import os
import json
import random
import time
import copy
import gc
from glob import glob
from tqdm.auto import tqdm
# from tqdm import tqdm

import pandas as pd
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch import optim
from torch import nn
from torch.optim import lr_scheduler
from torchvision.transforms import ToTensor
from torchvision import transforms
import torchvision.models as models

!pip install transformers
from transformers import ViTFeatureExtractor, ViTModel, ViTForImageClassification, ViTConfig

In [None]:
def extract_day(file_name):
    """Parse the day number encoded at the end of an image file name.

    The day is read from the last two characters of the dot-separated
    segment that sits immediately before the extension
    (e.g. ``'..._07.png'`` -> ``7``).

    Args:
        file_name: Path or file name containing at least one ``'.'``.

    Returns:
        The parsed day as an ``int``.

    Raises:
        IndexError: If ``file_name`` contains no ``'.'``.
        ValueError: If the extracted characters are not an integer.
    """
    # Only the segment between the last two dots matters, so splitting from
    # the right at most twice is enough.
    stem = file_name.rsplit('.', 2)[-2]
    day_digits = stem[-2:]
    return int(day_digits)


def make_day_array(image_pathes):
    """Build a NumPy array with the day number of every image path.

    Args:
        image_pathes: Iterable of image paths/names accepted by
            ``extract_day``.

    Returns:
        ``np.ndarray`` with one day per path, in input order.
    """
    days = list(map(extract_day, image_pathes))
    return np.array(days)


def make_image_path_array(root_path=None):
    """Collect every PNG path below the ``BC`` and ``LT`` species folders.

    The expected layout is ``<root>/<BC|LT>/<plant folder>/<image>.png``.

    Args:
        root_path: Directory that contains the ``BC`` and ``LT`` folders.
            ``None`` means the current working directory. A trailing path
            separator is optional.

    Returns:
        Tuple ``(bc_image_path, lt_image_path)`` of lists of PNG paths.
        Both lists are sorted (plant folder first, then file name) so the
        result does not depend on filesystem enumeration order.
    """
    base_dir = '.' if root_path is None else root_path

    def _collect_pngs(species_dir):
        # os.path.join inserts the separator only when it is missing, so
        # root paths with and without a trailing slash both work.
        plant_dirs = sorted(glob(os.path.join(base_dir, species_dir, '*')))
        png_paths = []
        for plant_dir in plant_dirs:
            png_paths.extend(sorted(glob(os.path.join(plant_dir, '*.png'))))
        return png_paths

    return _collect_pngs('BC'), _collect_pngs('LT')


def make_dataframe(root_path=None):
    """Assemble one frame listing every BC and LT image with its day.

    Args:
        root_path: Forwarded unchanged to ``make_image_path_array``.

    Returns:
        ``pd.DataFrame`` with columns ``file_name``, ``day`` and ``species``
        (``'bc'`` rows first, then ``'lt'``) and a fresh 0..n-1 index.
    """
    bc_image_path, lt_image_path = make_image_path_array(root_path)

    species_frames = []
    for species, paths in (('bc', bc_image_path), ('lt', lt_image_path)):
        frame = pd.DataFrame({'file_name': paths,
                              'day': make_day_array(paths)})
        frame['species'] = species
        species_frames.append(frame)

    return pd.concat(species_frames).reset_index(drop=True)

def make_combination(species, data_frame):
    """Build every ordered (before, after) image pair within each plant.

    For every ``version`` of the requested species, rows are paired as
    ``(row i, row j)`` for all ``i < j`` in the frame's existing row order.
    ``time_delta`` is ``day[j] - day[i]``.

    Args:
        species: Value of the ``species`` column to select (e.g. ``'bc'``).
        data_frame: Frame with ``species`` and ``version`` columns. As in the
            original implementation, the file path is read from the FIRST
            column and the day from the SECOND column.

    Returns:
        ``pd.DataFrame`` with columns ``before_file_path``,
        ``after_file_path``, ``time_delta`` and ``species``.

    Notes:
        Rows are NOT sorted by day here, so ``time_delta`` is negative
        whenever a later row has an earlier day. Sort the input first if
        only forward-in-time pairs are wanted.
    """
    # Restrict to the requested species up front. The original filtered the
    # inner loops on ``version`` only, so a version label shared by two
    # species leaked the other species' rows into the pairs.
    species_rows = data_frame[data_frame['species'] == species]

    before_file_path = []
    after_file_path = []
    time_delta = []

    for version in species_rows['version'].unique():
        # Filter once per version instead of once per (i, j) pair.
        version_rows = species_rows[species_rows['version'] == version]
        file_names = version_rows.iloc[:, 0].tolist()
        days = version_rows.iloc[:, 1].tolist()

        row_count = len(file_names)
        for i in range(row_count - 1):
            for j in range(i + 1, row_count):
                before_file_path.append(file_names[i])
                after_file_path.append(file_names[j])
                time_delta.append(int(days[j] - days[i]))

    combination_df = pd.DataFrame({
        'before_file_path': before_file_path,
        'after_file_path': after_file_path,
        'time_delta': time_delta,
    })

    combination_df['species'] = species

    return combination_df

class KistDataset(Dataset):
    """Dataset of (before, after) image pairs, optionally with a target.

    Args:
        combination_df: Frame with ``before_file_path`` and
            ``after_file_path`` columns, plus ``time_delta`` unless
            ``is_test`` is truthy.
        transform: Callable applied to each loaded PIL image.
        is_test: When truthy, ``__getitem__`` returns only the two images.
    """

    def __init__(self, combination_df, transform, is_test= None):
        self.combination_df = combination_df
        self.transform = transform
        self.is_test = is_test

    @staticmethod
    def _load_rgb(path):
        """Open ``path`` and return a fully loaded 3-channel RGB image."""
        # Force RGB so RGBA or grayscale PNGs do not break the 3-channel
        # normalisation. ``convert`` returns a loaded image, so the file
        # handle can be closed right away.
        with Image.open(path) as image:
            return image.convert('RGB')

    def __getitem__(self, idx):
        """Return ``(before, after)`` or ``(before, after, time_delta)``."""
        row = self.combination_df.iloc[idx]  # look the row up only once

        before_image = self.transform(self._load_rgb(row['before_file_path']))
        after_image = self.transform(self._load_rgb(row['after_file_path']))
        if self.is_test:
            return before_image, after_image
        return before_image, after_image, row['time_delta']

    def __len__(self):
        """Number of image pairs."""
        return len(self.combination_df)
    
    
    
def seed_everything(seed):
    """Fix the seed of every random number generator this notebook uses.

    Args:
        seed: Integer seed applied to ``random``, NumPy and PyTorch
            (CPU and CUDA).
    """
    # Python and NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators (CPU, current GPU, all GPUs).
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # needed when several GPUs are used

    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(1000)

In [None]:
root_path = "/usr/src/coco/dacon/data/open/train_dataset/"


def _scan_species(species_dir):
    """List one species' plant folders and the images/days inside each.

    Returns:
        ``(directories, names, images, days)`` where ``names`` are the plant
        folder names and ``images`` / ``days`` are dicts keyed by name.
    """
    # Sort every glob result: filesystem enumeration order is arbitrary, and
    # the row order produced here drives both the pair orientation in
    # make_combination and the seeded train/valid split further down.
    directories = sorted(glob(os.path.join(root_path, species_dir, '*')))
    # Use the full folder name as the plant id instead of the last five
    # characters of the path, which only works for 5-character folder names.
    names = [os.path.basename(directory) for directory in directories]
    images = {name: sorted(glob(os.path.join(directory, '*.png')))
              for name, directory in zip(names, directories)}
    days = {name: make_day_array(images[name]) for name in names}
    return directories, names, images, days


def _species_frames(names, images, days, species):
    """Build one frame per plant folder with file, day, species, version."""
    return [
        pd.DataFrame({
            'file_name': images[name],
            'day': days[name],
            'species': species,
            'version': name,
        })
        for name in names
    ]


# NOTE: the ``bt_`` / ``bf_`` prefixes refer to the BC species; the names are
# kept as they were for compatibility with the rest of the notebook.
bt_direct, bt_direct_name, bt_images, bt_dayes = _scan_species('BC')
lt_direct, lt_direct_name, lt_images, lt_dayes = _scan_species('LT')

bt_dfs = _species_frames(bt_direct_name, bt_images, bt_dayes, 'bc')
lt_dfs = _species_frames(lt_direct_name, lt_images, lt_dayes, 'lt')

bf_dataframe = pd.concat(bt_dfs).reset_index(drop=True)
lt_dataframe = pd.concat(lt_dfs).reset_index(drop=True)

total_dataframe = pd.concat([bf_dataframe, lt_dataframe]).reset_index(drop=True)
total_dataframe

In [None]:
# Build all before/after image pairs, one frame per species.
bt_combination, lt_combination = (
    make_combination(species_label, total_dataframe)
    for species_label in ('bc', 'lt')
)

print(bt_combination.shape, lt_combination.shape)

In [None]:
# Every pair of both species goes into the pool that is split below.
bt_train, lt_train = bt_combination, lt_combination

train_set = pd.concat([bt_train, lt_train], ignore_index=True)
train_set

In [None]:
# Stratified split: within each distinct time_delta, a random `split_ratio`
# share of the rows goes to training and the remainder to validation.
tds = train_set.time_delta.unique()
split_ratio = 0.9
train_idx, valid_idx = [], []
for d in tds:
    total_idx = train_set[train_set.time_delta==d].index.values
    tmp_train_idx = np.random.choice(total_idx, int(len(total_idx)*split_ratio), replace=False).tolist()
    train_idx.extend(tmp_train_idx)
    # Vectorised membership test instead of a list lookup per index (which
    # was quadratic). The held-out indices keep their original order.
    valid_idx.extend(total_idx[~np.isin(total_idx, tmp_train_idx)])
    print(f"time_delta :{d} and added train samples: {len(tmp_train_idx)} / valid samples: {len(total_idx) - len(tmp_train_idx)}")

In [None]:
# Sanity check: how many indices ended up on each side of the split.
print(*map(len, (train_idx, valid_idx)))

In [None]:
# Materialise both index lists as frames with a fresh 0..n-1 index.
train_df, valid_df = (
    train_set.loc[selected_idx].reset_index(drop=True)
    for selected_idx in (train_idx, valid_idx)
)
print(train_df.shape, valid_df.shape)

In [None]:
image_size = (224, 224)


def _to_normalized_tensor():
    """Final steps shared by both pipelines: tensor conversion + normalisation."""
    # NOTE(review): these look like the usual ImageNet mean/std values;
    # confirm they match the preprocessing the ViT checkpoint expects.
    return [
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]


# Training pipeline: resize, then light augmentation (flip + mild random crop).
train_t = transforms.Compose(
    [
        transforms.Resize(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(image_size, scale=(0.95, 1), ratio=(0.95, 1)),
    ]
    + _to_normalized_tensor()
)

# Validation pipeline: same preprocessing without augmentation.
valid_t = transforms.Compose([transforms.Resize(image_size)] + _to_normalized_tensor())

train_dataset = KistDataset(train_df, train_t)
valid_dataset = KistDataset(valid_df, valid_t)

In [None]:
pretrained_architecture = "google/vit-base-patch16-224-in21k" #'google/vit-base-patch16-224'


class UnitViTModel(nn.Module):
    """Pretrained ViT backbone that returns one embedding vector per image.

    The backbone is loaded from ``pretrained_architecture`` without its
    pooling layer and without attention outputs.
    """

    def __init__(self):
        super().__init__()
        self.vit = ViTModel.from_pretrained(
            pretrained_architecture,
            output_attentions=False,
            add_pooling_layer=False,
        )

    def forward(self, input):
        """Return the hidden state at sequence position 0 for each image.

        Args:
            input: Batch of image tensors accepted by the ViT backbone.

        Returns:
            Tensor indexed as ``[batch, hidden]`` (position 0 is presumably
            the [CLS] token - confirm against the HF ViT docs).
        """
        hidden_states = self.vit(input)[0]
        return hidden_states[:, 0, :]


class CompareNet(nn.Module):
    """Two-tower regressor: one ViT per image, then two linear layers.

    Args:
        vit_out_dim: Width of each ViT embedding.
        num_classes: Size of the output (1 for the time-delta regression).
    """

    def __init__(self, vit_out_dim=768, num_classes=1):
        super().__init__()
        # Separate (non-shared) backbones for the two images.
        self.before_net = UnitViTModel()
        self.after_net = UnitViTModel()
        self.fc1 = nn.Linear(2*vit_out_dim, vit_out_dim)
        self.fc2 = nn.Linear(vit_out_dim, num_classes)

    def forward(self, before_input, after_input):
        """Predict the time delta between ``before_input`` and ``after_input``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        embeddings = (self.before_net(before_input), self.after_net(after_input))
        seqs = torch.concat(embeddings, axis=1)

        # NOTE(review): there is no non-linearity between fc1 and fc2, so the
        # head is equivalent to a single linear map - confirm this is intended.
        return self.fc2(self.fc1(seqs))
    
    
class UnitViTModel_v2(nn.Module):
    """ViT feature extractor used by ``CompareNet_v2`` and ``CompareNet_v3``.

    Loads ``pretrained_architecture`` without a pooling layer and exposes the
    hidden state found at sequence position 0.
    """

    def __init__(self):
        super().__init__()
        backbone_options = dict(output_attentions=False, add_pooling_layer=False)
        self.vit = ViTModel.from_pretrained(pretrained_architecture, **backbone_options)

    def forward(self, input):
        """Return ``outputs[0][:, 0, :]`` of the backbone for ``input``."""
        outputs = self.vit(input)
        first_position = outputs[0][:, 0, :]
        return first_position


class CompareNet_v2(nn.Module):
    """Shared-ViT regressor with a Transformer encoder over the joint embedding.

    Both images go through the same ViT. Each embedding is average-pooled
    (kernel 3, stride 3), the two are concatenated into one ``d_model``-wide
    token, passed through a Transformer encoder, and projected to the output.

    Args:
        vit_out_dim: Width of the ViT embedding.
        d_model: Transformer width. Must equal ``2 * (vit_out_dim // 3)``
            (512 for the default 768).
        nhead: Number of attention heads.
        nlayers: Number of encoder layers.
        num_classes: Size of the output.

    Notes:
        The encoder is now built with ``batch_first=True``. Before, the
        ``(B, 1, E)`` input was read as sequence length B with batch size 1,
        so samples in a batch attended to each other and a prediction
        depended on what else was in the batch. Parameter names and shapes
        are unchanged, but weights trained with the old layout should be
        retrained.
    """

    def __init__(self, vit_out_dim=768, d_model=512, nhead=4, nlayers=4, num_classes=1):
        super(CompareNet_v2, self).__init__()
        self.vit_encoder = UnitViTModel_v2()
        self.pooling = nn.AvgPool1d(kernel_size=3, stride=3)
        self.layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(self.layers, num_layers=nlayers)

        self.fc = nn.Linear(d_model, num_classes)
        # fc1/fc2 are not used in forward(); kept so existing state_dicts
        # still load with strict key matching.
        self.fc1 = nn.Linear(2*vit_out_dim, vit_out_dim)
        self.fc2 = nn.Linear(vit_out_dim, num_classes)

    def _embed(self, images):
        """ViT embedding of ``images``, average-pooled along the feature axis."""
        features = self.vit_encoder(images)
        # Explicit channel dim so the pooling is unambiguously per sample.
        return self.pooling(features.unsqueeze(1)).squeeze(1)

    def forward(self, before_input, after_input):
        """Predict the time delta; returns a ``(batch, num_classes)`` tensor."""
        before_seq = self._embed(before_input)
        after_seq = self._embed(after_input)

        # (B, 1, d_model): one token per sample, batch dimension first.
        seqs = torch.concat([before_seq, after_seq], axis=1).unsqueeze(1)
        seqs = self.transformer(seqs)

        delta = self.fc(seqs.squeeze(1))

        return delta
    
    
class CompareNet_v3(nn.Module):
    """Shared-ViT regressor that treats embedding features as a token sequence.

    Both images go through the same ViT. Each embedding is average-pooled
    (kernel 3, stride 2), the two are concatenated, every scalar feature is
    lifted to ``d_model`` dimensions, and a Transformer encoder attends over
    the resulting tokens. Channel 0 of every token feeds the output layer.

    Args:
        vit_out_dim: Width of the ViT embedding (768 gives 383 pooled
            features per image, 766 tokens in total).
        d_model: Transformer width.
        nhead: Number of attention heads.
        nlayers: Number of encoder layers.
        num_classes: Size of the output.

    Notes:
        The encoder is now built with ``batch_first=True`` to match the
        ``(B, tokens, d_model)`` layout this forward pass produces. With the
        previous default the first axis was read as the sequence, so
        attention ran across the samples of a batch rather than across
        tokens. Parameter names and shapes are unchanged, so old checkpoints
        still load, but their predictions change; retrain after this fix.
        Attention over 766 tokens per sample also uses more memory than
        before.
    """

    def __init__(self, vit_out_dim=768, d_model=128, nhead=4, nlayers=2, num_classes=1):
        super(CompareNet_v3, self).__init__()
        self.vit_encoder = UnitViTModel_v2()
        self.pooling = nn.AvgPool1d(kernel_size=3, stride=2)
        self.encoder = nn.Linear(1, d_model)

        self.layers = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, batch_first=True)
        self.transformer = nn.TransformerEncoder(self.layers, num_layers=nlayers)

        # 2 * pooled length == vit_out_dim - 2 for the default 768-wide input.
        self.fc = nn.Linear((vit_out_dim-2), num_classes)

    def _embed(self, images):
        """ViT embedding of ``images``, average-pooled along the feature axis."""
        features = self.vit_encoder(images)  # (B, vit_out_dim)
        # Explicit channel dim so the pooling is unambiguously per sample.
        return self.pooling(features.unsqueeze(1)).squeeze(1)  # (B, 383) by default

    def forward(self, before_input, after_input):
        """Predict the time delta; returns a ``(batch, num_classes)`` tensor."""
        before_seq = self._embed(before_input)
        after_seq = self._embed(after_input)

        seqs = torch.concat([before_seq, after_seq], axis=1).unsqueeze(2)  # (B, 766, 1)
        seqs = self.encoder(seqs)  # (B, 766, d_model)
        seqs = self.transformer(seqs)[:, :, 0]  # keep channel 0 of each token -> (B, 766)

        delta = self.fc(seqs)  # (B, num_classes)

        return delta

# Train

In [None]:
# Training hyper-parameters.
lr = 3e-5
epochs = 30
batch_size = 8
valid_batch_size = 64

# Prefer GPU 3 as before, but fall back to GPU 0 on hosts with fewer devices
# instead of failing on an invalid device ordinal; use the CPU without CUDA.
preferred_gpu = 3
if torch.cuda.is_available():
    gpu_index = preferred_gpu if preferred_gpu < torch.cuda.device_count() else 0
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

# model = CompareNet().to(device)
# model = CompareNet_v2().to(device)
model = CompareNet_v3().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.MSELoss()

In [None]:
# Training batches are reshuffled every epoch; validation keeps dataset order.
train_data_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
valid_data_loader = DataLoader(valid_dataset, batch_size=valid_batch_size)

# Number of batches per epoch for each loader.
print(*(len(loader) for loader in (train_data_loader, valid_data_loader)))

In [None]:
# Train for `epochs` epochs, validate after each one, and checkpoint the
# weights whenever the validation loss improves.
best_val_loss, best_model_wts = float('inf'), None
progress = tqdm(range(epochs))
for epoch in progress:
    # Training mode (e.g. dropout inside nn.TransformerEncoderLayer is on).
    model.train()
    # Wrap the loader itself so tqdm knows the total number of steps.
    for step, (before_image, after_image, time_delta) in enumerate(tqdm(train_data_loader)):
        before_image = before_image.to(device)
        after_image = after_image.to(device)
        time_delta = time_delta.to(device)

        optimizer.zero_grad()
        logit = model(before_image, after_image)

        train_loss = criterion(logit.squeeze(1), time_delta.float())

        train_loss.backward()
        optimizer.step()
        if step % 100 == 0:
            print(f'[Epoch-step]: {epoch}-{step}, training loss : {train_loss.detach().cpu().numpy()}')

    # Evaluation mode so validation is not perturbed by dropout.
    model.eval()
    weighted_loss_sum, sample_count = 0.0, 0
    with torch.no_grad():
        for valid_before, valid_after, valid_time_delta in tqdm(valid_data_loader):
            valid_before = valid_before.to(device)
            valid_after = valid_after.to(device)
            valid_time_delta = valid_time_delta.to(device)

            logit = model(valid_before, valid_after)
            valid_loss = criterion(logit.squeeze(1), valid_time_delta.float())

            # Weight each batch mean by its size so a smaller final batch
            # does not skew the epoch average.
            batch_count = valid_time_delta.size(0)
            weighted_loss_sum += valid_loss.item() * batch_count
            sample_count += batch_count

    # Kept as a CPU tensor: the test section calls `.numpy()` on the value
    # stored in the checkpoint.
    cur_val_loss = torch.tensor(weighted_loss_sum / sample_count)
    print(f'#### VALIDATION_LOSS : {cur_val_loss} ####')
    if cur_val_loss < best_val_loss:
        print("validation loss updated!")
        best_val_loss = cur_val_loss
        best_model_wts = copy.deepcopy(model.state_dict())
        ckpt = {
            'model': best_model_wts,
            'best_epoch': epoch,
            'best_val_loss': best_val_loss

        }
        torch.save(ckpt, 'v3_best_211212.pt')
        print('ckpt saved!')


# Test

In [None]:
# Load onto the CPU first: the checkpoint was written from whatever GPU the
# training ran on, and that device may not exist on this host. The weights are
# moved to `device` after load_state_dict.
model_ckpt = torch.load("/usr/src/coco/dacon/v3_best_211212.pt", map_location="cpu")

In [None]:
# Inference runs on the first GPU when CUDA is present, otherwise on the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Alternative architectures (must match the checkpoint being loaded):
# model = CompareNet().to(device)
# model = CompareNet_v2().to(device)
# model = CompareNet_v3(d_model=256, nlayers=4).to(device)
model = CompareNet_v3()
model = model.to(device)

In [None]:
# Restore the checkpointed weights and report which epoch they came from.
model.load_state_dict(model_ckpt["model"])
model.to(device)
ckpt_epoch = model_ckpt['best_epoch']
ckpt_val_loss = model_ckpt['best_val_loss'].numpy()
print(f"best epoch: {ckpt_epoch}, / best validation loss: {ckpt_val_loss}")

In [None]:
image_size = (224, 224)

# Test-time preprocessing: resize, convert to tensor, normalise (no augmentation).
_test_steps = [
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
test_t = transforms.Compose(_test_steps)

In [None]:
test_csv_path = "/usr/src/coco/dacon/data/open/test_dataset/test_data.csv"

test_set = pd.read_csv(test_csv_path)


def _image_directory(image_id):
    """Directory of a test image, built from the 2nd and 3rd '_' fields of its id."""
    fields = image_id.split('_')
    return '/usr/src/coco/dacon/data/open/test_dataset/' + fields[1] + '/' + fields[2]


_sides = (('l', 'before_file_path'), ('r', 'after_file_path'))

# Column creation order (both roots, then both paths) is kept on purpose so
# the printed column list is unchanged.
for _prefix, _id_column in _sides:
    test_set[_prefix + '_root'] = test_set[_id_column].map(_image_directory)
for _prefix, _id_column in _sides:
    test_set[_prefix + '_path'] = test_set[_prefix + '_root'] + '/' + test_set[_id_column] + '.png'

# Replace the bare image ids by full paths so KistDataset can open them.
for _prefix, _id_column in _sides:
    test_set[_id_column] = test_set[_prefix + '_path']

print(test_set.columns)
print(test_set.iloc[:1, :].values)

In [None]:
# Run the model over the test pairs and collect one prediction per pair.
test_dataset = KistDataset(test_set, test_t, is_test=True)
test_data_loader = DataLoader(test_dataset, batch_size=64)

test_value = []
model.eval()
with torch.no_grad():
    for test_before, test_after in tqdm(test_data_loader):
        logit = model(test_before.to(device), test_after.to(device))
        # Drop the output dimension and move to the CPU before collecting.
        value = logit.squeeze(1).detach().cpu().float()
        test_value.extend(value)

In [None]:
# Template whose `time_delta` column is filled with the predictions below.
sample_submission_path = '/usr/src/coco/dacon/data/open/sample_submission.csv'
submission = pd.read_csv(sample_submission_path)
submission.head()

In [None]:
_sub = torch.FloatTensor(test_value)

# `.numpy()` shares memory with `_sub`; clamp in place so every prediction
# below 1 becomes 1.
__sub = _sub.numpy()
np.maximum(__sub, 1, out=__sub)

submission['time_delta'] = __sub
submission.to_csv('result_v3_ep21.csv', index=False)