## 사용 package 선언

In [1]:
from google.colab import drive
drive.mount('/content/drive')
!nvidia-smi

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Sat Dec  4 12:19:29 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                       

In [2]:
import torch
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from torch import optim
from torch import nn

from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms

import random
from glob import glob
import pandas as pd
import numpy as np
from PIL import Image

## 데이터관련 함수 정의 및 데이터셋 선언

In [3]:
from glob import glob

import pandas as pd
import numpy as np
!pip install transformers
import transformers
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms


def extract_day(file_name):
    day = int(file_name.split('.')[-2][-2:])
    return day


def make_day_array(image_pathes):
    day_array = np.array([extract_day(file_name) for file_name in image_pathes])
    return day_array


def make_image_path_array(root_path=None):
    if root_path is None:
        bc_directories = glob('/content/drive/MyDrive/growth/open/train_dataset/BC/*')
        lt_directories = glob('/content/drive/MyDrive/growth/open/train_dataset/LT/*')

    else:
        bc_directories = glob(root_path + 'BC/*')
        lt_directories = glob(root_path + 'LT/*')

    bc_image_path = []
    for bc_path in bc_directories:
        images = glob(bc_path + '/*.png')
        bc_image_path.extend(images)

    lt_image_path = []
    for lt_path in lt_directories:
        images = glob(lt_path + '/*.png')
        lt_image_path.extend(images)

    return bc_image_path, lt_image_path


def make_dataframe(root_path=None):
    bc_image_path, lt_image_path = make_image_path_array(root_path)
    bc_day_array = make_day_array(bc_image_path)
    lt_day_array = make_day_array(lt_image_path)

    bc_df = pd.DataFrame({'file_name': bc_image_path,
                          'day': bc_day_array})
    bc_df['species'] = 'bc'

    lt_df = pd.DataFrame({'file_name': lt_image_path,
                          'day': lt_day_array})
    lt_df['species'] = 'lt'

    total_data_frame = pd.concat([bc_df, lt_df]).reset_index(drop=True)

    return total_data_frame


def make_combination(length, species, data_frame):
    before_file_path = []
    after_file_path = []
    time_delta = []

    for i in range(length):
        sample = data_frame[data_frame['species'] == species].sample(2)
        after = sample[sample['day'] == max(sample['day'])].reset_index(drop=True)
        before = sample[sample['day'] == min(sample['day'])].reset_index(drop=True)

        before_file_path.append(before.iloc[0]['file_name'])
        after_file_path.append(after.iloc[0]['file_name'])
        delta = int(after.iloc[0]['day'] - before.iloc[0]['day'])
        time_delta.append(delta)

    combination_df = pd.DataFrame({
        'before_file_path': before_file_path,
        'after_file_path': after_file_path,
        'time_delta': time_delta,
    })

    combination_df['species'] = species

    return combination_df


class KistDataset(Dataset):
    def __init__(self, combination_df, is_test= None):
        self.combination_df = combination_df
        self.transform = transforms.Compose([
            transforms.Resize(224),
            transforms.ToTensor()
        ])
        self.is_test = is_test

    def __getitem__(self, idx):
        before_image = Image.open(self.combination_df.iloc[idx]['before_file_path'])
        after_image = Image.open(self.combination_df.iloc[idx]['after_file_path'])

        before_image = self.transform(before_image)
        after_image = self.transform(after_image)
        if self.is_test:
            return before_image, after_image
        time_delta = self.combination_df.iloc[idx]['time_delta']
        return before_image, after_image, time_delta

    def __len__(self):
        return len(self.combination_df)

Collecting transformers
  Downloading transformers-4.12.5-py3-none-any.whl (3.1 MB)
[K     |████████████████████████████████| 3.1 MB 15.8 MB/s 
[?25hCollecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 72.9 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 67.0 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.2.1-py3-none-any.whl (61 kB)
[K     |████████████████████████████████| 61 kB 577 kB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.46-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 83.7 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Atte

## 모델선언

In [4]:
import torch
from torch import nn
import torchvision.models
from torchvision.models import mobilenet_v2


class CompareCNN(nn.Module):

    def __init__(self):
        super(CompareCNN, self).__init__()
        self.mobile_net = mobilenet_v2(pretrained=True)
        self.fc_layer = nn.Linear(1000, 1)

    def forward(self, input):
        x = self.mobile_net(input)
        output = self.fc_layer(x)
        return output


class CompareNet(nn.Module):

    def __init__(self):
        super(CompareNet, self).__init__()
        self.before_net = CompareCNN()
        self.after_net = CompareCNN()

    def forward(self, before_input, after_input):
        before = self.before_net(before_input)
        after = self.after_net(after_input)
        delta = before - after
        return delta


In [5]:
def seed_everything(seed): # seed 고정
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)


seed_everything(2048)

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
lr = 1e-5
epochs = 8
batch_size = 64
valid_batch_size = 50

model = CompareNet().to(device)

total_dataframe = make_dataframe(root_path = "/content/drive/MyDrive/growth/open/train_dataset/")
bt_combination = make_combination(5000, 'bc', total_dataframe)
lt_combination = make_combination(5000, 'lt', total_dataframe)

bt_train = bt_combination.iloc[:4500]
bt_valid = bt_combination.iloc[4500:]

lt_train = lt_combination.iloc[:4500]
lt_valid = lt_combination.iloc[4500:]

train_set = pd.concat([bt_train, lt_train])
valid_set = pd.concat([bt_valid, lt_valid])



train_dataset = KistDataset(train_set)
valid_dataset = KistDataset(valid_set)

optimizer = optim.Adam(model.parameters(), lr=lr)

train_data_loader = DataLoader(train_dataset,
                               batch_size=batch_size,
                               shuffle=True)

valid_data_loader = DataLoader(valid_dataset,
                               batch_size=valid_batch_size)


for epoch in tqdm(range(epochs)):
    for step, (before_image, after_image, time_delta) in tqdm(enumerate(train_data_loader)):
        before_image = before_image.to(device)
        after_image = after_image.to(device)
        time_delta = time_delta.to(device)

        optimizer.zero_grad()
        logit = model(before_image, after_image)
        train_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - time_delta.float())) /
                      torch.LongTensor([batch_size]).squeeze(0).to(device))
        train_loss.backward()
        optimizer.step()

        if step % 15 == 0:
            print('\n=====================loss=======================')
            print(f'\n=====================EPOCH: {epoch}=======================')
            print(f'\n=====================step: {step}=======================')
            print('MAE_loss : ', train_loss.detach().cpu().numpy())

    valid_losses = []
    with torch.no_grad():
        for valid_before, valid_after, time_delta in tqdm(valid_data_loader):
            valid_before = valid_before.to(device)
            valid_after = valid_after.to(device)
            valid_time_delta = time_delta.to(device)


            logit = model(valid_before, valid_after)
            valid_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - valid_time_delta.float())) /
                          torch.LongTensor([valid_batch_size]).squeeze(0).to(device))
            valid_losses.append(valid_loss.detach().cpu())


    print(f'VALIDATION_LOSS MAE : {sum(valid_losses)/len(valid_losses)}')
    checkpoiont = {
        'model': model.state_dict(),

    }

    torch.save(checkpoiont, 'checkpoiont_128.pt')



Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

  0%|          | 0/8 [00:00<?, ?it/s]

0it [00:00, ?it/s]




MAE_loss :  12.307783



MAE_loss :  9.539959



MAE_loss :  6.4969454



MAE_loss :  3.8399792



MAE_loss :  3.9421895



MAE_loss :  3.51993



MAE_loss :  3.3858092



MAE_loss :  2.452427



MAE_loss :  1.9170568



MAE_loss :  2.2475445


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.778559684753418


0it [00:00, ?it/s]




MAE_loss :  2.2766962



MAE_loss :  1.6758975



MAE_loss :  2.171527



MAE_loss :  2.2067785



MAE_loss :  1.8240381



MAE_loss :  1.7612762



MAE_loss :  1.6013199



MAE_loss :  1.4845253



MAE_loss :  1.4192978



MAE_loss :  1.5287607


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 2.16227650642395


0it [00:00, ?it/s]




MAE_loss :  1.6549861



MAE_loss :  1.9526696



MAE_loss :  1.799975



MAE_loss :  1.6396273



MAE_loss :  1.5565059



MAE_loss :  1.614793



MAE_loss :  1.2396293



MAE_loss :  0.9920076



MAE_loss :  1.4777217



MAE_loss :  1.0804672


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.9492075443267822


0it [00:00, ?it/s]




MAE_loss :  0.96543974



MAE_loss :  1.7103571



MAE_loss :  1.1257123



MAE_loss :  1.7852364



MAE_loss :  1.5923193



MAE_loss :  1.0765021



MAE_loss :  0.8999284



MAE_loss :  1.3691516



MAE_loss :  1.3872614



MAE_loss :  0.99875325


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.9044663906097412


0it [00:00, ?it/s]




MAE_loss :  1.1332446



MAE_loss :  1.1804792



MAE_loss :  0.9324328



MAE_loss :  0.93450546



MAE_loss :  0.9522133



MAE_loss :  1.0696925



MAE_loss :  1.0529792



MAE_loss :  1.0957355



MAE_loss :  1.7111084



MAE_loss :  1.3431535


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.8689649105072021


0it [00:00, ?it/s]




MAE_loss :  1.449033



MAE_loss :  1.5702424



MAE_loss :  1.3256875



MAE_loss :  0.88277125



MAE_loss :  1.0492425



MAE_loss :  1.022638



MAE_loss :  1.2986091



MAE_loss :  1.7198887



MAE_loss :  0.92659426



MAE_loss :  0.80137354


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.6624536514282227


0it [00:00, ?it/s]




MAE_loss :  1.0097158



MAE_loss :  0.79424596



MAE_loss :  1.0444748



MAE_loss :  2.9234972



MAE_loss :  1.444697



MAE_loss :  2.4958596



MAE_loss :  1.1156346



MAE_loss :  2.601606



MAE_loss :  2.5293036



MAE_loss :  2.5179386


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.6623512506484985


0it [00:00, ?it/s]




MAE_loss :  2.7672067



MAE_loss :  0.76216906



MAE_loss :  0.90755516



MAE_loss :  1.1351275



MAE_loss :  0.86348546



MAE_loss :  0.7872913



MAE_loss :  0.75482845



MAE_loss :  0.76295257



MAE_loss :  1.2380383



MAE_loss :  1.1977947


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.607200026512146


In [6]:
test_set = pd.read_csv('/content/drive/MyDrive/growth/open/test_dataset/test_data.csv')
test_set['l_root'] = test_set['before_file_path'].map(lambda x: '/content/drive/MyDrive/growth/open/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['r_root'] = test_set['after_file_path'].map(lambda x: '/content/drive/MyDrive/growth/open/test_dataset/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['l_path'] = test_set['l_root'] + '/' + test_set['before_file_path'] + '.png'
test_set['r_path'] = test_set['r_root'] + '/' + test_set['after_file_path'] + '.png'
test_dataset = KistDataset(test_set, is_test=True)
test_data_loader = DataLoader(test_dataset,
                               batch_size=64)
# test_value = []
# with torch.no_grad():
#     for test_before, test_after in tqdm(test_data_loader):
#         test_before = test_before.to(device)
#         test_after = test_after.to(device)
#         logit = model(test_before, test_after)
#         value = logit.squeeze(1).detach().cpu().float()
        
#         test_value.extend(value)

In [7]:
test_value = []
with torch.no_grad():
    for test_before, test_after in tqdm(test_data_loader):
        test_before = test_before.to(device)
        test_after = test_after.to(device)
        logit = model(test_before, test_after)
        value = logit.squeeze(1).detach().cpu().float()


        test_value.extend(value)

  0%|          | 0/62 [00:00<?, ?it/s]

FileNotFoundError: ignored

In [None]:
sample = pd.read_csv('/content/drive/MyDrive/growth/open/sample_submission.csv')
sample['time_delta'] = test_value
sample['time_delta'] = sample['time_delta'].apply(lambda x: np.array(x))
sample.to_csv('/content/drive/MyDrive/growth/open/baseline.csv', index = False)