In [12]:
import torch
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from torch import optim
from torch import nn

from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms

import random
from glob import glob
import pandas as pd
import numpy as np
from PIL import Image

In [13]:
from glob import glob

import pandas as pd
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor
from torchvision import transforms


def extract_day(images):
    day = int(images.split('.')[-2][-2:])
    return day

def make_day_array(images):
    day_array = np.array([extract_day(x) for x in images])
    return day_array

def make_combination(length, species, data_frame, direct_name):
    before_file_path = []
    after_file_path = []
    time_delta = []

    for i in range(length):
        
        # 하위 폴더 중에서 랜덤하게 선택을 한다.
        direct = random.randrange(0,len(direct_name))
        # 위에서 결정된 폴더를 선택한다. 
        temp = data_frame[data_frame['version'] == direct_name[direct]]
    
        # 밑은 기존의 코드와 동일합니다.
        sample = temp[temp['species'] == species].sample(2)
        after = sample[sample['day'] == max(sample['day'])].reset_index(drop=True)
        before = sample[sample['day'] == min(sample['day'])].reset_index(drop=True)

        before_file_path.append(before.iloc[0]['file_name'])
        after_file_path.append(after.iloc[0]['file_name'])
        delta = int(after.iloc[0]['day'] - before.iloc[0]['day'])
        time_delta.append(delta)

    combination_df = pd.DataFrame({
        'before_file_path': before_file_path,
        'after_file_path': after_file_path,
        'time_delta': time_delta,
    })

    combination_df['species'] = species

    return combination_df

# 학습 데이터가 있는 폴더 위치
root_path = "./drive/MyDrive/Colab Notebooks/224size_train/"

# BC 폴더와 LT 폴더에 있는 하위 폴더를 저장한다.
bc_direct = glob(root_path + '/BC/*')
bc_direct_name = [x[-5:] for x in bc_direct]
lt_direct = glob(root_path + '/LT/*')
lt_direct_name = [x[-5:] for x in lt_direct]

# 하위 폴더에 있는 이미지들을 하위 폴더 이름과 매칭시켜서 저장한다.
bc_images = {key : glob(name + '/*.png') for key,name in zip(bc_direct_name, bc_direct)}
lt_images = {key : glob(name + '/*.png') for key,name in zip(lt_direct_name, lt_direct)}

# 하위 폴더에 있는 이미지들에서 날짜 정보만 따로 저장한다.
bc_dayes = {key : make_day_array(bc_images[key]) for key in bc_direct_name}
lt_dayes = {key : make_day_array(lt_images[key]) for key in lt_direct_name}

bc_dfs = []
for i in bc_direct_name:
    bc_df = pd.DataFrame({
        'file_name':bc_images[i],
        'day':bc_dayes[i],
        'species':'bc',
        'version':i
    })
    bc_dfs.append(bc_df)
    
lt_dfs = []
for i in lt_direct_name:
    lt_df = pd.DataFrame({
        'file_name':lt_images[i],
        'day':lt_dayes[i],
        'species':'lt',
        'version':i
    })
    lt_dfs.append(lt_df)

bc_dataframe = pd.concat(bc_dfs).reset_index(drop=True)
lt_dataframe = pd.concat(lt_dfs).reset_index(drop=True)
total_dataframe = pd.concat([bc_dataframe, lt_dataframe]).reset_index(drop=True)


bc_combination = make_combination(5000, 'bc', total_dataframe, bc_direct_name)
lt_combination = make_combination(5000, 'lt', total_dataframe, lt_direct_name)

bc_train = bc_combination.iloc[:4500]
bc_valid = bc_combination.iloc[4500:]

lt_train = lt_combination.iloc[:4500]
lt_valid = lt_combination.iloc[4500:]

train_set = pd.concat([bc_train, lt_train])
valid_set = pd.concat([bc_valid, lt_valid])

class KistDataset(Dataset):
    def __init__(self, combination_df, is_test= None):
        self.combination_df = combination_df
        self.transform = transforms.Compose([
            transforms.ToTensor()
        ])
        self.is_test = is_test

    def __getitem__(self, idx):
        before_image = Image.open(self.combination_df.iloc[idx]['before_file_path'])
        after_image = Image.open(self.combination_df.iloc[idx]['after_file_path'])

        before_image = self.transform(before_image)
        after_image = self.transform(after_image)
        if self.is_test:
            return before_image, after_image
        time_delta = self.combination_df.iloc[idx]['time_delta']
        return before_image, after_image, time_delta

    def __len__(self):
        return len(self.combination_df)

In [3]:
import torch
from torch import nn
from torchvision.models import mobilenet_v2


class CompareCNN(nn.Module):

    def __init__(self):
        super(CompareCNN, self).__init__()
        self.mobile_net = mobilenet_v2(pretrained=True)
        self.fc_layer = nn.Linear(1000, 1)

    def forward(self, input):
        x = self.mobile_net(input)
        output = self.fc_layer(x)
        return output


class CompareNet(nn.Module):

    def __init__(self):
        super(CompareNet, self).__init__()
        self.before_net = CompareCNN()
        self.after_net = CompareCNN()

    def forward(self, before_input, after_input):
        before = self.before_net(before_input)
        after = self.after_net(after_input)
        delta = before - after
        return delta

In [14]:
def seed_everything(seed): # seed 고정
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)


seed_everything(2048)

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
lr = 1e-5
epochs = 10
batch_size = 32
valid_batch_size = 50

model = CompareNet().to(device)



In [15]:
import gc
gc.collect()
torch.cuda.empty_cache()

In [17]:
train_dataset = KistDataset(train_set)
valid_dataset = KistDataset(valid_set)

optimizer = optim.Adam(model.parameters(), lr=lr)

train_data_loader = DataLoader(train_dataset,
                               batch_size=batch_size,
                               shuffle=True)

valid_data_loader = DataLoader(valid_dataset,
                               batch_size=valid_batch_size)


for epoch in tqdm(range(epochs)):
    for step, (before_image, after_image, time_delta) in tqdm(enumerate(train_data_loader)):
        before_image = before_image.to(device)
        after_image = after_image.to(device)
        time_delta = time_delta.to(device)

        optimizer.zero_grad()
        logit = model(before_image, after_image)
        train_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - time_delta.float())) /
                      torch.LongTensor([batch_size]).squeeze(0).to(device))
        train_loss.backward()
        optimizer.step()

        if step % 15 == 0:
            print('\n=====================loss=======================')
            print(f'\n=====================EPOCH: {epoch}=======================')
            print(f'\n=====================step: {step}=======================')
            print('MAE_loss : ', train_loss.detach().cpu().numpy())

    valid_losses = []
    with torch.no_grad():
        for valid_before, valid_after, time_delta in tqdm(valid_data_loader):
            valid_before = valid_before.to(device)
            valid_after = valid_after.to(device)
            valid_time_delta = time_delta.to(device)


            logit = model(valid_before, valid_after)
            valid_loss = (torch.sum(torch.abs(logit.squeeze(1).float() - valid_time_delta.float())) /
                          torch.LongTensor([valid_batch_size]).squeeze(0).to(device))
            valid_losses.append(valid_loss.detach().cpu())


    print(f'VALIDATION_LOSS MAE : {sum(valid_losses)/len(valid_losses)}')
    checkpoiont = {
        'model': model.state_dict(),

    }

    torch.save(checkpoiont, './drive/MyDrive/Colab Notebooks/baseline_224.pt')

  0%|          | 0/10 [00:00<?, ?it/s]

0it [00:00, ?it/s]




MAE_loss :  10.882977



MAE_loss :  7.327464



MAE_loss :  10.384344



MAE_loss :  3.9873726



MAE_loss :  3.2046828



MAE_loss :  3.223936



MAE_loss :  2.392628



MAE_loss :  1.6516726



MAE_loss :  3.1256719



MAE_loss :  3.602121



MAE_loss :  3.0470629



MAE_loss :  1.7876787



MAE_loss :  2.07438



MAE_loss :  1.2401395



MAE_loss :  2.8096523



MAE_loss :  3.740333



MAE_loss :  2.166033



MAE_loss :  2.6638649



MAE_loss :  1.8501289


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.8741462230682373


0it [00:00, ?it/s]




MAE_loss :  3.9195518



MAE_loss :  1.1991814



MAE_loss :  1.4322827



MAE_loss :  1.0089386



MAE_loss :  1.3639944



MAE_loss :  2.1197734



MAE_loss :  2.233864



MAE_loss :  1.8680857



MAE_loss :  1.1841722



MAE_loss :  1.3150793



MAE_loss :  1.2515892



MAE_loss :  2.1310563



MAE_loss :  2.7357454



MAE_loss :  1.6966503



MAE_loss :  1.8813715



MAE_loss :  2.411161



MAE_loss :  2.6284938



MAE_loss :  4.3894033



MAE_loss :  2.267016


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.7546323537826538


0it [00:00, ?it/s]




MAE_loss :  3.3645344



MAE_loss :  1.3536737



MAE_loss :  1.7060468



MAE_loss :  3.5245748



MAE_loss :  0.9509311



MAE_loss :  1.1881204



MAE_loss :  1.4105567



MAE_loss :  3.0737472



MAE_loss :  0.8477055



MAE_loss :  1.9642234



MAE_loss :  1.5471785



MAE_loss :  1.4922214



MAE_loss :  2.2128131



MAE_loss :  1.2408493



MAE_loss :  1.061677



MAE_loss :  1.3812275



MAE_loss :  2.3699877



MAE_loss :  1.1574292



MAE_loss :  1.4445174


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.5696821212768555


0it [00:00, ?it/s]




MAE_loss :  1.3714416



MAE_loss :  1.1608453



MAE_loss :  2.8569016



MAE_loss :  2.5578752



MAE_loss :  1.0039756



MAE_loss :  2.0179586



MAE_loss :  1.0841937



MAE_loss :  2.6771429



MAE_loss :  1.0390545



MAE_loss :  0.96033233



MAE_loss :  2.0968506



MAE_loss :  1.3132489



MAE_loss :  1.4481752



MAE_loss :  2.4395478



MAE_loss :  1.5144855



MAE_loss :  1.5051329



MAE_loss :  1.5137911



MAE_loss :  1.0811529



MAE_loss :  0.87885576


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.5232529640197754


0it [00:00, ?it/s]




MAE_loss :  1.1648238



MAE_loss :  1.6007602



MAE_loss :  1.3428556



MAE_loss :  1.8659644



MAE_loss :  1.1829824



MAE_loss :  2.6291509



MAE_loss :  1.774895



MAE_loss :  1.2701595



MAE_loss :  1.1485689



MAE_loss :  1.7774916



MAE_loss :  1.0837635



MAE_loss :  1.9772786



MAE_loss :  1.2213051



MAE_loss :  2.0476415



MAE_loss :  1.7427003



MAE_loss :  0.87699443



MAE_loss :  1.1839819



MAE_loss :  3.957419



MAE_loss :  2.465242


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.5423266887664795


0it [00:00, ?it/s]




MAE_loss :  0.9804405



MAE_loss :  1.4017404



MAE_loss :  0.9052363



MAE_loss :  1.0745664



MAE_loss :  1.0627084



MAE_loss :  1.1669481



MAE_loss :  4.1919174



MAE_loss :  1.4590337



MAE_loss :  1.2052717



MAE_loss :  0.8433418



MAE_loss :  0.9387476



MAE_loss :  1.8902403



MAE_loss :  0.99758273



MAE_loss :  2.032579



MAE_loss :  2.4841833



MAE_loss :  1.3462204



MAE_loss :  0.7863319



MAE_loss :  1.2286602



MAE_loss :  1.6873713


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.477586030960083


0it [00:00, ?it/s]




MAE_loss :  2.4252305



MAE_loss :  2.772913



MAE_loss :  3.1351194



MAE_loss :  1.1071011



MAE_loss :  1.0472407



MAE_loss :  1.5282737



MAE_loss :  4.043169



MAE_loss :  1.3641462



MAE_loss :  1.3567188



MAE_loss :  1.7667335



MAE_loss :  0.77220935



MAE_loss :  0.7799005



MAE_loss :  1.0076433



MAE_loss :  1.4550934



MAE_loss :  1.2626128



MAE_loss :  4.0900493



MAE_loss :  2.4239678



MAE_loss :  1.2657266



MAE_loss :  2.1946034


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.4263991117477417


0it [00:00, ?it/s]




MAE_loss :  1.6405661



MAE_loss :  1.0678859



MAE_loss :  1.5120146



MAE_loss :  0.853827



MAE_loss :  1.2968514



MAE_loss :  1.0409112



MAE_loss :  1.4947554



MAE_loss :  1.1588068



MAE_loss :  0.92029005



MAE_loss :  1.1653973



MAE_loss :  3.9618325



MAE_loss :  1.2067803



MAE_loss :  1.2728568



MAE_loss :  3.7078836



MAE_loss :  1.297364



MAE_loss :  0.7099594



MAE_loss :  2.4036615



MAE_loss :  2.3206472



MAE_loss :  0.974353


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.426908016204834


0it [00:00, ?it/s]




MAE_loss :  3.4511518



MAE_loss :  2.502061



MAE_loss :  1.0399082



MAE_loss :  0.9361341



MAE_loss :  1.147721



MAE_loss :  1.5462162



MAE_loss :  2.050036



MAE_loss :  0.79145044



MAE_loss :  0.7595984



MAE_loss :  3.8098526



MAE_loss :  0.9854796



MAE_loss :  1.0826454



MAE_loss :  1.9842854



MAE_loss :  0.7825968



MAE_loss :  2.602128



MAE_loss :  2.106565



MAE_loss :  2.1846814



MAE_loss :  0.9960879



MAE_loss :  1.4834629


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.5945854187011719


0it [00:00, ?it/s]




MAE_loss :  1.74228



MAE_loss :  1.8147683



MAE_loss :  1.7598441



MAE_loss :  1.7345085



MAE_loss :  1.7928784



MAE_loss :  2.6890576



MAE_loss :  1.1186613



MAE_loss :  1.371343



MAE_loss :  1.607285



MAE_loss :  2.6025083



MAE_loss :  0.9386722



MAE_loss :  1.0251806



MAE_loss :  2.783731



MAE_loss :  1.2417135



MAE_loss :  0.8810644



MAE_loss :  1.9637516



MAE_loss :  3.2095857



MAE_loss :  3.2434776



MAE_loss :  1.5105364


  0%|          | 0/20 [00:00<?, ?it/s]

VALIDATION_LOSS MAE : 1.446001648902893


In [18]:
test_set = pd.read_csv('./drive/MyDrive/Colab Notebooks/224size_test/test_data.csv')
test_set['l_root'] = test_set['before_file_path'].map(lambda x: './drive/MyDrive/Colab Notebooks/224size_test/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['r_root'] = test_set['after_file_path'].map(lambda x: './drive/MyDrive/Colab Notebooks/224size_test/' + x.split('_')[1] + '/' + x.split('_')[2])
test_set['before_file_path'] = test_set['l_root'] + '/' + test_set['before_file_path'] + '.png'
test_set['after_file_path'] = test_set['r_root'] + '/' + test_set['after_file_path'] + '.png'


test_dataset = KistDataset(test_set, is_test=True)
test_data_loader = DataLoader(test_dataset,
                               batch_size=32)

test_set

Unnamed: 0,idx,before_file_path,after_file_path,l_root,r_root
0,0,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...
1,1,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...
2,2,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...
3,3,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...
4,4,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...
...,...,...,...,...,...
3955,3955,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...
3956,3956,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...,./drive/MyDrive/Colab Notebooks/224size_test/L...
3957,3957,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...
3958,3958,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...,./drive/MyDrive/Colab Notebooks/224size_test/B...


In [19]:
test_value = []
with torch.no_grad():
    for test_before, test_after in tqdm(test_data_loader):
        test_before = test_before.to(device)
        test_after = test_after.to(device)
        logit = model(test_before, test_after)
        value = logit.squeeze(1).detach().cpu().float()
        
        test_value.extend(value)

  0%|          | 0/124 [00:00<?, ?it/s]

In [20]:
test_value

[tensor(25.4375),
 tensor(27.7689),
 tensor(5.0285),
 tensor(8.2295),
 tensor(26.8082),
 tensor(24.1758),
 tensor(20.8403),
 tensor(15.0483),
 tensor(5.3074),
 tensor(32.6129),
 tensor(9.0649),
 tensor(9.9231),
 tensor(5.0774),
 tensor(11.8635),
 tensor(13.3530),
 tensor(9.6278),
 tensor(2.3080),
 tensor(2.5946),
 tensor(21.0652),
 tensor(12.1836),
 tensor(5.1464),
 tensor(5.4038),
 tensor(20.1064),
 tensor(5.4505),
 tensor(26.6230),
 tensor(7.5329),
 tensor(9.0702),
 tensor(28.2488),
 tensor(17.9918),
 tensor(2.9512),
 tensor(10.5150),
 tensor(9.1991),
 tensor(5.0424),
 tensor(35.2403),
 tensor(16.2689),
 tensor(13.7525),
 tensor(23.9416),
 tensor(0.7432),
 tensor(31.6978),
 tensor(5.9746),
 tensor(8.8931),
 tensor(19.0402),
 tensor(13.8430),
 tensor(-0.9867),
 tensor(10.1257),
 tensor(18.0640),
 tensor(7.6099),
 tensor(17.1768),
 tensor(28.9567),
 tensor(2.3281),
 tensor(7.5719),
 tensor(25.9332),
 tensor(18.9950),
 tensor(17.9677),
 tensor(6.2483),
 tensor(22.5325),
 tensor(3.1227),

In [21]:
# 모델 불러오기
model = CompareNet() # 모델 초기화

#evice = torch.device('cpu')
checkpoint = torch.load('./drive/MyDrive/Colab Notebooks/baseline_224.pt', map_location=device)
model.load_state_dict(checkpoint['model'])

model.eval() # 드롭아웃 및 배치 정규화를 평가

CompareNet(
  (before_net): CompareCNN(
    (mobile_net): MobileNetV2(
      (features): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): InvertedResidual(
          (conv): Sequential(
            (0): ConvNormActivation(
              (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
              (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              (2): ReLU6(inplace=True)
            )
            (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          )
        )
        (2): InvertedResidual(
          (conv): Sequential(
            (0): 

In [22]:
# submission 형식을 불러온다.
submission = pd.read_csv('./drive/MyDrive/Colab Notebooks/sample_submission.csv')

# 예측한 값들은 텐서 형태로 변환 시켜준다.
predict = torch.FloatTensor(test_value)

# 음수의 값을 갖는 모든 값들을 1 Day 차이가 발생하도록 바꿔줌
temp_predict = predict.numpy()
temp_predict[np.where(temp_predict<1)] = 1

array([25.437496, 27.768913,  5.028455, ..., 18.96547 , 11.061118,
        8.11888 ], dtype=float32)

In [23]:
# 모델의 예측 값을 저장함
submission['time_delta'] = temp_predict
submission.to_csv('./drive/MyDrive/Colab Notebooks/baseline_224.csv', index=False)

In [None]:
/content/drive/MyDrive/Colab Notebooks/open