# augmentation을 적용한 함수 dl모델을 제작

데이터 증강 함수
- A.Resize(height=256, width=256) : 사이즈 변경
- A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) : 이미지 픽셀 정규화
- A.CoarseDropout(max_holes=16, max_height=16, max_width=16, min_holes=1, min_height=16, min_width=16) : 픽셀 드롭아웃(구멍뚫기)
- A.Rotate(limit=(115, 115), border_mode=cv2.BORDER_CONSTANT,value=[255, 255, 255], p=1.0) : 비율 유지하여 회전, 빈공간 흰색
- A.VerticalFlip(always_apply=False, p=0.5) : 좌우반전
- A.HorizontalFlip(always_apply=False, p=1.0) : 상하반전
- A.Blur(always_apply=True, p=1.0, blur_limit=(3, 7)) : 블러
- A.GaussNoise(always_apply=False, p=1.0, var_limit=(10.0, 50.0)) : 가우시안, 되는지 의문
- A.Downscale(always_apply=False, p=1.0, scale_min=0.35, scale_max=0.35, interpolation=0) : 픽셀다운, 모자이크
- 믹스 추가필요

In [10]:
import os
import time
import random

import timm
import torch
import albumentations as A
import pandas as pd
import numpy as np
import torch.nn as nn
from albumentations.pytorch import ToTensorV2
from torch.optim import Adam
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from PIL import Image
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

import cv2
import matplotlib.pyplot as plt

In [2]:
# 시드를 고정합니다.
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

In [3]:
# 데이터셋 클래스를 정의합니다.
class ImageDataset(Dataset):
    def __init__(self, csv, path, transform=None):
        self.df = pd.read_csv(csv).values
        self.path = path
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        name, target = self.df[idx]
        img = np.array(Image.open(os.path.join(self.path, name)))
        if self.transform:
            img = self.transform(image=img)['image']
        return img, target

In [37]:
# one epoch 학습을 위한 함수입니다.
def train_one_epoch(loader, vaild_loader, model, optimizer, loss_fn, device):
    model.train()
    train_loss = 0
    preds_list = []
    targets_list = []
    vaild_preds_list = []
    vaild_target_list = []

    pbar = tqdm(loader)
    for image, targets in pbar:
        image = image.float().to(device)
        targets = targets.to(device)

        model.zero_grad(set_to_none=True)

        preds = model(image)
        loss = loss_fn(preds, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        targets_list.extend(targets.detach().cpu().numpy())

        pbar.set_description(f"Loss: {loss.item():.4f}")

    train_loss /= len(loader)
    train_acc = accuracy_score(targets_list, preds_list)
    train_f1 = f1_score(targets_list, preds_list, average='macro')

    # vaildation set predict
    model.eval()
    for image, target in tqdm(vaild_loader):
        image = image.float().to(device)

        with torch.no_grad():
            preds = model(image)
        vaild_preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
        vaild_target_list.extend(target.detach().cpu().numpy())
    vaild_acc = accuracy_score(vaild_target_list, vaild_preds_list)
    vaild_f1 = f1_score(vaild_target_list, vaild_preds_list, average='macro')

    ret = {
        "train_loss": train_loss,
        "train_acc": train_acc,
        "train_f1": train_f1,
        "vaild_acc": vaild_acc,
        "vaild_f1": vaild_f1,
    }

    return ret

In [None]:
# vaild_preds_list = []
# vaild_target_list = []

# model.eval()
# for image, target in tqdm(vaild_loader):
#     image = image.float().to(device)

#     with torch.no_grad():
#         preds = model(image)
#     vaild_preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
#     vaild_target_list.extend(target.detach().cpu().numpy())
# vaild_acc = accuracy_score(vaild_target_list, preds_list)
# vaild_f1 = f1_score(vaild_target_list, preds_list, average='macro')

In [23]:
# device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# data config
data_path = '../datasets_fin/'

# model config
model_name = 'resnet34' # 'resnet50' 'efficientnet-b0', ...

# training config
img_size = 256
LR = 1e-3
EPOCHS = 20
BATCH_SIZE = 32
num_workers = 0

- 일단 단일 augmentation들로 확인
- 성능향상시 다양한 데이터증강을 조합하여 추가학습

In [56]:
# augmentation을 위한 transform 코드
trn_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])
trn_transform_coarseDropout = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.CoarseDropout(max_holes=30, max_height=16, max_width=16, min_holes=10, min_height=16, min_width=16, p=1),
    ToTensorV2(),
])
trn_transform_rotate45 = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.Rotate(limit=(45, 45), border_mode=cv2.BORDER_CONSTANT,value=[255, 255, 255], p=1),
    ToTensorV2(),
])
trn_transform_rotate90 = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 
    A.Rotate(limit=(90, 90), border_mode=cv2.BORDER_CONSTANT,value=[255, 255, 255], p=1),
    ToTensorV2(),
])
trn_transform_rotate135 = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.Rotate(limit=(135, 135), border_mode=cv2.BORDER_CONSTANT,value=[255, 255, 255], p=1),
    ToTensorV2(),
])
trn_transform_rotate180 = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.Rotate(limit=(180, 180), border_mode=cv2.BORDER_CONSTANT,value=[255, 255, 255], p=1),
    ToTensorV2(),
])
trn_transform_rotate225 = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.Rotate(limit=(225, 225), border_mode=cv2.BORDER_CONSTANT,value=[255, 255, 255], p=1.0),
    ToTensorV2(),
])
trn_transform_rotate270 = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.Rotate(limit=(270, 270), border_mode=cv2.BORDER_CONSTANT,value=[255, 255, 255], p=1.0),
    ToTensorV2(),
])
trn_transform_rotate315 = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.Rotate(limit=(315, 315), border_mode=cv2.BORDER_CONSTANT,value=[255, 255, 255], p=1.0),
    ToTensorV2(),
])
trn_transform_verticalflip = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.VerticalFlip(always_apply=False, p=1.0),
    ToTensorV2(),
])
trn_transform_horizontalflip = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.HorizontalFlip(always_apply=False, p=1.0),
    ToTensorV2(),
])
trn_transform_blur = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.Blur(always_apply=True, p=1.0, blur_limit=(3, 3)),
    ToTensorV2(),
])
trn_transform_gaussnoise = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.GaussNoise(always_apply=False, p=1.0, var_limit=(500.0, 500.0)),
    ToTensorV2(),
])
trn_transform_downscale = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    A.Downscale(always_apply=False, p=1.0, scale_min=0.5, scale_max=0.5, interpolation=0),
    ToTensorV2(),
])
transform_list = [trn_transform, trn_transform_coarseDropout, trn_transform_rotate45, trn_transform_rotate90, trn_transform_rotate135, trn_transform_rotate180, trn_transform_rotate225,
                  trn_transform_rotate270, trn_transform_rotate315, trn_transform_verticalflip, trn_transform_horizontalflip, trn_transform_blur, trn_transform_gaussnoise,
                  trn_transform_downscale]

# test image 변환을 위한 transform 코드
tst_transform = A.Compose([
    A.Resize(height=img_size, width=img_size),
    A.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ToTensorV2(),
])

In [57]:
# Dataset 정의를 위한 함수
def make_trn_dataset(transform_list, csv, path):
    trn_dataset = ImageDataset(
        csv,
        path,
        transform=transform_list[0]
    )
    for transform in transform_list[1:]:
        trn_dataset2 = ImageDataset(
            csv,
            path,
            transform=transform
        )
        trn_dataset = ConcatDataset([trn_dataset, trn_dataset2])

    return trn_dataset

In [58]:
# vaildation set 정의
def make_vaild_dataset(transform_list, csv, path):
    trn_dataset = ImageDataset(
        csv,
        path,
        transform=transform_list[0]
    )
    for transform in transform_list[1:]:
        trn_dataset2 = ImageDataset(
            csv,
            path,
            transform=transform
        )
        trn_dataset = ConcatDataset([trn_dataset, trn_dataset2])

    return trn_dataset

In [59]:
trn_dataset = make_trn_dataset(transform_list, "../datasets_fin/divided_train.csv", "../datasets_fin/train/")
vaild_dataset = make_vaild_dataset(transform_list, "../datasets_fin/vaild.csv", "../datasets_fin/train/",)
tst_dataset = ImageDataset(
    "../datasets_fin/test.csv",
    "../datasets_fin/train/",
    transform=tst_transform
)
print(len(trn_dataset), len(vaild_dataset), len(tst_dataset))

17584 2198 157


In [60]:
# DataLoader 정의
trn_loader = DataLoader(
    trn_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    # shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=False
)
vaild_loader = DataLoader(
    vaild_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    # shuffle=True,
    num_workers=num_workers,
    pin_memory=True,
    drop_last=False
)
tst_loader = DataLoader(
    tst_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

In [68]:
# load model
model = timm.create_model(
    model_name,
    pretrained=True,
    num_classes=17
).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LR)

In [69]:
early_stop = 0
best_model = model
best_epoch = 0
best_vaild_f1 = 0
for epoch in range(EPOCHS):
   if early_stop > 2:
      torch.save(best_model, '../model/best_augmentation_'+str(img_size)+'_'+str(best_epoch)+'.pt')
      break
   ret = train_one_epoch(trn_loader, vaild_loader, model, optimizer, loss_fn, device=device)
   ret['epoch'] = epoch
   if best_vaild_f1 < ret['vaild_f1']:
      best_vaild_f1 = ret['vaild_f1']
      best_model = model
      best_epoch = epoch+1
      early_stop = 0
   else:
      early_stop += 1

   print(early_stop)

   log = ""
   for k, v in ret.items():
      log += f"{k}: {v:.4f}\n"
   print(log)

Loss: 0.6891: 100%|██████████| 550/550 [01:47<00:00,  5.13it/s]
100%|██████████| 69/69 [00:10<00:00,  6.41it/s]


0
train_loss: 0.8928
train_acc: 0.7000
train_f1: 0.6780
vaild_acc: 0.7520
vaild_f1: 0.7411
epoch: 0.0000



Loss: 0.1688: 100%|██████████| 550/550 [01:47<00:00,  5.11it/s]
100%|██████████| 69/69 [00:10<00:00,  6.29it/s]


0
train_loss: 0.4026
train_acc: 0.8569
train_f1: 0.8544
vaild_acc: 0.7630
vaild_f1: 0.7412
epoch: 1.0000



Loss: 0.3147: 100%|██████████| 550/550 [01:47<00:00,  5.10it/s]
100%|██████████| 69/69 [00:11<00:00,  6.19it/s]


0
train_loss: 0.3123
train_acc: 0.8921
train_f1: 0.8930
vaild_acc: 0.8330
vaild_f1: 0.8329
epoch: 2.0000



Loss: 0.3922: 100%|██████████| 550/550 [01:47<00:00,  5.12it/s]
100%|██████████| 69/69 [00:10<00:00,  6.40it/s]


0
train_loss: 0.2768
train_acc: 0.9054
train_f1: 0.9074
vaild_acc: 0.8335
vaild_f1: 0.8392
epoch: 3.0000



Loss: 0.1966: 100%|██████████| 550/550 [01:47<00:00,  5.10it/s]
100%|██████████| 69/69 [00:10<00:00,  6.38it/s]


1
train_loss: 0.2525
train_acc: 0.9134
train_f1: 0.9155
vaild_acc: 0.8285
vaild_f1: 0.8367
epoch: 4.0000



Loss: 0.2137: 100%|██████████| 550/550 [01:47<00:00,  5.11it/s]
100%|██████████| 69/69 [00:10<00:00,  6.35it/s]


2
train_loss: 0.2427
train_acc: 0.9187
train_f1: 0.9226
vaild_acc: 0.8044
vaild_f1: 0.8145
epoch: 5.0000



Loss: 0.6473: 100%|██████████| 550/550 [01:47<00:00,  5.10it/s]
100%|██████████| 69/69 [00:10<00:00,  6.33it/s]

3
train_loss: 0.2409
train_acc: 0.9194
train_f1: 0.9220
vaild_acc: 0.8203
vaild_f1: 0.8257
epoch: 6.0000






In [49]:
for epoch in range(20):
    ret = train_one_epoch(trn_loader, vaild_loader, model, optimizer, loss_fn, device=device)
    ret['epoch'] = epoch

    log = ""
    for k, v in ret.items():
      log += f"{k}: {v:.4f}\n"
    print(log)

Loss: 0.5597: 100%|██████████| 550/550 [01:11<00:00,  7.70it/s]
100%|██████████| 69/69 [00:07<00:00,  8.74it/s]


train_loss: 0.0916
train_acc: 0.9693
train_f1: 0.9679
vaild_acc: 0.8430
vaild_f1: 0.8393
epoch: 0.0000



Loss: 0.0250: 100%|██████████| 550/550 [01:11<00:00,  7.67it/s]
100%|██████████| 69/69 [00:07<00:00,  8.72it/s]


train_loss: 0.0764
train_acc: 0.9738
train_f1: 0.9731
vaild_acc: 0.8749
vaild_f1: 0.8683
epoch: 1.0000



Loss: 0.0036: 100%|██████████| 550/550 [01:11<00:00,  7.68it/s]
100%|██████████| 69/69 [00:07<00:00,  8.76it/s]


train_loss: 0.0566
train_acc: 0.9816
train_f1: 0.9805
vaild_acc: 0.8726
vaild_f1: 0.8645
epoch: 2.0000



Loss: 0.0695: 100%|██████████| 550/550 [01:11<00:00,  7.69it/s]
100%|██████████| 69/69 [00:07<00:00,  8.71it/s]


train_loss: 0.0490
train_acc: 0.9823
train_f1: 0.9813
vaild_acc: 0.8763
vaild_f1: 0.8727
epoch: 3.0000



Loss: 0.0056: 100%|██████████| 550/550 [01:11<00:00,  7.66it/s]
100%|██████████| 69/69 [00:07<00:00,  8.66it/s]


train_loss: 0.0494
train_acc: 0.9838
train_f1: 0.9830
vaild_acc: 0.8926
vaild_f1: 0.8902
epoch: 4.0000



Loss: 0.0470: 100%|██████████| 550/550 [01:11<00:00,  7.68it/s]
100%|██████████| 69/69 [00:07<00:00,  8.74it/s]


train_loss: 0.0381
train_acc: 0.9870
train_f1: 0.9864
vaild_acc: 0.8699
vaild_f1: 0.8626
epoch: 5.0000



Loss: 0.1431: 100%|██████████| 550/550 [01:11<00:00,  7.68it/s]
100%|██████████| 69/69 [00:07<00:00,  8.66it/s]


train_loss: 0.0364
train_acc: 0.9883
train_f1: 0.9878
vaild_acc: 0.8813
vaild_f1: 0.8796
epoch: 6.0000



Loss: 0.1292: 100%|██████████| 550/550 [01:11<00:00,  7.67it/s]
100%|██████████| 69/69 [00:07<00:00,  8.68it/s]


train_loss: 0.0369
train_acc: 0.9882
train_f1: 0.9876
vaild_acc: 0.8976
vaild_f1: 0.8944
epoch: 7.0000



Loss: 0.0112: 100%|██████████| 550/550 [01:11<00:00,  7.68it/s]
100%|██████████| 69/69 [00:07<00:00,  8.81it/s]


train_loss: 0.0243
train_acc: 0.9931
train_f1: 0.9927
vaild_acc: 0.8854
vaild_f1: 0.8815
epoch: 8.0000



Loss: 0.0854: 100%|██████████| 550/550 [01:11<00:00,  7.65it/s]
100%|██████████| 69/69 [00:07<00:00,  8.76it/s]


train_loss: 0.0336
train_acc: 0.9888
train_f1: 0.9879
vaild_acc: 0.8913
vaild_f1: 0.8860
epoch: 9.0000



Loss: 0.0053: 100%|██████████| 550/550 [01:11<00:00,  7.68it/s]
100%|██████████| 69/69 [00:07<00:00,  8.69it/s]


train_loss: 0.0204
train_acc: 0.9928
train_f1: 0.9924
vaild_acc: 0.9054
vaild_f1: 0.9049
epoch: 10.0000



Loss: 0.0046: 100%|██████████| 550/550 [01:11<00:00,  7.68it/s]
100%|██████████| 69/69 [00:07<00:00,  8.70it/s]


train_loss: 0.0261
train_acc: 0.9917
train_f1: 0.9913
vaild_acc: 0.8653
vaild_f1: 0.8483
epoch: 11.0000



Loss: 0.1145: 100%|██████████| 550/550 [01:11<00:00,  7.72it/s]
100%|██████████| 69/69 [00:07<00:00,  8.76it/s]


train_loss: 0.0270
train_acc: 0.9917
train_f1: 0.9914
vaild_acc: 0.8908
vaild_f1: 0.8860
epoch: 12.0000



Loss: 0.2038: 100%|██████████| 550/550 [01:11<00:00,  7.67it/s]
100%|██████████| 69/69 [00:07<00:00,  8.72it/s]


train_loss: 0.0184
train_acc: 0.9951
train_f1: 0.9948
vaild_acc: 0.8949
vaild_f1: 0.8883
epoch: 13.0000



Loss: 0.0041: 100%|██████████| 550/550 [01:11<00:00,  7.66it/s]
100%|██████████| 69/69 [00:07<00:00,  8.74it/s]


train_loss: 0.0329
train_acc: 0.9894
train_f1: 0.9890
vaild_acc: 0.8908
vaild_f1: 0.8887
epoch: 14.0000



Loss: 0.0074: 100%|██████████| 550/550 [01:11<00:00,  7.70it/s]
100%|██████████| 69/69 [00:07<00:00,  8.68it/s]


train_loss: 0.0207
train_acc: 0.9938
train_f1: 0.9936
vaild_acc: 0.8922
vaild_f1: 0.8881
epoch: 15.0000



Loss: 0.0373: 100%|██████████| 550/550 [01:11<00:00,  7.64it/s]
100%|██████████| 69/69 [00:08<00:00,  8.62it/s]


train_loss: 0.0196
train_acc: 0.9943
train_f1: 0.9942
vaild_acc: 0.8885
vaild_f1: 0.8831
epoch: 16.0000



Loss: 0.0022: 100%|██████████| 550/550 [01:11<00:00,  7.67it/s]
100%|██████████| 69/69 [00:07<00:00,  8.74it/s]


train_loss: 0.0164
train_acc: 0.9953
train_f1: 0.9950
vaild_acc: 0.8863
vaild_f1: 0.8818
epoch: 17.0000



Loss: 0.0001: 100%|██████████| 550/550 [01:11<00:00,  7.65it/s]
100%|██████████| 69/69 [00:07<00:00,  8.73it/s]


train_loss: 0.0191
train_acc: 0.9947
train_f1: 0.9943
vaild_acc: 0.8913
vaild_f1: 0.8868
epoch: 18.0000



Loss: 0.0001: 100%|██████████| 550/550 [01:11<00:00,  7.70it/s]
100%|██████████| 69/69 [00:07<00:00,  8.86it/s]

train_loss: 0.0201
train_acc: 0.9931
train_f1: 0.9927
vaild_acc: 0.8922
vaild_f1: 0.8877
epoch: 19.0000






In [70]:
model = torch.load('../model/best_augmentation_256_4.pt')

In [71]:
# test 데이터 확인
tst_preds_list = []
tst_target_list = []

model.eval()
for image, target in tqdm(tst_loader):
    image = image.float().to(device)

    with torch.no_grad():
        preds = model(image)
    tst_preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())
    tst_target_list.extend(target.detach().cpu().numpy())
tst_acc = accuracy_score(tst_target_list, tst_preds_list)
tst_f1 = f1_score(tst_target_list, tst_preds_list, average='macro')

tst_acc, tst_f1

100%|██████████| 5/5 [00:00<00:00,  5.67it/s]


(0.9171974522292994, 0.913738222561752)

In [72]:
test_dataset = ImageDataset(
    "../datasets_fin/sample_submission.csv",
    "../datasets_fin/test/",
    transform=tst_transform
)
test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=0,
    pin_memory=True
)

In [73]:
preds_list = []

model.eval()
for image, _ in tqdm(test_loader):
    image = image.to(device)

    with torch.no_grad():
        preds = model(image)
    preds_list.extend(preds.argmax(dim=1).detach().cpu().numpy())

100%|██████████| 99/99 [00:14<00:00,  7.05it/s]


In [74]:
pred_df = pd.DataFrame(test_dataset.df, columns=['ID', 'target'])
pred_df['target'] = preds_list

In [75]:
sample_submission_df = pd.read_csv("../datasets_fin/sample_submission.csv")
assert (sample_submission_df['ID'] == pred_df['ID']).all()

In [79]:
pred_df.to_csv("../output/augmentation_pred_256_"+str(best_epoch)+".csv", index=False)

In [80]:
pred_df.head()

Unnamed: 0,ID,target
0,0008fdb22ddce0ce.jpg,2
1,00091bffdffd83de.jpg,12
2,00396fbc1f6cc21d.jpg,5
3,00471f8038d9c4b6.jpg,6
4,00901f504008d884.jpg,2
