# Image Crop

- Image Crop의 경우 [BaseLine](https://www.dacon.io/competitions/official/235806/codeshare/3365?page=1&dtype=recent)과 [5252님](https://www.dacon.io/competitions/official/235805/codeshare/3362?page=2&dtype=recent)의 코드를 함께 이용했습니다.

5252님과 Dacon.MockingJay님 감사합니다

In [None]:
import pandas as pd
import numpy as np
from glob import glob

from PIL import Image
import cv2
from tqdm import tqdm

import os
import json

import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
import matplotlib.pyplot as plt

In [None]:
data_path = '/data/competition/Dacon/Traffic_Hand_Signal/dataset'

train_path = data_path + '/train'

test_path = data_path + '/test'

action_information = pd.read_csv(data_path + '/action_information.csv')

In [None]:
train_folders = sorted(glob(train_path + '/*'), key = lambda x : int(x.split('/file_')[-1]))

test_folders  = sorted(glob(test_path + '/*'), key = lambda x : int(x.split('/file_')[-1]))

In [None]:
new_image_directory = data_path + '/new_images'
new_train_image_directory = new_image_directory + '/train'
new_test_image_directory = new_image_directory + '/test'

action_information = pd.read_csv(data_path + '/action_information.csv')
sample_submission = pd.read_csv(data_path + '/sample_submission.csv')

In [None]:
classes = pd.get_dummies(action_information[['Label']], columns = ['Label']).to_numpy()
classes

In [None]:
def make_new_dir(path) : 
    if os.path.isdir(path) == False:
        os.makedirs(path)

In [None]:
make_new_dir(new_image_directory)
make_new_dir(new_train_image_directory)
make_new_dir(new_test_image_directory)

In [None]:
train_directories = np.array(sorted(glob(train_path + '/*'), key = lambda x : int(x.split('/')[-1].split('_')[-1])))

for train_directory in tqdm(train_directories, total = len(train_directories)) : 
    file_name = train_directory.split('/')[-1]
    make_new_dir(new_train_image_directory + '/'+file_name)
    
    image_paths = sorted(glob(train_directory + '/*.jpg'), key = lambda x : int(x.split('/')[-1].replace('.jpg','')))
    json_path   = glob(train_directory + '/*.json')[0]

    js = json.load(open(json_path))
    target = js.get('action')
    target = classes[target]
    bounding_boxes = js.get('sequence').get('bounding_box')
    bounding_boxes = [(float(a),float(b),float(c),float(d)) for a,b,c,d in bounding_boxes]

    for image_path, bounding_box in zip(image_paths, bounding_boxes) : 
        image = Image.open(image_path)
        image = image.crop(bounding_box) # left top right bottom
        image = image.resize((224,224))
        image.save(new_train_image_directory + image_path.split('/train')[1])        

In [None]:
test_directories = np.array(sorted(glob(test_path + '/*'), key = lambda x : int(x.split('/')[-1].split('_')[-1])))

for test_directory in tqdm(test_directories, total = len(test_directories)) : 
    file_name = test_directory.split('/')[-1]
    make_new_dir(new_test_image_directory + '/'+file_name)
    
    image_paths = sorted(glob(test_directory + '/*.jpg'), key = lambda x : int(x.split('/')[-1].replace('.jpg','')))
    json_path   = glob(test_directory + '/*.json')[0]

    js = json.load(open(json_path))
    target = js.get('action')
    target = classes[target]
    bounding_boxes = js.get('sequence').get('bounding_box')
    bounding_boxes = [(float(a),float(b),float(c),float(d)) for a,b,c,d in bounding_boxes]

    for image_path, bounding_box in zip(image_paths, bounding_boxes) : 
        image = Image.open(image_path)
        image = image.crop(bounding_box)
        image = image.resize((224,224))
        image.save(new_test_image_directory + image_path.split('/test')[1])        

# Train

In [1]:
import pandas as pd
import numpy as np
from glob import glob

from PIL import Image
import cv2
from tqdm import tqdm

import os
import shutil
import json

import torch
from torch import nn
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
import albumentations as A
from efficientnet_pytorch import EfficientNet

import os
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedKFold

import torch
import random
import torch.backends.cudnn as cudnn

torch.manual_seed(42)
torch.cuda.manual_seed(42)
torch.cuda.manual_seed_all(42)
np.random.seed(42)
cudnn.benchmark = False
cudnn.deterministic = True
random.seed(42)

In [2]:
data_path = '/data/competition/Dacon/Traffic_Hand_Signal/dataset'

train_path = data_path + '/train'

train_image_path = data_path + '/new_images/train'

action_information = pd.read_csv(data_path + '/action_information.csv')

In [3]:
train_folders = sorted(glob(train_path + '/*'), key = lambda x : int(x.split('/file_')[-1]))

train_img_folders = sorted(glob(train_image_path + '/*'), key = lambda x : int(x.split('/file_')[-1]))

모든 데이터를 사용하지 않고 데이터의 일부분만을 사용하여 학습하였더니 더 잘 학습되었습니다.

In [4]:
imgs = []

for i in train_img_folders:
    train_imgs = sorted(glob(i+"/*.jpg"), key = lambda x : int(x.split('.jpg')[0].split("/")[-1]))
    for j in train_imgs[25:45]:
        imgs.append(j)
    for j in train_imgs[-45:-25]:
        imgs.append(j)

In [5]:
answers = []
for train_folder in train_folders :
    json_path = glob(train_folder + '/*.json')[0]
    js = json.load(open(json_path))
    cat = js.get('action')
    answers.append([train_folder.replace(data_path,''),cat])

answers = pd.DataFrame(answers, columns = ['train_path','answer'])
answers
answers["answer"][33] = 5

In [6]:
labels = np.zeros(len(imgs))
for i in tqdm(range(answers.shape[0])):
    for j in range(len(imgs)):
        if answers["train_path"][i] in imgs[j]:
            labels[j] = answers["answer"][i]

100%|██████████| 142/142 [00:04<00:00, 30.35it/s]


In [7]:
device = torch.device("cuda:0")
dropout_rate = 0.1
class_num = 6
learning_rate = 1e-4
BATCH_SIZE = 16
EPOCHS = 25
MODELS = 'efficientnet-b0'
save_path = f"./models/Final_{EPOCHS}"
FOLDS = 5

In [8]:
imgs = np.array(imgs)
labels = np.array(labels)

In [9]:
from sklearn.utils import shuffle
imgs, labels = shuffle(imgs, labels, random_state=42)

In [10]:
albumentations_transform = A.Compose([
    A.RandomGamma(),
    A.ShiftScaleRotate(),
    A.GaussianBlur(),
    A.GaussNoise()
])

In [11]:
class CustomDataset(Dataset):
    def __init__(self, imgs, labels=None, transformer=None, mode="train"):
        self.imgs = imgs
        self.transformer = transformer
        self.mode = mode
        if self.mode == "train" :
            self.labels = labels
            
    def __len__(self):
        return len(self.imgs)
    
    def __getitem__(self, i):
        img = cv2.imread(self.imgs[i]).astype(np.float32)/255
        img = cv2.resize(img, dsize=(224,224))
        if self.mode == "train":
            if self.transformer != None:
                img = self.transformer(image=img)
                img = np.transpose(img["image"], (2,0,1))
            else:
                img = np.transpose(img, (2,0,1))
            return {
                "img" : torch.tensor(img, dtype=torch.float32),
                "label" : torch.tensor(self.labels[i], dtype=torch.long)
            }
        else:
            img = np.transpose(img, (2,0,1))
            return {
                "img" : torch.tensor(img, dtype=torch.float32)
            }

In [12]:
def train_step(batch_item, epoch, batch, training):
    img = batch_item['img'].to(device)
    label = batch_item['label'].to(device)
    if training is True:
        model.train()
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            output = model(img)
            loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        
        return loss
    else:
        model.eval()
        with torch.no_grad():
            output = model(img)
            loss = criterion(output, label)
            
        return loss

In [13]:
skf = StratifiedKFold(n_splits=FOLDS, random_state=42, shuffle=True)

n_iter = 0

for train_idx, val_idx in skf.split(imgs, labels):
    
    model = EfficientNet.from_pretrained(MODELS, num_classes=class_num, advprop=True)
    model._dropout.p = dropout_rate
    model = model.to(device)
    
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()
    scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5)
    
    n_iter += 1

    train_dataset = CustomDataset(imgs[train_idx], labels[train_idx], transformer=albumentations_transform)
    val_dataset = CustomDataset(imgs[val_idx], labels[val_idx], transformer=albumentations_transform)

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=16, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=16, shuffle=True)

    sample_batch = next(iter(train_dataloader))


    loss_plot, val_loss_plot = [], []

    for epoch in range(EPOCHS):
        total_loss, total_val_loss = 0, 0

        tqdm_dataset = tqdm(enumerate(train_dataloader))
        training = True
        for batch, batch_item in tqdm_dataset:
            batch_loss = train_step(batch_item, epoch, batch, training)
            total_loss += batch_loss

            tqdm_dataset.set_postfix({
                'Epoch': epoch + 1,
                'Loss': '{:06f}'.format(batch_loss.item()),
                'Total Loss' : '{:06f}'.format(total_loss/(batch+1))
            })
        loss_plot.append(total_loss/(batch+1))

        tqdm_dataset = tqdm(enumerate(val_dataloader))
        training = False
        for batch, batch_item in tqdm_dataset:
            batch_loss = train_step(batch_item, epoch, batch, training)
            total_val_loss += batch_loss

            tqdm_dataset.set_postfix({
                'Epoch': epoch + 1,
                'Val Loss': '{:06f}'.format(batch_loss.item()),
                'Total Val Loss' : '{:06f}'.format(total_val_loss/(batch+1))
            })
        val_loss_plot.append(total_val_loss/(batch+1))
        scheduler.step(total_val_loss/(batch+1))

        if np.min(val_loss_plot) == val_loss_plot[-1]:
            torch.save(model, save_path + f"_{n_iter}.pt")
            print("## Model Save")
    break

Loaded pretrained weights for efficientnet-b0


284it [00:31,  8.95it/s, Epoch=1, Loss=1.167573, Total Loss=1.496572]
71it [00:02, 29.24it/s, Epoch=1, Val Loss=1.052727, Total Val Loss=1.105829]


## Model Save


284it [00:32,  8.75it/s, Epoch=2, Loss=0.655527, Total Loss=1.063289]
71it [00:02, 28.71it/s, Epoch=2, Val Loss=0.858720, Total Val Loss=0.919579]


## Model Save


284it [00:32,  8.77it/s, Epoch=3, Loss=0.825107, Total Loss=0.988487]
71it [00:02, 27.90it/s, Epoch=3, Val Loss=1.481458, Total Val Loss=0.896710]


## Model Save


284it [00:33,  8.58it/s, Epoch=4, Loss=1.154914, Total Loss=0.946190]
71it [00:02, 24.63it/s, Epoch=4, Val Loss=0.734872, Total Val Loss=0.862482]


## Model Save


284it [00:34,  8.30it/s, Epoch=5, Loss=0.950113, Total Loss=0.891849]
71it [00:02, 24.87it/s, Epoch=5, Val Loss=0.663219, Total Val Loss=0.868961]
284it [00:34,  8.21it/s, Epoch=6, Loss=0.681989, Total Loss=0.806364]
71it [00:02, 24.76it/s, Epoch=6, Val Loss=0.694261, Total Val Loss=0.719491]


## Model Save


284it [00:34,  8.28it/s, Epoch=7, Loss=0.256926, Total Loss=0.733518]
71it [00:02, 25.34it/s, Epoch=7, Val Loss=0.839757, Total Val Loss=0.647697]


## Model Save


284it [00:35,  7.99it/s, Epoch=8, Loss=0.694262, Total Loss=0.699280]
71it [00:02, 24.37it/s, Epoch=8, Val Loss=0.545413, Total Val Loss=0.635735]


## Model Save


284it [00:33,  8.44it/s, Epoch=9, Loss=0.521789, Total Loss=0.645714]
71it [00:02, 24.74it/s, Epoch=9, Val Loss=0.457722, Total Val Loss=0.523832]


## Model Save


284it [00:34,  8.15it/s, Epoch=10, Loss=0.265421, Total Loss=0.598353]
71it [00:02, 24.45it/s, Epoch=10, Val Loss=0.492630, Total Val Loss=0.562009]
284it [00:34,  8.14it/s, Epoch=11, Loss=0.130105, Total Loss=0.578625]
71it [00:02, 24.28it/s, Epoch=11, Val Loss=0.286832, Total Val Loss=0.466145]


## Model Save


284it [00:32,  8.80it/s, Epoch=12, Loss=0.649750, Total Loss=0.554821]
71it [00:02, 28.50it/s, Epoch=12, Val Loss=0.627870, Total Val Loss=0.558915]
284it [00:32,  8.84it/s, Epoch=13, Loss=0.602600, Total Loss=0.527562]
71it [00:02, 29.46it/s, Epoch=13, Val Loss=0.629946, Total Val Loss=0.413143]


## Model Save


284it [00:32,  8.80it/s, Epoch=14, Loss=0.560306, Total Loss=0.495230]
71it [00:02, 26.57it/s, Epoch=14, Val Loss=0.531883, Total Val Loss=0.426137]
284it [00:31,  8.98it/s, Epoch=15, Loss=0.221211, Total Loss=0.468851]
71it [00:02, 29.19it/s, Epoch=15, Val Loss=0.665586, Total Val Loss=0.447493]
284it [00:32,  8.81it/s, Epoch=16, Loss=0.395507, Total Loss=0.448970]
71it [00:02, 29.15it/s, Epoch=16, Val Loss=0.271607, Total Val Loss=0.402512]


## Model Save


284it [00:31,  8.90it/s, Epoch=17, Loss=0.251221, Total Loss=0.454178]
71it [00:02, 29.73it/s, Epoch=17, Val Loss=0.147530, Total Val Loss=0.394197]


## Model Save


284it [00:31,  8.93it/s, Epoch=18, Loss=0.309420, Total Loss=0.435367]
71it [00:02, 28.80it/s, Epoch=18, Val Loss=0.357886, Total Val Loss=0.355474]


## Model Save


284it [00:30,  9.37it/s, Epoch=19, Loss=0.066127, Total Loss=0.424044]
71it [00:02, 28.31it/s, Epoch=19, Val Loss=0.468782, Total Val Loss=0.375409]
284it [00:31,  9.03it/s, Epoch=20, Loss=0.777823, Total Loss=0.423422]
71it [00:02, 28.15it/s, Epoch=20, Val Loss=0.547918, Total Val Loss=0.349284]


## Model Save


284it [00:28,  9.83it/s, Epoch=21, Loss=0.354864, Total Loss=0.403497]
71it [00:02, 23.76it/s, Epoch=21, Val Loss=0.536819, Total Val Loss=0.346329]


## Model Save


284it [00:34,  8.17it/s, Epoch=22, Loss=0.273219, Total Loss=0.377329]
71it [00:02, 24.15it/s, Epoch=22, Val Loss=0.499153, Total Val Loss=0.303536]


## Model Save


284it [00:34,  8.19it/s, Epoch=23, Loss=0.228693, Total Loss=0.386759]
71it [00:02, 24.44it/s, Epoch=23, Val Loss=0.260221, Total Val Loss=0.325302]
284it [00:34,  8.30it/s, Epoch=24, Loss=0.465904, Total Loss=0.369123]
71it [00:02, 24.20it/s, Epoch=24, Val Loss=0.368085, Total Val Loss=0.323131]
284it [00:35,  8.08it/s, Epoch=25, Loss=0.540739, Total Loss=0.346619]
71it [00:03, 23.53it/s, Epoch=25, Val Loss=0.138858, Total Val Loss=0.305045]


# Test

In [14]:
import pandas as pd
import numpy as np
from glob import glob

from PIL import Image
import cv2
from tqdm import tqdm

import os
import shutil
import json

import torch
from torch import nn
from torchvision import models
from torch.utils.data import Dataset, DataLoader
from efficientnet_pytorch import EfficientNet
import albumentations as A
import timm

import os
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter('ignore')
import matplotlib.pyplot as plt

In [24]:
MODELS = 'efficientnet-b0'
save_path = f"./models/Final"
device = torch.device("cuda:0")
class_num = 6
FOLDS = 1

In [25]:
data_path = '/data/competition/Dacon/Traffic_Hand_Signal/dataset'

test_path = data_path + '/test'

test_img_path = data_path + '/new_images/test'

hand_gesture = pd.read_csv(data_path + '/action_information.csv')

In [26]:
test_folders  = sorted(glob(test_path + '//*'), key = lambda x : int(x.split('/file_')[-1]))

test_img_folders  = sorted(glob(test_img_path + '//*'), key = lambda x : int(x.split('/file_')[-1]))

In [27]:
imgs = []

for i in test_img_folders:
    test_imgs = sorted(glob(i+"/*.jpg"), key = lambda x : int(x.split('.jpg')[0].split("/")[-1]))
    for j in test_imgs[30:45]:
        imgs.append(j)

In [28]:
model = EfficientNet.from_pretrained(MODELS, num_classes=class_num, advprop=True)
model = model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

Loaded pretrained weights for efficientnet-b0


In [29]:
def predict(dataset):
    model.eval()
    result = []
    for batch_item in dataset:
        img = batch_item['img'].to(device)
        with torch.no_grad():
            output = model(img)
        output = output.cpu().numpy()
        result.extend(output)
        
    return result

In [30]:
def softmax(x):
    f_x = np.exp(x) / np.sum(np.exp(x))
    return f_x

In [31]:
answers = []
for test_folder in test_folders :
    json_path = glob(test_folder + '/*.json')[0]
    js = json.load(open(json_path))
    answers.append([test_folder.replace(data_path,'')])

answers = pd.DataFrame(answers, columns = ['test_path'])
answers

Unnamed: 0,test_path
0,/test/file_142
1,/test/file_143
2,/test/file_144
3,/test/file_145
4,/test/file_146
5,/test/file_147
6,/test/file_148
7,/test/file_149
8,/test/file_150
9,/test/file_151


In [32]:
submission = pd.read_csv(data_path + '/sample_submission.csv')
sub_ALL = []
sub_ALL_list = []

for i in tqdm(range(FOLDS)):

    model = torch.load(save_path + f"_{EPOCHS}_1.pt", map_location=device)

    test_dataset = CustomDataset(imgs=imgs, labels=None, mode='test')
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=16)

    pred = predict(test_dataloader)

    a = []
    for i in range(len(pred)):
        a.append(softmax(pred[i]))
    a = np.array(a)
    
    sub_ALL = []
    
    for i in range(answers.shape[0]):
        sub = 0
        num_sub = 0
        for j in range(len(imgs)):
            if answers["test_path"][i] in imgs[j]:
                num_sub = num_sub + 1
                sub = sub + a[j]
        sub = sub / num_sub
        sub_ALL.append(sub)
        
    sub_ALL_list.append(sub_ALL)
    
sub_ALL_list = np.array(sub_ALL_list)
sub_ALL_list = np.mean(sub_ALL_list, axis=0)
submission.iloc[:,1:] = sub_ALL_list

100%|██████████| 1/1 [00:03<00:00,  3.61s/it]


In [33]:
SAVE_CSV_NAME = f'./submission/Final.csv'

submission.to_csv(SAVE_CSV_NAME, index=False)