In [1]:
import numpy as np
import pandas as pd
import os
import time
import copy

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data as data_utils
from torchvision import datasets, models, transforms

from sklearn.model_selection import train_test_split

from efficientnet_pytorch import EfficientNet

# For image-keypoints data augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

In [2]:
train_dir = os.path.join("C:\\Users\\hwanseung\\Desktop\\", "open", "1. open","train_imgs")
num_classes = 48
batch_size = 64
num_epochs = 100
num_splits = 10
num_earlystop = 10
input_w = 150
input_h = 150
learning_rate = 0.01
feature_extract = False

In [3]:
df = pd.read_csv(os.path.join("C:\\Users\\hwanseung\\Desktop\\", "open", "1. open","train_df.csv"))
df.head()

Unnamed: 0,image,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,...,right_palm_x,right_palm_y,spine2(back)_x,spine2(back)_y,spine1(waist)_x,spine1(waist)_y,left_instep_x,left_instep_y,right_instep_x,right_instep_y
0,001-1-1-01-Z17_A-0000001.jpg,1046.389631,344.757881,1041.655294,329.820225,1059.429507,334.48423,1020.117796,338.890539,1048.0,...,1067.0,335.0,1019.48423,455.0,1026.51577,514.05473,998.578836,826.718013,1063.204067,838.827465
1,001-1-1-01-Z17_A-0000003.jpg,1069.850679,340.711494,1058.608552,324.59369,1075.242111,325.59369,1041.422997,331.694815,1065.593682,...,1081.18738,323.0,1046.953248,454.062706,1058.766231,508.797029,1002.265676,699.062706,1066.376234,841.499445
2,001-1-1-01-Z17_A-0000005.jpg,1084.475902,337.000008,1078.717997,323.757889,1095.648412,325.242119,1061.039884,329.351571,1086.461032,...,1101.0,334.0,1044.53896,442.05473,1052.844144,495.890539,989.437847,808.757889,1066.071417,841.749554
3,001-1-1-01-Z17_A-0000007.jpg,1042.320047,361.452689,1037.907194,344.117804,1050.328382,353.913729,1016.844144,340.913737,1042.164191,...,1057.406318,372.46104,982.937294,458.109462,990.375124,507.624866,1001.305177,829.233767,1159.516499,599.389997
4,001-1-1-01-Z17_A-0000009.jpg,1058.046395,343.164191,1046.717997,331.703163,1058.13265,331.781079,1031.258806,338.59369,1049.81262,...,1069.648429,334.109461,1024.843791,453.687572,1034.391088,510.843791,998.625231,805.218921,1059.625956,839.765102


In [4]:
imgs = df.iloc[:, 0].to_numpy()
motions = df.iloc[:, 1:]
columns = motions.columns.to_list()[::2]
class_labels = [label.replace('_x', '').replace('_y', '') for label in columns]
keypoints = []
for motion in motions.to_numpy():
    a_keypoints = []
    for i in range(0, motion.shape[0], 2):
        a_keypoints.append((float(motion[i]), float(motion[i+1])))
    keypoints.append(a_keypoints)
keypoints = np.array(keypoints)

In [5]:
def train_model(model, dataloaders, criterion, optimizer, earlystop=0, num_epochs=25, is_inception=False):
    since = time.time()
    
    val_acc_history = []
    val_loss_history = []
    earlystop_value = 0

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0
    best_loss = 999999999
    
    for epoch in range(num_epochs):
        epoch_since = time.time()
        if earlystop and earlystop_value >= earlystop:
            break

        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            
            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        print('outputs.shape, aux_outputs shape : ', outputs.shape, aux_outputs.shape)
                        loss1 = criterion(outputs.float(), labels.float())
                        loss2 = criterion(aux_outputs.float(), labels.float())
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        print('output shape : ', outputs.shape)
                        print(outputs)
                        loss = criterion(outputs.float(), labels.float())

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                # for regression
                running_corrects += torch.sum(outputs == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            
            epoch_time_elapsed = time.time() - epoch_since
            print('{} ({}) Loss: {:.4f} Acc: {:.4f} Elapsed time: {:.0f}m {:.0f}s'.format(
                phase, len(dataloaders[phase].dataset), epoch_loss, epoch_acc, epoch_time_elapsed // 60, epoch_time_elapsed % 60))
#             neptune.log_metric(f'{phase}_loss', epoch_loss)
#             neptune.log_metric(f'{phase}_acc', epoch_acc)
            
            # deep copy the model
            if phase == 'val':
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())
                    earlystop_value = 0
                else:
                    earlystop_value += 1
                val_loss_history.append(epoch_loss)
                val_acc_history.append(epoch_acc)
        print()

    time_elapsed = time.time() - since
    print('Training and Validation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best validation Acc: {:4f}\n'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, {'acc': val_acc_history, 'loss': val_loss_history}

In [6]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = True

In [7]:
model_name = 'efficientnet'
model_ver = 'b5'
def initialize_model(model_name, model_ver, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    # variables is model specific.
#     model_ft = getattr(models, f'{model_name}{model_ver}')(pretrained=use_pretrained)
    model = EfficientNet.from_pretrained('efficientnet-b5')
    set_parameter_requires_grad(model, feature_extract)
    num_ftrs = model._fc.in_features
    model._fc = nn.Linear(num_ftrs, num_classes)
    #print(model)

    return model

# Initialize the model for this run
model_ft = initialize_model(model_name, model_ver, num_classes, feature_extract, use_pretrained=True)

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Send the model to GPU
model_ft = model_ft.to(device)

# Print the model we just instantiated
print(model_ft)

Loaded pretrained weights for efficientnet-b5
EfficientNet(
  (_conv_stem): Conv2dStaticSamePadding(
    3, 48, kernel_size=(3, 3), stride=(2, 2), bias=False
    (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
  )
  (_bn0): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
  (_blocks): ModuleList(
    (0): MBConvBlock(
      (_depthwise_conv): Conv2dStaticSamePadding(
        48, 48, kernel_size=(3, 3), stride=[1, 1], groups=48, bias=False
        (static_padding): ZeroPad2d(padding=(1, 1, 1, 1), value=0.0)
      )
      (_bn1): BatchNorm2d(48, eps=0.001, momentum=0.010000000000000009, affine=True, track_running_stats=True)
      (_se_reduce): Conv2dStaticSamePadding(
        48, 12, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_se_expand): Conv2dStaticSamePadding(
        12, 48, kernel_size=(1, 1), stride=(1, 1)
        (static_padding): Identity()
      )
      (_project_conv): Conv2

In [8]:
A_transforms = {
    'train':
        A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.OneOf([A.HorizontalFlip(p=1),
                     A.RandomRotate90(p=1),
                     A.VerticalFlip(p=1)            
            ], p=0.5),
            A.OneOf([A.MotionBlur(p=1),
                     A.GaussNoise(p=1)                 
            ], p=0.5),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ], keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'], remove_invisible=True, angle_in_degrees=True)),
    
    'val':
        A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ], keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'], remove_invisible=True, angle_in_degrees=True)),
    
    'test':
        A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
}

In [9]:
class Dataset(data_utils.Dataset):
    """__init__ and __len__ functions are the same as in TorchvisionDataset"""
    def __init__(self, data_dir, imgs, keypoints, phase, class_labels=None, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.keypoints = keypoints
        self.phase = phase
        self.class_labels = class_labels
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        # Read an image with OpenCV
        img = cv2.imread(os.path.join(self.data_dir, self.imgs[idx]))
        keypoints = self.keypoints[idx]
    
        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img, keypoints=keypoints, class_labels=self.class_labels)
            img = augmented['image']
            keypoints = augmented['keypoints']
        keypoints = np.array(keypoints).flatten()

        return img, keypoints
    
    def __len__(self):
        return len(self.imgs)

In [None]:
criterion = nn.MSELoss()

since = time.time()
X_train, X_val, y_train, y_val = train_test_split(imgs, keypoints, test_size=1/num_splits, random_state=42)
train_data = Dataset(train_dir, X_train, y_train, data_transforms=A_transforms, class_labels=class_labels, phase='train')
val_data = Dataset(train_dir, X_val, y_val, data_transforms=A_transforms, class_labels=class_labels, phase='val')
train_loader = data_utils.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = data_utils.DataLoader(val_data, batch_size=batch_size, shuffle=False)
dataloaders = {'train': train_loader, 'val': val_loader}

# Observe that all parameters are being optimized
optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)

# Train and evaluate
model_ft, hists = train_model(
    model_ft, dataloaders, criterion, optimizer_ft,
    num_epochs=num_epochs,  is_inception=(model_name=="inception"))
weight_path = os.path.join("C:\\Users\\hwanseung\\Desktop\\", "open", "1. open", f"baseline_{model_name}_{model_ver}.pth")
torch.save(model_ft.state_dict(), weight_path)
time_elapsed = time.time() - since
print('Elapsed time: {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))

Epoch 1/100
----------
output shape :  torch.Size([64, 48])
tensor([[-0.0195,  0.2082,  0.0521,  ...,  0.1149,  0.1544, -0.0644],
        [-0.0816, -0.0284, -0.0936,  ..., -0.3137, -0.1596, -0.1544],
        [-0.0128, -0.1161, -0.2678,  ...,  0.2389,  0.3187, -0.1733],
        ...,
        [-0.0648,  0.2571, -0.1847,  ...,  0.0053, -0.0686,  0.1683],
        [-0.0652,  0.0072, -0.0630,  ..., -0.3969,  0.3598, -0.1659],
        [ 0.0233, -0.2680,  0.1070,  ..., -0.3193, -0.0344, -0.0630]],
       device='cuda:0', grad_fn=<AddmmBackward>)
output shape :  torch.Size([64, 48])
tensor([[ 0.4858,  0.3908,  0.6688,  ...,  0.8061,  0.7135,  0.9109],
        [ 1.3177,  1.0937,  0.8080,  ...,  0.9237,  1.3598,  1.1174],
        [ 0.4363,  0.6223,  0.2206,  ...,  0.9081,  0.8366,  0.6869],
        ...,
        [ 0.4631,  0.4165,  0.4280,  ...,  0.6220,  0.3384,  0.5771],
        [ 0.4070,  0.2537,  0.2890,  ...,  0.0805,  0.5997, -0.0105],
        [ 1.4201,  1.1407,  1.8481,  ...,  1.4836,  1.594

output shape :  torch.Size([64, 48])
tensor([[ 85.5135,  81.0803,  85.7608,  ...,  91.1384,  85.4781,  90.8919],
        [ 87.1853,  83.0841,  86.9436,  ...,  92.6413,  87.1843,  93.1455],
        [ 86.1813,  82.1274,  86.3748,  ...,  92.0082,  86.3219,  92.1932],
        ...,
        [ 82.3746,  78.3232,  82.5082,  ...,  87.8834,  82.7810,  88.3281],
        [100.0753,  95.6692, 100.1227,  ..., 106.8162, 100.4958, 106.7678],
        [101.2599,  96.5023, 101.6502,  ..., 107.6620, 101.1880, 108.1359]],
       device='cuda:0', grad_fn=<AddmmBackward>)
output shape :  torch.Size([64, 48])
tensor([[88.0686, 84.1491, 88.0819,  ..., 95.9545, 88.9974, 96.0690],
        [89.2906, 85.4532, 89.5268,  ..., 97.7365, 89.9715, 97.5326],
        [87.3097, 83.6574, 87.8011,  ..., 95.5157, 88.3749, 95.6061],
        ...,
        [84.2892, 81.3464, 84.6983,  ..., 92.0473, 85.3811, 92.1451],
        [91.2450, 87.0493, 91.4897,  ..., 99.4853, 91.9678, 99.6724],
        [89.7124, 85.6752, 89.7971,  ..., 97

output shape :  torch.Size([64, 48])
tensor([[ 78.7569,  74.1645,  78.8820,  ..., 102.8067,  81.1130, 102.6831],
        [ 72.4348,  67.9541,  72.4576,  ...,  94.8511,  74.9091,  94.5552],
        [ 68.5290,  64.4512,  68.8415,  ...,  89.9648,  70.7742,  89.6837],
        ...,
        [ 71.2688,  66.9424,  71.3110,  ...,  93.3443,  73.7885,  93.0359],
        [ 69.6845,  65.5165,  70.0402,  ...,  91.7643,  72.0575,  91.3797],
        [ 75.9368,  71.4866,  76.0377,  ...,  98.5839,  78.2241,  98.5459]],
       device='cuda:0', grad_fn=<AddmmBackward>)
output shape :  torch.Size([64, 48])
tensor([[72.7571, 68.1395, 73.0417,  ..., 95.0334, 74.6281, 94.9142],
        [72.1041, 67.2671, 72.1772,  ..., 94.6755, 74.0860, 94.0544],
        [73.7810, 68.6727, 73.6340,  ..., 96.5007, 75.8823, 96.1845],
        ...,
        [71.0909, 66.3843, 70.9428,  ..., 93.1568, 72.6621, 92.9058],
        [67.0173, 62.5934, 67.0407,  ..., 87.9575, 68.8912, 87.5633],
        [64.1979, 59.8691, 64.1689,  ..., 83

output shape :  torch.Size([64, 48])
tensor([[73.3241, 68.8536, 73.3029,  ..., 92.8441, 74.2714, 91.9731],
        [70.6403, 66.3328, 70.8938,  ..., 89.4885, 71.7177, 88.8898],
        [72.9374, 68.4536, 73.2366,  ..., 92.7544, 74.0624, 91.8927],
        ...,
        [75.5498, 71.2417, 75.8145,  ..., 95.6975, 76.5647, 94.7878],
        [72.6799, 68.3879, 72.9357,  ..., 91.7063, 73.4082, 90.9314],
        [70.1480, 65.9751, 70.1855,  ..., 88.7782, 71.0150, 88.0542]],
       device='cuda:0', grad_fn=<AddmmBackward>)
output shape :  torch.Size([64, 48])
tensor([[ 79.3178,  74.5237,  79.7129,  ...,  99.9815,  80.3671,  99.2531],
        [ 75.7717,  71.4921,  76.0114,  ...,  95.6057,  77.1000,  94.7612],
        [ 88.4827,  83.2558,  88.6504,  ..., 111.9546,  89.7114, 111.0110],
        ...,
        [ 68.5111,  64.4694,  68.5117,  ...,  86.1124,  69.3183,  85.5027],
        [ 77.1241,  73.0467,  77.4324,  ...,  97.3523,  78.5267,  96.4908],
        [ 68.4368,  64.6759,  68.6623,  ...,  86.2

output shape :  torch.Size([64, 48])
tensor([[-3628.9731, -3469.0698, -3740.7246,  ..., -3786.3989, -3657.3596,
         -3751.8135],
        [-6654.5039, -6354.7271, -6859.8711,  ..., -6925.9854, -6698.6895,
         -6867.2666],
        [-5259.5054, -5027.2939, -5422.1616,  ..., -5486.0337, -5301.0635,
         -5436.6338],
        ...,
        [-5948.8140, -5680.5894, -6132.4907,  ..., -6188.4688, -5987.6255,
         -6136.0430],
        [-2918.6970, -2785.6021, -3011.5876,  ..., -3014.5237, -2935.9636,
         -2990.8594],
        [-3230.1770, -3088.1504, -3329.6023,  ..., -3372.4163, -3255.8643,
         -3341.6060]], device='cuda:0')
output shape :  torch.Size([64, 48])
tensor([[-2681.1653, -2550.9653, -2773.7803,  ..., -2654.1323, -2669.5144,
         -2641.8223],
        [-4686.9995, -4468.5288, -4835.7026,  ..., -4827.9546, -4708.9624,
         -4792.6274],
        [-5946.3687, -5683.7153, -6130.3154,  ..., -6201.6465, -5993.0107,
         -6145.9629],
        ...,
        [

In [None]:
model_ft.load_state_dict(torch.load(weight_path))

In [None]:
# test_dir = f'{prefix}/data/test_imgs'
test_dir = os.path.join("C:\\Users\\hwanseung\\Desktop\\", "open", "1. open","test_imgs")
test_imgs = os.listdir(test_dir)

In [None]:
class TestDataset(data_utils.Dataset):
    """__init__ and __len__ functions are the same as in TorchvisionDataset"""
    def __init__(self, data_dir, imgs, phase, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.phase = phase
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        filename = self.imgs[idx]
        # Read an image with OpenCV
        img = cv2.imread(os.path.join(self.data_dir, self.imgs[idx]))

        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img)
            img = augmented['image']
        return filename, img
    
    def __len__(self):
        return len(self.imgs)
    
test_data = TestDataset(test_dir, test_imgs, data_transforms=A_transforms, phase='test')
test_loader = data_utils.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [None]:
all_predictions = []
files = []
with torch.no_grad():
    for filenames, inputs in test_loader:
        predictions = list(model_ft(inputs.to(device)).cpu().numpy())
        files.extend(filenames)
        for prediction in predictions:
            all_predictions.append(prediction)

In [None]:
all_predictions = np.array(all_predictions)
for i in range(all_predictions.shape[0]):
    all_predictions[i, [2*j for j in range(num_classes//2)]] /= input_w / 1920
    all_predictions[i, [2*j + 1 for j in range(num_classes//2)]] /= input_h / 1080

In [None]:
path = os.path.join("C:\\Users\\hwanseung\\Desktop\\", "open", "1. open","sample_submission.csv")
df_sub = pd.read_csv(path)
df = pd.DataFrame(columns=df_sub.columns)
df['image'] = files
df.iloc[:, 1:] = all_predictions
df.head()

In [None]:
df.to_csv(os.path.join("C:\\Users\\hwanseung\\Desktop\\", "open", "1. open","efficient.csv"), index=False)