In [None]:
# Install albumentations, then upgrade it to the newest release available.
# NOTE(review): the version is not pinned, so a later re-run may pull a release
# with a different API than the one this notebook was written against.
!pip install albumentations
!pip install --upgrade albumentations

In [None]:
import numpy as np
import pandas as pd
import os
import time
import copy

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data as data_utils
from torchvision import datasets, models, transforms

from sklearn.model_selection import train_test_split

# For image-keypoints data augmentation
import albumentations as A
from albumentations.pytorch import ToTensor
import cv2

In [None]:
# Mount Google Drive at /content/drive so the data under MyDrive is reachable
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Prefix data directory: set this to the path where your own copy of the data
# lives, and create a `local` folder inside it (model weights are saved there).
prefix_dir = '/content/drive/MyDrive/1. open'

# Directory holding the training images. Images are read directly from this
# folder by file name; the names come from the first column of train_df.csv.
train_dir = f'{prefix_dir}/train_imgs'

# Torchvision model family and version; combined as f'{model_name}{model_ver}'
model_name = 'resnet'
model_ver = '18'

# Size of the network's output layer. This is a regression target, not a class
# count: 24 keypoints x (x, y) = 48 values.
num_classes = 48

# Batch size for training (change depending on how much memory you have)
batch_size = 64

# Maximum number of training epochs
num_epochs = 60

# num_splits: the validation set is 1/num_splits of the data
# num_earlystop: stop after this many epochs without a better validation loss
num_splits = 10
num_earlystop = 10

# Input size (width, height) that every image is resized to
input_w = 150
input_h = 150

# Learning rate for optimizer
learning_rate = 0.01

# Flag for feature extracting. When False, we finetune the whole model,
# when True we only update the reshaped layer params
feature_extract = False

In [None]:
# Load the training annotations (image file name followed by the keypoint
# x/y columns) and preview the first rows.
df = pd.read_csv(f'{prefix_dir}/train_df.csv')
df.head()

Unnamed: 0,image,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,right_ear_y,left_shoulder_x,left_shoulder_y,right_shoulder_x,right_shoulder_y,left_elbow_x,left_elbow_y,right_elbow_x,right_elbow_y,left_wrist_x,left_wrist_y,right_wrist_x,right_wrist_y,left_hip_x,left_hip_y,right_hip_x,right_hip_y,left_knee_x,left_knee_y,right_knee_x,right_knee_y,left_ankle_x,left_ankle_y,right_ankle_x,right_ankle_y,neck_x,neck_y,left_palm_x,left_palm_y,right_palm_x,right_palm_y,spine2(back)_x,spine2(back)_y,spine1(waist)_x,spine1(waist)_y,left_instep_x,left_instep_y,right_instep_x,right_instep_y
0,001-1-1-01-Z17_A-0000001.jpg,1046.389631,344.757881,1041.655294,329.820225,1059.429507,334.48423,1020.117796,338.890539,1048.0,343.0,992.0,394.0,1054.0,400.0,956.0,368.0,1134.0,371.0,1003.497242,327.640085,1078.0,341.0,999.0,570.0,1046.0,573.0,995.0,695.0,1054.0,698.0,983.0,820.0,1042.0,829.0,1019.107277,373.766222,1013.13736,316.311695,1067.0,335.0,1019.48423,455.0,1026.51577,514.05473,998.578836,826.718013,1063.204067,838.827465
1,001-1-1-01-Z17_A-0000003.jpg,1069.850679,340.711494,1058.608552,324.59369,1075.242111,325.59369,1041.422997,331.694815,1065.593682,333.968459,1010.0,398.0,1077.0,386.0,974.0,398.0,1144.0,337.0,1004.0,346.0,1094.0,326.0,1020.0,557.0,1069.0,567.0,1013.0,583.0,1072.0,697.0,1003.843781,678.797029,1042.0,829.0,1047.27944,362.031898,1017.383112,332.890539,1081.18738,323.0,1046.953248,454.062706,1058.766231,508.797029,1002.265676,699.062706,1066.376234,841.499445
2,001-1-1-01-Z17_A-0000005.jpg,1084.475902,337.000008,1078.717997,323.757889,1095.648412,325.242119,1061.039884,329.351571,1086.461032,334.109461,1020.0,381.0,1083.0,389.0,984.0,362.0,1163.0,372.0,1027.507419,327.38312,1111.0,340.0,1016.0,552.0,1061.0,556.0,1018.0,674.0,1070.0,696.0,987.703151,788.867342,1044.0,826.0,1063.029716,358.679953,1042.374777,319.031541,1101.0,334.0,1044.53896,442.05473,1052.844144,495.890539,989.437847,808.757889,1066.071417,841.749554
3,001-1-1-01-Z17_A-0000007.jpg,1042.320047,361.452689,1037.907194,344.117804,1050.328382,353.913729,1016.844144,340.913737,1042.164191,359.351579,968.0,392.0,1010.0,411.0,941.812612,339.703151,1109.0,441.0,971.654931,328.648429,1066.812604,383.40631,972.0,571.0,1000.0,559.0,991.0,700.0,1110.0,469.0,991.406302,816.13265,1133.0,591.0,1007.302983,374.179405,981.842327,329.109461,1057.406318,372.46104,982.937294,458.109462,990.375124,507.624866,1001.305177,829.233767,1159.516499,599.389997
4,001-1-1-01-Z17_A-0000009.jpg,1058.046395,343.164191,1046.717997,331.703163,1058.13265,331.781079,1031.258806,338.59369,1049.81262,338.18738,997.0,393.0,1054.0,395.0,961.0,423.0,1132.0,360.0,988.676303,357.688297,1080.0,337.0,1000.0,567.0,1043.0,570.0,1017.0,673.0,1059.0,701.0,998.406302,784.570501,1040.0,827.0,1036.318945,366.195727,999.777421,349.829291,1069.648429,334.109461,1024.843791,453.687572,1034.391088,510.843791,998.625231,805.218921,1059.625956,839.765102


In [None]:
# Column 0 holds the image file names; the remaining columns are the keypoint
# coordinates laid out as x, y, x, y, ...
imgs = df.iloc[:, 0].to_numpy()
motions = df.iloc[:, 1:]

# One label per keypoint: take every other column (the `_x` ones) and strip
# the axis suffix.
columns = motions.columns.to_list()[::2]
class_labels = [label.replace('_x', '').replace('_y', '') for label in columns]

# Pair the interleaved coordinates into (x, y) points in a single vectorized
# reshape instead of nested Python loops.
# Result shape: (num_images, num_keypoints, 2), dtype float64.
keypoints = motions.to_numpy(dtype=float).reshape(len(motions), motions.shape[1] // 2, 2)

In [None]:
def train_model(model, dataloaders, criterion, optimizer, earlystop=0, num_epochs=25, is_inception=False):
    """Run a train/val loop and keep the weights with the lowest validation loss.

    Args:
        model: ``nn.Module`` to optimize. It is updated in place and returned.
        dataloaders: dict with ``'train'`` and ``'val'`` DataLoaders that yield
            ``(inputs, labels)`` batches.
        criterion: loss, called as ``criterion(outputs.float(), labels.float())``.
        optimizer: optimizer already bound to the model's parameters.
        earlystop: stop once this many consecutive epochs have passed without a
            better validation loss. ``0`` disables early stopping.
        num_epochs: maximum number of epochs to run.
        is_inception: when True, the training-phase forward pass is unpacked as
            ``(outputs, aux_outputs)`` and the auxiliary loss is added with
            weight 0.4.

    Returns:
        ``(model, history)`` where ``model`` holds the best weights and
        ``history`` is ``{'acc': [...], 'loss': [...]}`` with one Python float
        per validation epoch.

    Note:
        ``acc`` counts output elements that are exactly equal to the labels,
        divided by the number of samples. For a regression target this is
        practically always 0; it is kept only so the returned dict keeps its
        original keys.
    """
    since = time.time()

    # Run on the device the model already lives on instead of relying on a
    # notebook-level `device` global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    val_acc_history = []
    val_loss_history = []
    earlystop_value = 0

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_loss = float('inf')

    for epoch in range(num_epochs):
        epoch_since = time.time()
        if earlystop and earlystop_value >= earlystop:
            break

        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            num_samples = len(dataloaders[phase].dataset)

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradients are tracked only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # In train mode Inception also returns an auxiliary
                        # output; its loss is added with a 0.4 weight.
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs.float(), labels.float())
                        loss2 = criterion(aux_outputs.float(), labels.float())
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs.float(), labels.float())

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics (kept as plain Python numbers so nothing in the
                # history holds on to device tensors)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(outputs == labels.data).item()

            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            # Measured from the start of the epoch, so the 'val' line includes
            # the time spent in the 'train' phase.
            epoch_time_elapsed = time.time() - epoch_since
            print('{} ({}) Loss: {:.4f} Acc: {:.4f} Elapsed time: {:.0f}m {:.0f}s'.format(
                phase, num_samples, epoch_loss, epoch_acc, epoch_time_elapsed // 60, epoch_time_elapsed % 60))

            # Model selection and early stopping are driven by validation loss
            if phase == 'val':
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    earlystop_value = 0
                else:
                    earlystop_value += 1
                val_loss_history.append(epoch_loss)
                val_acc_history.append(epoch_acc)
        print()

    time_elapsed = time.time() - since
    print('Training and Validation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # Report the metric that actually selected the checkpoint
    print('Best validation Loss: {:.4f} (Acc: {:.4f})\n'.format(best_loss, best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, {'acc': val_acc_history, 'loss': val_loss_history}

In [None]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all of ``model``'s parameters when ``feature_extracting`` is truthy.

    When the flag is falsy the model is left untouched. Returns ``None``.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [None]:
def initialize_model(model_name, model_ver, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision model and replace its ``fc`` head.

    The constructor is looked up on ``torchvision.models`` under the name
    ``f'{model_name}{model_ver}'``. When ``feature_extract`` is truthy the
    loaded parameters are frozen before the new head is attached, so only the
    new ``nn.Linear(in_features, num_classes)`` layer stays trainable.
    """
    constructor = getattr(models, f'{model_name}{model_ver}')
    network = constructor(pretrained=use_pretrained)
    set_parameter_requires_grad(network, feature_extract)

    # Swap the final fully connected layer for one with `num_classes` outputs
    head_inputs = network.fc.in_features
    network.fc = nn.Linear(head_inputs, num_classes)
    return network

# Build the network from the configuration values (pretrained weights, new output layer)
model_ft = initialize_model(model_name, model_ver, num_classes, feature_extract, use_pretrained=True)

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Move the model to the selected device
model_ft = model_ft.to(device)

# Print the model we just instantiated
print(model_ft)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Data augmentation and normalization for training with Albumentations
A_transforms = {
    'train':
        A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.OneOf([A.HorizontalFlip(p=1),
                     A.RandomRotate90(p=1),
                     A.VerticalFlip(p=1)            
            ], p=0.5),
            A.OneOf([A.MotionBlur(p=1),
                     A.GaussNoise(p=1)                 
            ], p=0.5),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensor()
        ], keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'], remove_invisible=True, angle_in_degrees=True)),
    
    'val':
        A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensor()
        ], keypoint_params=A.KeypointParams(format='xy', label_fields=['class_labels'], remove_invisible=True, angle_in_degrees=True)),
    
    'test':
        A.Compose([
            A.Resize(input_h, input_w, always_apply=True),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensor()
        ])
}

In [None]:
class Dataset(data_utils.Dataset):
    """Map-style dataset yielding ``(image, flattened keypoints)`` pairs.

    Args:
        data_dir: folder containing the image files.
        imgs: sequence of image file names, relative to ``data_dir``.
        keypoints: per-image keypoints, indexed in the same order as ``imgs``.
        phase: key used to pick the pipeline from ``data_transforms``.
        class_labels: keypoint labels passed to the transform as ``class_labels``.
        data_transforms: optional dict mapping phase name to a callable invoked
            as ``transform(image=..., keypoints=..., class_labels=...)``.
    """
    def __init__(self, data_dir, imgs, keypoints, phase, class_labels=None, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.keypoints = keypoints
        self.phase = phase
        self.class_labels = class_labels
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        """Load image ``idx``, apply the phase transform, return ``(img, keypoints)``.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        img_path = os.path.join(self.data_dir, self.imgs[idx])
        # cv2.imread signals failure by returning None instead of raising, so
        # fail here with the offending path rather than deep inside the
        # transform or the DataLoader collate step.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # NOTE(review): OpenCV decodes to BGR channel order and no conversion
        # is done before Normalize -- confirm whether RGB is expected. Any
        # change must be mirrored in TestDataset.
        keypoints = self.keypoints[idx]

        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img, keypoints=keypoints, class_labels=self.class_labels)
            img = augmented['image']
            keypoints = augmented['keypoints']
        # Flatten [(x, y), ...] into [x0, y0, x1, y1, ...] as the regression target
        keypoints = np.array(keypoints).flatten()

        return img, keypoints

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.imgs)

# Training takes a long time, so play a beep when this cell finishes running
# (snippet found online).
from google.colab import output
output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')

In [None]:
# Setup the loss fxn
criterion = nn.MSELoss()

since = time.time()
X_train, X_val, y_train, y_val = train_test_split(imgs, keypoints, test_size=1/num_splits, random_state=42)
train_data = Dataset(train_dir, X_train, y_train, data_transforms=A_transforms, class_labels=class_labels, phase='train')
val_data = Dataset(train_dir, X_val, y_val, data_transforms=A_transforms, class_labels=class_labels, phase='val')
train_loader = data_utils.DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = data_utils.DataLoader(val_data, batch_size=batch_size, shuffle=False)
dataloaders = {'train': train_loader, 'val': val_loader}

# Observe that all parameters are being optimized
optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)

# Train and evaluate
model_ft, hists = train_model(
    model_ft, dataloaders, criterion, optimizer_ft,
    num_epochs=num_epochs, earlystop=num_earlystop, is_inception=(model_name=="inception"))
torch.save(model_ft.state_dict(), f'{prefix_dir}/local/baseline_{model_name}{model_ver}.pt')
time_elapsed = time.time() - since
print('Elapsed time: {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))

Epoch 1/60
----------
train (3775) Loss: 817.5892 Acc: 0.0000 Elapsed time: 1m 44s
val (420) Loss: 256.2412 Acc: 0.0000 Elapsed time: 1m 54s

Epoch 2/60
----------
train (3775) Loss: 254.3940 Acc: 0.0000 Elapsed time: 1m 45s
val (420) Loss: 377.2808 Acc: 0.0000 Elapsed time: 1m 55s

Epoch 3/60
----------
train (3775) Loss: 165.5187 Acc: 0.0000 Elapsed time: 1m 43s
val (420) Loss: 153.9246 Acc: 0.0000 Elapsed time: 1m 54s

Epoch 4/60
----------
train (3775) Loss: 130.4168 Acc: 0.0000 Elapsed time: 1m 44s
val (420) Loss: 171.0567 Acc: 0.0000 Elapsed time: 1m 54s

Epoch 5/60
----------
train (3775) Loss: 90.9904 Acc: 0.0000 Elapsed time: 1m 44s
val (420) Loss: 79.8386 Acc: 0.0000 Elapsed time: 1m 54s

Epoch 6/60
----------
train (3775) Loss: 74.8638 Acc: 0.0000 Elapsed time: 1m 43s
val (420) Loss: 60.6093 Acc: 0.0000 Elapsed time: 1m 53s

Epoch 7/60
----------
train (3775) Loss: 66.6540 Acc: 0.0000 Elapsed time: 1m 43s
val (420) Loss: 62.5706 Acc: 0.0000 Elapsed time: 1m 53s

Epoch 8/60
-

In [None]:
# Reload the saved weights. map_location lets a checkpoint written on a GPU
# runtime load on whatever device this session selected (including CPU-only).
model_ft.load_state_dict(torch.load(f'{prefix_dir}/local/baseline_{model_name}{model_ver}.pt', map_location=device))

<All keys matched successfully>

In [None]:
# Folder with the test images. os.listdir returns entries in arbitrary order,
# so sort them to make the submission row order reproducible.
test_dir = f'{prefix_dir}/test_imgs'
test_imgs = sorted(os.listdir(test_dir))

In [None]:
class TestDataset(data_utils.Dataset):
    """Map-style dataset yielding ``(filename, image)`` pairs for inference.

    Args:
        data_dir: folder containing the image files.
        imgs: sequence of image file names, relative to ``data_dir``.
        phase: key used to pick the pipeline from ``data_transforms``.
        data_transforms: optional dict mapping phase name to a callable invoked
            as ``transform(image=...)``.
    """
    def __init__(self, data_dir, imgs, phase, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.phase = phase
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        """Load image ``idx``, apply the phase transform, return ``(filename, img)``.

        Raises:
            FileNotFoundError: if the image file is missing or cannot be decoded.
        """
        filename = self.imgs[idx]
        img_path = os.path.join(self.data_dir, filename)
        # cv2.imread signals failure by returning None instead of raising, so
        # fail here with the offending path.
        img = cv2.imread(img_path)
        if img is None:
            raise FileNotFoundError(f'Could not read image: {img_path}')
        # NOTE(review): OpenCV decodes to BGR channel order and no conversion
        # is done before Normalize -- keep this consistent with the training
        # Dataset if it is ever changed.

        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img)
            img = augmented['image']
        return filename, img

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.imgs)
    
# Inference data: 'test' pipeline, original file order kept (shuffle=False),
# and a batch four times the training batch size.
test_data = TestDataset(test_dir, test_imgs, data_transforms=A_transforms, phase='test')
test_loader = data_utils.DataLoader(test_data, batch_size=batch_size * 4, shuffle=False)

In [None]:
# Switch to evaluation mode explicitly. When this notebook is resumed from the
# saved checkpoint without running the training cell, the freshly built model
# is still in training mode and BatchNorm would use per-batch statistics.
model_ft.eval()

all_predictions = []
files = []
with torch.no_grad():
    for filenames, inputs in test_loader:
        batch_predictions = model_ft(inputs.to(device)).cpu().numpy()
        files.extend(filenames)
        # One row of predicted coordinates per image
        all_predictions.extend(batch_predictions)

In [None]:
# Map predictions from the resized (input_w x input_h) frame back to the
# original image frame.
# Assumes every test image is 1920x1080 -- TODO confirm.
ORIG_W, ORIG_H = 1920, 1080

# NOTE: this rescales `all_predictions` under the same name, so re-running this
# cell without re-running inference would rescale twice.
all_predictions = np.array(all_predictions)
all_predictions[:, 0:num_classes:2] /= input_w / ORIG_W   # x coordinates (even columns)
all_predictions[:, 1:num_classes:2] /= input_h / ORIG_H   # y coordinates (odd columns)

In [None]:
# Build the submission frame with the same columns as the sample submission:
# file names go into `image`, the predictions fill every remaining column.
# NOTE: this rebinds `df`, which earlier in the notebook held the training annotations.
df_sub = pd.read_csv(f'{prefix_dir}/sample_submission.csv')
df = pd.DataFrame(columns=df_sub.columns)
df['image'] = files
df.iloc[:, 1:] = all_predictions
df.head()

Unnamed: 0,image,nose_x,nose_y,left_eye_x,left_eye_y,right_eye_x,right_eye_y,left_ear_x,left_ear_y,right_ear_x,right_ear_y,left_shoulder_x,left_shoulder_y,right_shoulder_x,right_shoulder_y,left_elbow_x,left_elbow_y,right_elbow_x,right_elbow_y,left_wrist_x,left_wrist_y,right_wrist_x,right_wrist_y,left_hip_x,left_hip_y,right_hip_x,right_hip_y,left_knee_x,left_knee_y,right_knee_x,right_knee_y,left_ankle_x,left_ankle_y,right_ankle_x,right_ankle_y,neck_x,neck_y,left_palm_x,left_palm_y,right_palm_x,right_palm_y,spine2(back)_x,spine2(back)_y,spine1(waist)_x,spine1(waist)_y,left_instep_x,left_instep_y,right_instep_x,right_instep_y
0,714-3-5-35-Z94_D-0000001.jpg,983.371,373.967,1017.8,361.027,997.629,361.597,1042.76,371.309,1001.18,368.643,1078.17,440.561,973.326,434.984,1116.9,486.492,937.062,474.596,1062.76,486.525,919.911,484.848,1070.01,603.051,1004.55,591.008,1078.24,692.812,1011.41,691.53,1127.77,781.845,1072.88,774.889,1008.92,414.647,1034.13,491.002,908.92,486.835,1001.17,485.329,1013.81,543.464,1104.62,799.889,1051.54,791.001
1,714-3-5-35-Z94_B-0000027.jpg,1107.97,532.353,1137.05,536.849,1110.8,540.34,1130.2,531.808,1081.09,534.971,1148.99,561.562,1010.54,560.532,1204.06,625.179,978.218,625.658,1210.03,698.744,974.863,717.147,1066.22,631.881,986.207,616.251,1104.38,698.596,1004.51,718.157,1092.05,769.788,998.965,791.517,1107.97,539.983,1211.94,699.276,999.188,696.859,1051.0,544.586,1042.05,579.178,1101.68,765.301,1004.35,781.087
2,714-3-5-35-Z94_C-0000029.jpg,946.431,326.957,974.698,312.677,948.147,312.053,990.44,314.144,937.994,309.993,1025.28,383.895,885.109,373.13,1068.43,427.614,825.613,408.752,1017.7,397.46,815.901,391.214,988.529,573.648,905.352,559.541,1001.41,724.888,907.012,719.254,1012.01,837.378,931.785,829.856,968.057,349.791,1006.55,390.673,829.637,380.514,944.64,429.971,949.645,498.315,1008.0,854.285,924.399,845.328
3,714-3-5-35-Z94_C-0000001.jpg,946.712,319.096,972.311,304.889,946.329,304.855,987.572,304.898,936.099,301.449,1022.05,372.479,884.142,364.13,1061.51,401.216,820.508,387.435,1007.52,370.881,809.523,367.886,987.392,567.914,906.155,556.71,994.531,725.165,904.043,720.461,996.285,837.667,921.455,830.741,969.145,340.23,998.651,363.263,826.371,356.228,946.88,421.963,952.601,492.803,994.22,855.011,915.667,846.371
4,714-3-5-35-Z94_D-0000003.jpg,987.948,374.324,1021.63,364.022,1003.51,365.478,1044.12,376.711,1006.27,376.325,1074.14,446.2,980.947,445.665,1105.34,504.6,946.959,503.09,1051.22,529.687,931.553,535.037,1067.68,603.454,1008.55,594.064,1079.0,681.799,1021.78,682.886,1126.52,761.475,1082.16,758.062,1010.23,419.34,1022.91,533.377,918.777,534.685,1000.85,486.431,1012.44,544.079,1104.79,773.596,1063.23,767.928


In [None]:
# Write the submission next to the data, without the DataFrame index column
df.to_csv(f'{prefix_dir}/submission_{model_name}{model_ver}.csv', index=False)