In [1]:
import numpy as np
import pandas as pd
import os
import time
import copy
from typing import Tuple

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data as data_utils

import torchvision
from torchvision import datasets, models
from torchvision.models.detection import KeypointRCNN, backbone_utils, keypointrcnn_resnet50_fpn
from torchvision.models.detection.rpn import AnchorGenerator
from torchvision.ops import MultiScaleRoIAlign

from sklearn.model_selection import KFold, train_test_split

import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2
from tqdm import tqdm

# Connect your script to Neptuneimport neptune
import neptune
import neptune_config

In [2]:
# Prefix data directory
prefix_dir = '.'

env = 'local'

# Use Yolo
use_yolo = True
cropped = 'cropped_' if use_yolo else ''

# Top level data directory. Here we assume the format of the directory conforms
# to the ImageFolder structure
train_dir = f'{prefix_dir}/data/{cropped}train_imgs'

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = 'rcnn_resnet18'

# Number of classes in the dataset
num_classes = 48

# Batch size for training (change depending on how much memory you have)
batch_size = 4

# Number of epochs and earlystop to train for
num_epochs = 13

num_splits = 10
num_earlystop = 20 if num_epochs // 10 < 20 else num_epochs // 10
# not use
# num_earlystop = 0

# Iput size for resize imgae
input_size = 384

# Learning rate for optimizer
learning_rate = 0.003

# Use K-folds
use_kfolds = False

In [3]:
df = pd.read_csv(f'{prefix_dir}/data/{cropped}train_df.csv')

imgs = df.iloc[:, 0].to_numpy()
motions = df.iloc[:, 1:]
columns = motions.columns.to_list()[::2]
class_labels = [label.replace('_x', '').replace('_y', '') for label in columns]
keypoints = []
for motion in motions.to_numpy():
    a_keypoints = []
    for i in range(0, motion.shape[0], 2):
        a_keypoints.append((float(motion[i]), float(motion[i+1])))
    keypoints.append(a_keypoints)
keypoints = np.array(keypoints)

In [4]:
neptune.init(project_qualified_name='mybirth0407/dacon-motion',
             api_token=neptune_config.token)

with open(f'{prefix_dir}/counter.txt', 'r+') as f:
    content = f.read().strip()
    counter = int(content) + 1
    f.seek(0)
    f.write(f'{counter}')
    print(counter)

# Create experiment
neptune.create_experiment(f'{counter:3d} - {model_name}')

neptune.log_metric('batch_size', batch_size)
neptune.log_metric('num_epochs', num_epochs)
neptune.log_metric('num_splits', num_splits)
neptune.log_metric('num_ealrystop', num_earlystop)
neptune.log_metric('input_size', input_size)
neptune.log_metric('learning_rate', learning_rate)
neptune.log_metric('use_kfolds', use_kfolds)
neptune.log_metric('use_yolo', use_yolo)

159
https://ui.neptune.ai/mybirth0407/dacon-motion/e/DAC-160


In [5]:
def train_model(model, dataloaders, optimizer, earlystop=0, num_epochs=25, monitor='val'):
    since = time.time()
    
    train_loss_history = []
    val_loss_history = []
    
    earlystop_value = 0

    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 999999999
    
    for epoch in range(num_epochs):
        epoch_since = time.time()
        if earlystop and earlystop_value >= earlystop:
            break

        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
#         for phase in ['train', 'val']:
        for phase in ['train']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            
            # Iterate over data.
            for imgs, targets in dataloaders[phase]:
                imgs = [img.to(device) for img in imgs]
                targets = [{k: v.to(device) for k, v in target.items()} for target in targets]
#                 targets = [{k: v.to(device) for k, v in targets.items()}]
#                 boxes = {k: v.to(device) for k, v in boxes.items()}
#                 labels = {k: v.to(device) for k, v in labels.items()}
#                 keypoints = {k: v.to(device) for k, v in keypoints.items()}
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase=='train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    outputs = model(imgs, targets)
                    loss = sum(output for output in outputs.values())
                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss
                # for classification
#                 running_corrects += torch.sum(preds == labels.data)
                # for regression
#                 running_corrects += torch.sum(outputs == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
#             epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            
            epoch_time_elapsed = time.time() - epoch_since
            print('{} ({}) Loss: {:.4f} Elapsed time: {:.0f}m {:.0f}s'.format(
                phase, len(dataloaders[phase].dataset), epoch_loss, epoch_time_elapsed // 60, epoch_time_elapsed % 60))
            neptune.log_metric(f'{phase}_loss', epoch_loss)
#             neptune.log_metric(f'{phase}_acc', epoch_acc)
            
                
            # deep copy the model
            if phase == 'val':
                if monitor == 'val':
                    if epoch_loss < best_loss:
                        best_loss = epoch_loss
                        best_model_wts = copy.deepcopy(model.state_dict())
                        torch.save(model_ft.state_dict(), f'{prefix_dir}/{env}/{counter:3d}_{model_name}.pt')
                        print('copied model')
                        earlystop_value = 0
                    else:
                        earlystop_value += 1
                    val_loss_history.append(epoch_loss)
            elif phase == 'train':
                if monitor == 'train':
                    if epoch_loss < best_loss:
                        best_loss = epoch_loss
                        best_model_wts = copy.deepcopy(model.state_dict())
                        torch.save(model_ft.state_dict(), f'{prefix_dir}/{env}/{counter:3d}_{model_name}.pt')
                        print('copied model')
                        earlystop_value = 0
                    else:
                        earlystop_value += 1
                    train_loss_history.append(epoch_loss)
        print()

    time_elapsed = time.time() - since
    print('Training and Validation complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    if monitor == 'val':
        print('Best Validation Loss: {:4f}\n'.format(best_loss))
    elif monitor == 'train':
        print('Best Training Loss: {:4f}\n'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, best_loss

In [6]:
def initialize_model(pretrained=False, progress=True, num_classes=2, num_keypoints=24,
                     pretrained_backbone=True, trainable_backbone_layers=None,
                     rpn_anchor_generator=None, box_roi_pool=None, keypoint_roi_pool=None):
    
    backbone = backbone_utils.resnet_fpn_backbone('resnet18', pretrained_backbone, trainable_layers=trainable_backbone_layers)
    
    model = KeypointRCNN(
        backbone, 
        num_classes=num_classes,
        num_keypoints=num_keypoints,
        rpn_anchor_generator=rpn_anchor_generator,
        box_roi_pool=box_roi_pool,
        keypoint_roi_pool=keypoint_roi_pool
    )
    return model

In [7]:
anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
# anchor_sizes = ((32,), (64,), (128,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
anchor_generator = AnchorGenerator(
    anchor_sizes, aspect_ratios
)

roi_pooler = MultiScaleRoIAlign(
    featmap_names=['0'], output_size=7, sampling_ratio=2
)
keypoint_roi_pooler = MultiScaleRoIAlign(
    featmap_names=['0'], output_size=14, sampling_ratio=2
)

# Initialize the model for this run
model_ft = initialize_model(
    pretrained=False, progress=True, num_classes=2, num_keypoints=24,
    pretrained_backbone=True, trainable_backbone_layers=3,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler, keypoint_roi_pool=keypoint_roi_pooler
)

for param in model_ft.backbone.fpn.parameters():
    param.requires_grad = True

# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Send the model to GPU
model_ft = model_ft.to(device)

# Print the model we just instantiated
# print(model_ft)

In [8]:
# # Data augmentation and normalization for training
# # Just resize and normalization for validation

A_transforms = {
    'train':
        A.Compose([
            A.Resize(input_size, input_size, always_apply=True),
            A.RandomBrightnessContrast(p=0.3),
            A.HorizontalFlip(p=0.3),
#             A.RandomRotate90(p=0.3),
            A.VerticalFlip(p=0.3),
            A.MotionBlur(p=0.3),
            A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']),
            keypoint_params=A.KeypointParams(format='xy')),
    
    'val':
        A.Compose([
            A.Resize(input_size, input_size, always_apply=True),
#             A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ], bbox_params=A.BboxParams(format='pascal_voc', label_fields=['labels']),
            keypoint_params=A.KeypointParams(format='xy')),
    
    'test':
        A.Compose([
            A.Resize(input_size, input_size, always_apply=True),
#             A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ToTensorV2()
        ])
}

In [9]:
class Dataset(data_utils.Dataset):
    """__init__ and __len__ functions are the same as in TorchvisionDataset"""
    def __init__(self, data_dir, imgs, keypoints, phase, class_labels=None, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.keypoints = keypoints
        self.phase = phase
        self.class_labels = class_labels
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        # Read an image with OpenCV
        img = cv2.imread(os.path.join(self.data_dir, self.imgs[idx]), cv2.COLOR_BGR2RGB)
        
        keypoints = self.keypoints[idx]
        x1, y1 = self.keypoints[idx][:, 0].min(), self.keypoints[idx][:, 1].min()
        x2, y2 = self.keypoints[idx][:, 0].max(), self.keypoints[idx][:, 1].max()
        bboxes = np.array([[x1, y1, x2, y2]], dtype=int)
        labels = np.array([1], dtype=int)
        targets = {
            'image': img,
            'bboxes': bboxes,
            'labels': labels, # human is 1, 0 is background
            'keypoints': keypoints
        }
        
        if self.data_transforms:
            targets = self.data_transforms[self.phase](**targets)
            img = targets['image']
        
        targets = {
            'labels': torch.as_tensor(targets['labels'], dtype=torch.int64),
            'boxes': torch.as_tensor(targets['bboxes'], dtype=torch.float32),
            'keypoints': torch.as_tensor(
                np.concatenate([targets['keypoints'], np.ones((24, 1))], axis=1)[np.newaxis], dtype=torch.float32)
        }
        return img, targets
    
    def __len__(self):
        return len(self.imgs)

In [10]:
def collate_fn(batch: torch.Tensor) -> Tuple:
    return tuple(zip(*batch))

In [11]:
print(f'k-folds use: {use_kfolds}')

full_since = time.time()

since = time.time()
# X_train, X_val, y_train, y_val = train_test_split(imgs, keypoints, test_size=1/num_splits, random_state=42)
train_data = Dataset(train_dir, imgs, keypoints, data_transforms=A_transforms, class_labels=class_labels, phase='train')
# val_data = Dataset(train_dir, X_val, y_val, data_transforms=A_transforms, class_labels=class_labels, phase='val')
train_loader = data_utils.DataLoader(train_data, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
# val_loader = data_utils.DataLoader(val_data, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
# dataloaders = {'train': train_loader, 'val': val_loader}
dataloaders = {'train': train_loader}

# Observe that all parameters are being optimized
optimizer_ft = optim.Adam(model_ft.parameters(), lr=learning_rate)
# optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.02, momentum=0.9, weight_decay=1e-4)

# Train and evaluate
model_ft, best_loss = train_model(
    model_ft, dataloaders, optimizer_ft,
    num_epochs=num_epochs, earlystop=num_earlystop, monitor='train')
torch.save(model_ft.state_dict(), f'{prefix_dir}/{env}/{counter:3d}_{model_name}_{best_loss:.2f}.pt')
time_elapsed = time.time() - since
print('Elapsed time: {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))

fulltime_elapsed = time.time() - full_since
print('All process done!\nElapsed time: {:.0f}m {:.0f}s\n'.format(fulltime_elapsed // 60, fulltime_elapsed % 60))

k-folds use: False
Epoch 1/13
----------
train (4195) Loss: 1149991.6250 Elapsed time: 10m 29s
copied model

Epoch 2/13
----------
train (4195) Loss: 2.0739 Elapsed time: 10m 47s
copied model

Epoch 3/13
----------
train (4195) Loss: 2.0710 Elapsed time: 10m 21s
copied model

Epoch 4/13
----------
train (4195) Loss: 2.0714 Elapsed time: 10m 34s

Epoch 5/13
----------
train (4195) Loss: 2.0711 Elapsed time: 10m 9s

Epoch 6/13
----------
train (4195) Loss: 2.0711 Elapsed time: 10m 23s

Epoch 7/13
----------
train (4195) Loss: 2.0717 Elapsed time: 10m 36s

Epoch 8/13
----------
train (4195) Loss: 2.0719 Elapsed time: 10m 42s

Epoch 9/13
----------
train (4195) Loss: 2.0719 Elapsed time: 10m 31s

Epoch 10/13
----------
train (4195) Loss: 2.0713 Elapsed time: 10m 24s

Epoch 11/13
----------
train (4195) Loss: 2.0721 Elapsed time: 10m 16s

Epoch 12/13
----------
train (4195) Loss: 2.0721 Elapsed time: 10m 9s

Epoch 13/13
----------
train (4195) Loss: 2.0716 Elapsed time: 10m 17s

Training an

In [12]:
# model_ft.load_state_dict(torch.load(f'{prefix_dir}/{env}/{counter:3d}_{model_name}_{best_loss:.2f}.pt'))

<All keys matched successfully>

In [18]:
test_dir = f'{prefix_dir}/data/{cropped}test_imgs'
test_imgs = os.listdir(test_dir)

In [19]:
class TestDataset(data_utils.Dataset):
    """__init__ and __len__ functions are the same as in TorchvisionDataset"""
    def __init__(self, data_dir, imgs, phase, data_transforms=None):
        self.data_dir = data_dir
        self.imgs = imgs
        self.phase = phase
        self.data_transforms = data_transforms

    def __getitem__(self, idx):
        filename = self.imgs[idx]
        # Read an image with OpenCV
        img = cv2.imread(os.path.join(self.data_dir, self.imgs[idx]), cv2.COLOR_BGR2RGB)
        h = img.shape[0]
        w = img.shape[1]
        if self.data_transforms:
            augmented = self.data_transforms[self.phase](image=img)
            img = augmented['image']
        return filename, img, (h, w)
    
    def __len__(self):
        return len(self.imgs)
    
test_data = TestDataset(test_dir, test_imgs, data_transforms=A_transforms, phase='test')
test_loader = data_utils.DataLoader(test_data, batch_size=batch_size, shuffle=False)

In [20]:
model_ft

KeypointRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(640, 672, 704, 736, 768, 800), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64)
          (relu): ReLU(inplace=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64)
        )
        (1): BasicBlock(
          (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1)

In [21]:
img = cv2.imread(os.path.join(test_dir, test_imgs[0]), cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (384, 384))
img = img / 255.0
img = img.transpose(2, 0, 1)
img = [torch.as_tensor(img, dtype=torch.float32).to(device)]

In [22]:
model_ft.eval()
# with torch.no_grad():
print(model_ft(img))

[{'boxes': tensor([], device='cuda:0', size=(0, 4), grad_fn=<StackBackward>), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0', grad_fn=<IndexBackward>), 'keypoints': tensor([], device='cuda:0', size=(0, 24, 3)), 'keypoints_scores': tensor([], device='cuda:0', size=(0, 24))}]


In [None]:
all_predictions = []
files = []
shapes = []
model_ft.eval()
# with torch.no_grad():
for filenames, inputs, shape in tqdm(test_loader):
    predictions = model_ft(inputs.to(device))
    break
#         files.extend(filenames)
        
#         shapes.extend(shape)
#         for prediction in predictions:
#             all_predictions.append(prediction)
            
# origin_shape_y = shapes[0].numpy()
# origin_shape_x = shapes[1].numpy()
# for i in range(1, len(shapes) // 2):
#     origin_shape_y = np.append(origin_shape_y, shapes[2*i].numpy())
#     origin_shape_x = np.append(origin_shape_x, shapes[2*i + 1].numpy())

# all_predictions = np.array(all_predictions)
# for i in range(all_predictions.shape[0]):
#     all_predictions[i, [2*j for j in range(num_classes//2)]] /= input_size / origin_shape_x[i]
#     all_predictions[i, [2*j + 1 for j in range(num_classes//2)]] /= input_size / origin_shape_y[i]

In [None]:
files

In [None]:
res_df = pd.read_csv(f'{prefix_dir}/data/res_test_df.csv')
res = res_df.iloc[:, 1:].to_numpy()

all_predictions = np.array(all_predictions)
for i in range(all_predictions.shape[0]):
    all_predictions[i, [2*j for j in range(num_classes//2)]] += res[i][0]
    all_predictions[i, [2*j + 1 for j in range(num_classes//2)]] += res[i][1]

In [None]:
df_sub = pd.read_csv(f'{prefix_dir}/data/sample_submission.csv')
df = pd.DataFrame(columns=df_sub.columns)
df['image'] = files
df.iloc[:, 1:] = all_predictions
df.head()

In [None]:
df.to_csv(f'{prefix_dir}/submissions/submission_{counter:3d}_{model_name}{model_ver}_{best_loss:.2f}.csv', index=False)

In [None]:
print(counter)