In [1]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# pose_2d
# pose_3d

In [3]:
from pathlib import Path
import pickle
import json
import numpy as np

# Placeholders; `keypoint_3d_path` is given its real value further down.
keypoint_2d_path = keypoint_3d_path = ''

# Root folder of each dataset (one sub-folder per camera setup).
synthetic_cabin_ir_1m_root_path = Path('/root/data/processed/synthetic_cabin_1m/').joinpath('all_views')
drive_and_act_root_path = Path('/root/data/processed/drive_and_act/').joinpath('inner_mirror')

# 2D keypoint files for the training split of each dataset.
synthetic_cabin_ir_1m_keypoint_2d_path = synthetic_cabin_ir_1m_root_path.joinpath(
    'annotations', 'person_keypoints_train.json'
)
drive_and_act_keypoint_2d_path = drive_and_act_root_path.joinpath(
    'keypoint_detection_results', 'keypoint_detection_train.json'
)

# Annotation and person-detection folders of the synthetic dataset.
keypoint_3d_path = synthetic_cabin_ir_1m_root_path.joinpath('annotations')
bbox_path = synthetic_cabin_ir_1m_root_path.joinpath('person_detection_results')

In [4]:
# Build the two pose pools handed to the GAN training dataset:
#   * pose_2d - 2D poses from the synthetic cabin images (selected views) AND
#               from Drive&Act keypoint detections (training actors only)
#   * pose_3d - 3D poses from the synthetic cabin images only
# The two lists therefore do not have the same length once Drive&Act is added.
train_actors = ['vp1', 'vp2', 'vp3', 'vp4', 'vp5', 'vp6', 'vp7', 'vp8']
views = {'Dashboard', 'Front', 'OMS_01'}

# --- Synthetic cabin: 2D + 3D poses keyed by image id ---------------------
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load annotation files that come from a trusted source.
with open(synthetic_cabin_ir_1m_root_path / 'annotations' / 'person_keypoints_train.pkl', 'rb') as f:
    synthetic_data = pickle.load(f)

# Keypoints are stored flat; reshape to rows of 3 values per joint. For the 2D
# pose only the first two columns are kept (the third is presumably a
# visibility/confidence flag - TODO confirm).
synthetic_data_mapper = {
    item['image_id']: dict(
        pose_2d=np.array(item['keypoints']).reshape(-1, 3)[:, :2],
        pose_3d=np.array(item['keypoints3D']).reshape(-1, 3),
    )
    for item in synthetic_data['annotations']
}

pose_2d = []
pose_3d = []
for item in synthetic_data['images']:
    # Keep only images taken from the selected camera views.
    if item['view'] in views:
        synthetic_pose = synthetic_data_mapper[item['id']]
        pose_2d.append(synthetic_pose['pose_2d'])
        pose_3d.append(synthetic_pose['pose_3d'])

# --- Drive&Act: detected 2D poses keyed by image id -----------------------
with open(drive_and_act_keypoint_2d_path) as f:
    drive_and_act_kps = json.load(f)

drive_and_act_kps_mapper = {
    item['image_id']: np.array(item['keypoints']).reshape(-1, 3)[:, :2]
    for item in drive_and_act_kps
}

with open(drive_and_act_root_path / 'annotations' / 'person_keypoints_train.json') as f:
    drive_and_act_anns = json.load(f)

# Only the 2D pool is extended here: no 3D pose is read for Drive&Act.
for item in drive_and_act_anns['images']:
    if item['actor'] in train_actors:
        pose_2d.append(drive_and_act_kps_mapper[item['id']])

In [5]:
# Dataset root (without the view sub-folder) and the annotation folder below it.
synthetic_cabin_ir_1m_v2_dataset_root_path = Path('/root/data/processed/synthetic_cabin_1m/')
keypoint_3d_path = synthetic_cabin_ir_1m_v2_dataset_root_path.joinpath('all_views', 'annotations')

In [6]:
import os
import torch
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from modules.lifter_2d_3d.dataset.gan_keypoint_dataset import GANKeypointDataset
from modules.lifter_2d_3d.dataset.synthetic_cabin_ir_1m_dataset import SyntheticCabinIR1MKeypointDataset
from modules.lifter_2d_3d.dataset.drive_and_act_keypoint_dataset import DriveAndActKeypointDataset

from modules.lifter_2d_3d.model.linear_model.lit_linear_model import BaselineModel
from modules.lifter_2d_3d.model.repnet.lit_repnet import LitRepNet
from modules.utils.visualization import (
    generate_connection_line, get_sample_from_loader, visualize_pose
)
from IPython.display import display

# Fix the global random seed before any dataset or model is created.
pl.seed_everything(1234)

# Training set built from the 2D and 3D pose lists collected earlier in the
# notebook; subset_percentage=5 is forwarded as-is to the dataset class.
train_dataset = GANKeypointDataset(pose_2d, pose_3d, subset_percentage=5)

Global seed set to 1234


In [7]:
# Validation set: synthetic cabin data. The same annotation file is passed as
# both `prediction_file` and `annotation_file`, together with
# is_gt_2d_pose=True.
val_dataset = SyntheticCabinIR1MKeypointDataset(
    prediction_file=(synthetic_cabin_ir_1m_root_path / 'annotations' / 'person_keypoints_val.json').as_posix(),
    annotation_file=(synthetic_cabin_ir_1m_root_path / 'annotations' / 'person_keypoints_val.json').as_posix(),
    bbox_file=(synthetic_cabin_ir_1m_root_path / 'person_detection_results' / 'ground_truth_human_detection_val.json').as_posix(),
    image_width=1280,
    image_height=1024,
    exclude_ankle=True,
    exclude_knee=True,
    bbox_format='xyxy',
    is_center_to_neck=True,
    is_normalize_to_bbox=False,
    is_normalize_to_pose=True,
    is_normalize_rotation=True,
    is_gt_2d_pose=True,
    included_view='Dashboard_Front_OMS_01',
    subset_percentage=100
)

# Test set: Drive&Act, restricted to actors vp11-vp14.
# NOTE(review): these actors are read from the `*_train.json` files - confirm
# that those files really contain every actor and not only the training ones.
test_dataset = DriveAndActKeypointDataset(
    prediction_file=(drive_and_act_root_path / 'keypoint_detection_results' / 'keypoint_detection_train.json').as_posix(),
    annotation_file=(drive_and_act_root_path / 'annotations' / 'person_keypoints_train.json').as_posix(),
    bbox_file=(drive_and_act_root_path / 'person_detection_results' / 'human_detection_train.json').as_posix(),
    image_width=1280,
    image_height=1024,
    actors=['vp11', 'vp12', 'vp13', 'vp14'],
    exclude_ankle=True,
    exclude_knee=True,
    bbox_format='xyxy',
    is_center_to_neck=True,
    is_normalize_to_bbox=False,
    is_normalize_to_pose=True,
    is_normalize_rotation=True
)

# Activity list exposed by the test dataset; passed to the model later on.
all_activities = test_dataset.activities

# Report the size of every split.
print(
    'train_dataset', len(train_dataset),
    'val_dataset', len(val_dataset),
    'test_dataset', len(test_dataset)
)

train_dataset 8750 val_dataset 87500 test_dataset 11017


In [8]:
class DataModule(pl.LightningDataModule):
    """Bundle the train / validation / test datasets behind Lightning's loader hooks.

    Args:
        train_dataset: dataset used for training; must provide a ``shuffle()``
            method, which is called every time the train loader is built.
        val_dataset: dataset used for validation.
        test_dataset: dataset used for testing.
    """

    def __init__(self, train_dataset, val_dataset, test_dataset):
        super().__init__()
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.test_dataset = test_dataset

    def train_dataloader(self):
        """Return a fresh training loader (shuffled, incomplete last batch dropped)."""
        # Dataset-level shuffle, in addition to the loader's own shuffle=True.
        # NOTE(review): presumably this re-pairs the 2D and 3D pools - confirm
        # in GANKeypointDataset.
        self.train_dataset.shuffle()
        return DataLoader(self.train_dataset, batch_size=64, drop_last=True, shuffle=True, num_workers=24)

    def val_dataloader(self):
        """Return the validation loader (fixed order, incomplete last batch dropped)."""
        return DataLoader(self.val_dataset, batch_size=64, drop_last=True, num_workers=24)

    def test_dataloader(self):
        """Return the test loader (fixed order, every sample kept)."""
        # Use the dataset given to this instance, not the notebook-level global
        # of the same name.
        return DataLoader(self.test_dataset, batch_size=64, num_workers=24)


dm = DataModule(train_dataset, val_dataset, test_dataset)

In [9]:

# ------------
# callbacks
# ------------
# Both callbacks watch the metric named 'mpjpe' (lower is better): keep the
# single best checkpoint and stop after 2 checks without improvement.
# NOTE(review): assumes LitRepNet logs a metric under exactly this name.
model_checkpoint = ModelCheckpoint(monitor='mpjpe', mode='min', save_top_k=1)
early_stopping = EarlyStopping(monitor='mpjpe', mode='min', patience=2)

# ------------
# model
# ------------
lifter_2D_3D = BaselineModel(exclude_ankle=True, exclude_knee=True)
lit_model = LitRepNet(
    lifter_2D_3D=lifter_2D_3D,
    all_activities=all_activities,
)

# ------------
# training
# ------------
saved_model_path = './saved_lifter_2d_3d_model/rq3/repnet'
# exist_ok=True makes this safe to re-run and avoids the check-then-create race.
os.makedirs(saved_model_path, exist_ok=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('device', device)

trainer = pl.Trainer(
    max_epochs=100,
    callbacks=[model_checkpoint, early_stopping],
    accelerator=device,
    check_val_every_n_epoch=1,
    default_root_dir=saved_model_path,
    # Rebuild the loaders every epoch so that DataModule.train_dataloader()
    # (which calls the dataset's shuffle()) runs again each time.
    reload_dataloaders_every_n_epochs=1,
    log_every_n_steps=1
)
trainer.fit(lit_model, dm)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


device cuda


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | lifter_2D_3D  | BaselineModel      | 4.3 M 
1 | camera_net    | CameraNet          | 4.0 M 
2 | generator     | RepNet             | 8.3 M 
3 | discriminator | DiscriminatorModel | 89.2 K
-----------------------------------------------------
8.4 M     Trainable params
0         Non-trainable params
8.4 M     Total params
33.650    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

check #0
val MPJPE from: 128 samples : 2380.923271179199
val P-MPJPE from: 128 samples : 2226.1156506496127
