In [1]:
# Reload imported modules before each cell executes, so edits to the project's
# `modules.*` packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# pose_2d
# pose_3d

In [3]:
from pathlib import Path
import pickle
import json
import numpy as np

# Placeholder values; `keypoint_3d_path` is reassigned in a later cell.
keypoint_2d_path = keypoint_3d_path = ''

# Dataset roots, each narrowed to the sub-folder used by this experiment.
synthetic_cabin_ir_1m_root_path = Path('/root/data/processed/synthetic_cabin_1m/').joinpath('all_views')
drive_and_act_root_path = Path('/root/data/processed/drive_and_act/').joinpath('inner_mirror')

# 2D keypoint detection results for the Drive&Act training split.
# synthetic_cabin_ir_1m_keypoint_2d_path = synthetic_cabin_ir_1m_root_path / 'annotations' / 'person_keypoints_train.json'
drive_and_act_keypoint_2d_path = drive_and_act_root_path.joinpath(
    'keypoint_detection_results',
    'keypoint_detection_train.json',
)
# keypoint_3d_path = synthetic_cabin_ir_1m_root_path / 'annotations'
# bbox_path = synthetic_cabin_ir_1m_root_path / 'person_detection_results'

In [4]:
# Training pools handed to GANKeypointDataset in a later cell.
# `pose_2d` receives 2D poses from BOTH datasets, `pose_3d` only from the
# synthetic data, so the two lists are not index-aligned.
pose_2d = []
pose_3d = []
train_actors = ['vp1', 'vp2', 'vp3', 'vp4', 'vp5', 'vp6', 'vp7', 'vp8']
# views = set(['Dashboard', 'Front', 'OMS_01'])
# views = set(['A_Pillar_Driver', 'Front_Right', 'Front', 'TopRight'])
viewpoints = [
    ['Front_Left', 'Front_TopLeft', 'A_Pillar_Codriver', 'Rear_Mirror'],
    ['Front_Right', 'Front_TopRight', 'A_Pillar_Driver'],
    ['Dashboard', 'OMS_01', 'Front'],
    None,
    ['Front', 'Front_Left', 'OMS_01', 'Dashboard'],
]

# Index 3 is None and cannot be selected here.
selected_viewpoint = viewpoints[2]
# `views` is kept as the '_'-joined string for backward compatibility.
views = '_'.join(selected_viewpoint)
# Membership must be tested against the exact view names. Testing against the
# joined string is a substring check and would, for instance, accept 'Front'
# whenever 'Front_Left' is part of the selection.
selected_views = frozenset(selected_viewpoint)

# 'a_column_co_driver': 'A_Pillar_Codriver_Front_Left_Front_TopLeft_Rear_Mirror',
# 'a_column_driver': 'A_Pillar_Driver_Front_Right_Front_TopRight',
# 'inner_mirror': 'Dashboard_Front_OMS_01'

# NOTE(security): pickle.load can execute arbitrary code; only load annotation
# files that come from a trusted source.
with open(synthetic_cabin_ir_1m_root_path / 'annotations' / 'person_keypoints_train.pkl', 'rb') as f:
    synthetic_data = pickle.load(f)

# image_id -> poses. Keypoints are stored flat and reshaped to rows of three
# values; only the first two columns are kept for the 2D pose.
synthetic_data_mapper = {}
for item in synthetic_data['annotations']:
    synthetic_data_mapper[item['image_id']] = dict(
        pose_2d=np.array(item['keypoints']).reshape(-1, 3)[:, :2],
        pose_3d=np.array(item['keypoints3D']).reshape(-1, 3)
    )

# Keep only synthetic images recorded from one of the selected camera views.
for item in synthetic_data['images']:
    if item['view'] in selected_views:
        pose_2d.append(synthetic_data_mapper[item['id']]['pose_2d'])
        pose_3d.append(synthetic_data_mapper[item['id']]['pose_3d'])

# image_id -> detected 2D pose for Drive&Act (first two columns of each triplet).
drive_and_act_kps_mapper = {}
with open(drive_and_act_keypoint_2d_path) as f:
    drive_and_act_kps = json.load(f)
for item in drive_and_act_kps:
    drive_and_act_kps_mapper[item['image_id']] = np.array(item['keypoints']).reshape(-1, 3)[:, :2]

with open(drive_and_act_root_path / 'annotations' / 'person_keypoints_train.json') as f:
    drive_and_act_anns = json.load(f)

# Only the training actors contribute 2D poses; no Drive&Act 3D pose is added.
for item in drive_and_act_anns['images']:
    if item['actor'] in train_actors:
        pose_2d.append(drive_and_act_kps_mapper[item['id']])

In [5]:
# Root of the processed synthetic cabin dataset and its all-views annotation folder.
synthetic_cabin_ir_1m_v2_dataset_root_path = Path('/root/data/processed/synthetic_cabin_1m/')
keypoint_3d_path = synthetic_cabin_ir_1m_v2_dataset_root_path.joinpath('all_views', 'annotations')

In [6]:
import os
import torch
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from modules.lifter_2d_3d.dataset.gan_keypoint_dataset import GANKeypointDataset
from modules.lifter_2d_3d.dataset.synthetic_cabin_ir_1m_dataset import SyntheticCabinIR1MKeypointDataset
from modules.lifter_2d_3d.dataset.drive_and_act_keypoint_dataset import DriveAndActKeypointDataset

from modules.lifter_2d_3d.model.linear_model.lit_linear_model import BaselineModel
from modules.lifter_2d_3d.model.repnet.lit_repnet import LitRepNet
from modules.utils.visualization import (
    generate_connection_line, get_sample_from_loader, visualize_pose
)
from IPython.display import display

# Fix the global random seed before any dataset or model is created.
pl.seed_everything(1234)

# Pose pre-processing flags for the training dataset.
train_dataset_options = dict(
    is_center_to_neck=True,
    is_normalize_to_bbox=False,
    is_normalize_to_pose=True,
)
# Training set built from the 2D / 3D pose lists collected in the earlier cell.
train_dataset = GANKeypointDataset(pose_2d, pose_3d, **train_dataset_options)

Global seed set to 1234


In [7]:
# Options passed identically to the validation and the test dataset.
shared_dataset_options = dict(
    image_width=1280,
    image_height=1024,
    exclude_ankle=True,
    exclude_knee=True,
    bbox_format='xyxy',
    is_center_to_neck=True,
    is_normalize_to_bbox=False,
    is_normalize_to_pose=True,
    # is_normalize_rotation=True,
)

# Validation on the synthetic data: the annotation file is also used as the
# prediction file and is_gt_2d_pose=True is set.
synthetic_val_keypoint_file = (
    synthetic_cabin_ir_1m_root_path / 'annotations' / 'person_keypoints_val.json'
).as_posix()
synthetic_val_bbox_file = (
    synthetic_cabin_ir_1m_root_path / 'person_detection_results' / 'ground_truth_human_detection_val.json'
).as_posix()
val_dataset = SyntheticCabinIR1MKeypointDataset(
    prediction_file=synthetic_val_keypoint_file,
    annotation_file=synthetic_val_keypoint_file,
    bbox_file=synthetic_val_bbox_file,
    is_gt_2d_pose=True,
    included_view='Dashboard_Front_OMS_01',
    subset_percentage=100,
    **shared_dataset_options,
)

# Test on Drive&Act, restricted to actors vp11-vp14 (none of them is in `train_actors`).
drive_and_act_prediction_file = (
    drive_and_act_root_path / 'keypoint_detection_results' / 'keypoint_detection_train.json'
).as_posix()
drive_and_act_annotation_file = (
    drive_and_act_root_path / 'annotations' / 'person_keypoints_train.json'
).as_posix()
drive_and_act_bbox_file = (
    drive_and_act_root_path / 'person_detection_results' / 'human_detection_train.json'
).as_posix()
test_dataset = DriveAndActKeypointDataset(
    prediction_file=drive_and_act_prediction_file,
    annotation_file=drive_and_act_annotation_file,
    bbox_file=drive_and_act_bbox_file,
    actors=['vp11', 'vp12', 'vp13', 'vp14'],
    **shared_dataset_options,
)

# Activity labels exposed by the test dataset; passed to the model in a later cell.
all_activities = test_dataset.activities
print(
    'train_dataset', len(train_dataset),
    'val_dataset', len(val_dataset),
    'test_dataset', len(test_dataset),
)

train_dataset 175001 val_dataset 87500 test_dataset 10959


In [8]:
class DataModule(pl.LightningDataModule):
    """Lightning data module wrapping the train / validation / test datasets.

    Args:
        train_dataset: Training dataset; must provide a ``shuffle()`` method,
            which is called every time the training loader is built.
        val_dataset: Validation dataset.
        test_dataset: Test dataset.
        batch_size: Batch size used by all three loaders (default 64).
        num_workers: Number of DataLoader worker processes (default 24).
    """

    def __init__(self, train_dataset, val_dataset, test_dataset, batch_size=64, num_workers=24):
        super().__init__()
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.test_dataset = test_dataset
        self.batch_size = batch_size
        self.num_workers = num_workers

    def train_dataloader(self):
        """Return a shuffled training loader that drops the last incomplete batch."""
        # Dataset-level shuffle, separate from the DataLoader's own shuffling.
        # NOTE(review): presumably re-pairs the unpaired 2D/3D samples - confirm
        # against GANKeypointDataset.shuffle().
        self.train_dataset.shuffle()
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            drop_last=True,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return the validation loader (fixed order, last incomplete batch dropped)."""
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            drop_last=True,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        """Return the test loader (fixed order, all samples kept)."""
        # Use the dataset given to the constructor instead of the notebook-level
        # global `test_dataset`, so the module honours its own arguments.
        return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=self.num_workers)


dm = DataModule(train_dataset, val_dataset, test_dataset)

In [9]:

# train_loader = DataLoader(train_dataset, batch_size=64, drop_last=True, shuffle=True, num_workers=24)
# val_loader = DataLoader(val_dataset, batch_size=64, drop_last=True, num_workers=24)
# test_loader = DataLoader(test_dataset, batch_size=64, num_workers=24)

# Keep only the checkpoint with the lowest logged 'mpjpe', and stop training
# once 'mpjpe' has not improved for 5 consecutive validation checks.
model_checkpoint = ModelCheckpoint(monitor='mpjpe', mode='min', save_top_k=1)
early_stopping = EarlyStopping(monitor='mpjpe', mode="min", patience=5)

# ------------
# model
# ------------
lifter_2D_3D = BaselineModel(exclude_ankle=True, exclude_knee=True)
lit_model = LitRepNet(
    lifter_2D_3D=lifter_2D_3D,
    all_activities=all_activities,
)
# ------------
# training
# ------------
saved_model_path = './saved_lifter_2d_3d_model/rq3/repnet'
# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs(saved_model_path, exist_ok=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('device', device)
# device = 'cpu'
trainer = pl.Trainer(
    # max_steps=10,
    max_epochs=100,
    callbacks=[model_checkpoint, early_stopping],
    accelerator=device,
    check_val_every_n_epoch=1,
    default_root_dir=saved_model_path,
    # gradient_clip_val=1.0
    # Rebuild the loaders every epoch so DataModule.train_dataloader() - and
    # with it the training dataset's shuffle() - runs again each epoch.
    reload_dataloaders_every_n_epochs=1,
    log_every_n_steps=1
)
# trainer.fit(lit_model, train_loader, val_loader)
trainer.fit(lit_model, dm)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


device cuda


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | lifter_2D_3D  | BaselineModel      | 4.3 M 
1 | camera_net    | CameraNet          | 4.0 M 
2 | generator     | RepNet             | 8.3 M 
3 | discriminator | DiscriminatorModel | 89.2 K
-----------------------------------------------------
8.4 M     Trainable params
0         Non-trainable params
8.4 M     Total params
33.650    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

check #0
val MPJPE from: 128 samples : 2413.00106048584
val P-MPJPE from: 128 samples : 2257.245683200801


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

check #1
training loss from 2734 batches:
d_loss = 4.000194306476498
g_loss = -0.3171215193630922
c_loss = 1.6964033265661478
pose_2d_loss = 19.026358204290826
total_g_loss = 20.405640016704382
val MPJPE from: 87488 samples : 583.5568904876709
val P-MPJPE from: 87488 samples : 507.3185247359221


Validation: 0it [00:00, ?it/s]

check #2
training loss from 2734 batches:
d_loss = 2.646818237140742
g_loss = -0.48108430491282106
c_loss = 1.4266565992676086
pose_2d_loss = 0.6958456196506789
total_g_loss = 1.641417915520654
val MPJPE from: 87488 samples : 268.95180344581604
val P-MPJPE from: 87488 samples : 193.12810890197687


Validation: 0it [00:00, ?it/s]

check #3
training loss from 2734 batches:
d_loss = 1.9838134422217004
g_loss = -0.2322868193074638
c_loss = 0.1469886516341456
pose_2d_loss = 0.15307715130477992
total_g_loss = 0.06777898385717473
val MPJPE from: 87488 samples : 225.6813496351242
val P-MPJPE from: 87488 samples : 143.78792766604963


Validation: 0it [00:00, ?it/s]

check #4
training loss from 2734 batches:
d_loss = 2.9293872515576256
g_loss = -0.6580980637689184
c_loss = 0.07969440921838583
pose_2d_loss = 0.13484089864838947
total_g_loss = -0.44356275564597836
val MPJPE from: 87488 samples : 666.2312150001526
val P-MPJPE from: 87488 samples : 498.104759432239


Validation: 0it [00:00, ?it/s]

check #5
training loss from 2734 batches:
d_loss = 0.7412513962680448
g_loss = -0.783422088919544
c_loss = 0.10468553785158385
pose_2d_loss = 0.16572224428903123
total_g_loss = -0.513014307821983
val MPJPE from: 87488 samples : 195.63935697078705
val P-MPJPE from: 87488 samples : 140.43709919044215


Validation: 0it [00:00, ?it/s]

check #6
training loss from 2734 batches:
d_loss = 0.23228886132057
g_loss = -0.2645832579351291
c_loss = 0.05056089875455223
pose_2d_loss = 0.09627512287302181
total_g_loss = -0.11774723623125066
val MPJPE from: 87488 samples : 219.93203461170197
val P-MPJPE from: 87488 samples : 152.15893118546617


Validation: 0it [00:00, ?it/s]

check #7
training loss from 2734 batches:
d_loss = 0.21779380092464098
g_loss = -0.20204246184027644
c_loss = 0.03560646896876651
pose_2d_loss = 0.08142674722229815
total_g_loss = -0.0850092456638595
val MPJPE from: 87488 samples : 263.0278468132019
val P-MPJPE from: 87488 samples : 187.83755025585404


Validation: 0it [00:00, ?it/s]

check #8
training loss from 2734 batches:
d_loss = 0.10193146333601204
g_loss = -0.2083491168632809
c_loss = 0.029649483672211707
pose_2d_loss = 0.08568365601570657
total_g_loss = -0.09301597704591764
val MPJPE from: 87488 samples : 204.80580627918243
val P-MPJPE from: 87488 samples : 135.51368907818556


Validation: 0it [00:00, ?it/s]

check #9
training loss from 2734 batches:
d_loss = 0.023585450488409054
g_loss = -0.35369488203403454
c_loss = 0.026001678884639823
pose_2d_loss = 0.09054300838695797
total_g_loss = -0.2371501944790204
val MPJPE from: 87488 samples : 190.32154977321625
val P-MPJPE from: 87488 samples : 130.89642623345438


Validation: 0it [00:00, ?it/s]

check #10
training loss from 2734 batches:
d_loss = 0.015559512074275764
g_loss = -0.42226782635170435
c_loss = 0.02471188518192126
pose_2d_loss = 0.09243330615351213
total_g_loss = -0.30512263505408255
val MPJPE from: 87488 samples : 197.6698935031891
val P-MPJPE from: 87488 samples : 137.2220749325854


Validation: 0it [00:00, ?it/s]

check #11
training loss from 2734 batches:
d_loss = 0.009019613534589547
g_loss = -0.45299105681942253
c_loss = 0.022002744558590907
pose_2d_loss = 0.0914857044554402
total_g_loss = -0.3395026083640487
val MPJPE from: 87488 samples : 197.31631875038147
val P-MPJPE from: 87488 samples : 135.0627359206284


Validation: 0it [00:00, ?it/s]

check #12
training loss from 2734 batches:
d_loss = 0.004676292971033404
g_loss = -0.46188073115315675
c_loss = 0.019335597779073132
pose_2d_loss = 0.0907661622552298
total_g_loss = -0.35177897092911475
val MPJPE from: 87488 samples : 195.1470822095871
val P-MPJPE from: 87488 samples : 134.3221557336994


Validation: 0it [00:00, ?it/s]

check #13
training loss from 2734 batches:
d_loss = 0.2252886310197112
g_loss = -0.3043950639185499
c_loss = 0.018619375691041294
pose_2d_loss = 0.088377516670391
total_g_loss = -0.19739817134523663
val MPJPE from: 87488 samples : 1495.0006008148193
val P-MPJPE from: 87488 samples : 1109.4637988366237


Validation: 0it [00:00, ?it/s]

check #14
training loss from 2734 batches:
d_loss = 0.17318714065129795
g_loss = -0.2931862027188811
c_loss = 0.024115689053188602
pose_2d_loss = 0.10593464032316958
total_g_loss = -0.16313587314154251
val MPJPE from: 87488 samples : 190.39294123649597
val P-MPJPE from: 87488 samples : 135.89011717565833


In [10]:
# Path of the checkpoint kept by `model_checkpoint` (monitor='mpjpe', mode='min',
# save_top_k=1), i.e. the one with the lowest validation 'mpjpe'.
best_checkpoint_path = model_checkpoint.best_model_path
# Evaluate that checkpoint on the data module's test loader.
trainer.test(ckpt_path=best_checkpoint_path, datamodule=dm)

Restoring states from the checkpoint path at saved_lifter_2d_3d_model/rq3/repnet/lightning_logs/version_3/checkpoints/epoch=8-step=49212.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at saved_lifter_2d_3d_model/rq3/repnet/lightning_logs/version_3/checkpoints/epoch=8-step=49212.ckpt


Testing: 0it [00:00, ?it/s]

[{'mpjpe': 164.70582783222198,
  'p_mpjpe': 106.36243001577783,
  'activity_macro_mpjpe': 177.86507308483124,
  'p_activity_macro_mpjpe': 127.4467489217613}]