In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports all
# modules before each cell runs, so local edits to imported project code
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
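# pose_2d: list of per-sample 2D keypoint arrays, filled by the data-loading cell below
# pose_3d: list of per-sample 3D keypoint arrays, filled by the data-loading cell below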

In [3]:
from pathlib import Path
import pickle
import json
import numpy as np

# --- Experiment configuration ------------------------------------------------
# `viewpoint_idx` selects the Drive&Act camera here; the data-loading cell uses
# the same index to pick the matching group of synthetic views.
viewpoint_idx = 1
drive_and_act_views = ['a_column_co_driver', 'a_column_driver', 'inner_mirror']

# Empty placeholders; `keypoint_3d_path` is given a real value in a later cell.
keypoint_2d_path = keypoint_3d_path = ''

# Dataset roots.
synthetic_cabin_ir_1m_root_path = Path('/root/data/processed/synthetic_cabin_1m/').joinpath('all_views')
drive_and_act_root_path = Path('/root/data/processed/drive_and_act/').joinpath(
    drive_and_act_views[viewpoint_idx]
)

# 2D keypoint detections for the selected Drive&Act view.
drive_and_act_keypoint_2d_path = drive_and_act_root_path.joinpath(
    'keypoint_detection_results',
    'keypoint_detection_train.json',
)

In [4]:
# Accumulators handed to the GAN dataset later on. 2D poses come from both the
# synthetic set and Drive&Act; 3D poses come from the synthetic set only, so
# the two lists are not expected to have the same length.
pose_2d, pose_3d = [], []

# Drive&Act actors whose frames contribute 2D poses to training.
train_actors = ['vp1', 'vp2', 'vp3', 'vp4', 'vp5', 'vp6', 'vp7', 'vp8']

# Groups of synthetic camera views; `viewpoint_idx` selects one row.
viewpoints = [
    ['Front_Left', 'Front_TopLeft', 'A_Pillar_Codriver', 'Rear_Mirror'],
    ['Front_Right', 'Front_TopRight', 'A_Pillar_Driver'],
    ['Dashboard', 'OMS_01', 'Front'],
    None,
    ['Front', 'Front_Left', 'OMS_01', 'Dashboard'],
]
views = viewpoints[viewpoint_idx]

# Sorted, underscore-joined view names. For the first three rows this gives:
#   a_column_co_driver -> A_Pillar_Codriver_Front_Left_Front_TopLeft_Rear_Mirror
#   a_column_driver    -> A_Pillar_Driver_Front_Right_Front_TopRight
#   inner_mirror       -> Dashboard_Front_OMS_01
view_name = '_'.join(sorted(views))

# ---- Synthetic data: 2D + 3D poses -----------------------------------------
# NOTE: pickle.load executes arbitrary code; only use with trusted files.
with open(synthetic_cabin_ir_1m_root_path / 'annotations' / 'person_keypoints_train.pkl', 'rb') as pkl_file:
    synthetic_data = pickle.load(pkl_file)

# image_id -> poses. Keypoints are stored flat and reshaped to rows of three
# values; for the 2D pose only the first two columns are kept.
synthetic_data_mapper = {
    ann['image_id']: {
        'pose_2d': np.array(ann['keypoints']).reshape(-1, 3)[:, :2],
        'pose_3d': np.array(ann['keypoints3D']).reshape(-1, 3),
    }
    for ann in synthetic_data['annotations']
}

# Keep only images recorded from one of the selected views.
synthetic_samples = [
    synthetic_data_mapper[img['id']]
    for img in synthetic_data['images']
    if img['view'] in views
]
pose_2d.extend(sample['pose_2d'] for sample in synthetic_samples)
pose_3d.extend(sample['pose_3d'] for sample in synthetic_samples)

# ---- Drive&Act: 2D poses only ----------------------------------------------
with open(drive_and_act_keypoint_2d_path) as kps_file:
    drive_and_act_kps = json.load(kps_file)

# image_id -> detected 2D keypoints (third column dropped).
drive_and_act_kps_mapper = {
    det['image_id']: np.array(det['keypoints']).reshape(-1, 3)[:, :2]
    for det in drive_and_act_kps
}

with open(drive_and_act_root_path / 'annotations' / 'person_keypoints_train.json') as ann_file:
    drive_and_act_anns = json.load(ann_file)

# Add detections for training actors; images without a detection are skipped.
pose_2d.extend(
    drive_and_act_kps_mapper[img['id']]
    for img in drive_and_act_anns['images']
    if img['actor'] in train_actors and img['id'] in drive_and_act_kps_mapper
)

In [5]:
# Root of the processed synthetic dataset, and the annotation folder of its
# combined "all_views" split.
synthetic_cabin_ir_1m_v2_dataset_root_path = Path('/root/data/processed/synthetic_cabin_1m/')
keypoint_3d_path = synthetic_cabin_ir_1m_v2_dataset_root_path.joinpath('all_views', 'annotations')

In [6]:
import os
import torch
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from modules.lifter_2d_3d.dataset.gan_keypoint_dataset import GANKeypointDataset
from modules.lifter_2d_3d.dataset.synthetic_cabin_ir_1m_dataset import SyntheticCabinIR1MKeypointDataset
from modules.lifter_2d_3d.dataset.drive_and_act_keypoint_dataset import DriveAndActKeypointDataset

from modules.lifter_2d_3d.model.linear_model.lit_linear_model import BaselineModel
from modules.lifter_2d_3d.model.repnet.lit_repnet import LitRepNet
from modules.utils.visualization import (
    generate_connection_line, get_sample_from_loader, visualize_pose
)
from IPython.display import display

# Seed the global random number generators through Lightning so runs are repeatable.
pl.seed_everything(1234)

# Normalisation switches for the GAN training set: centre on the neck and
# normalise to the pose, with bbox normalisation turned off.
gan_dataset_options = dict(
    is_center_to_neck=True,
    is_normalize_to_bbox=False,
    is_normalize_to_pose=True,
)
train_dataset = GANKeypointDataset(pose_2d, pose_3d, **gan_dataset_options)

Global seed set to 1234


In [7]:
# Settings shared by the validation and test datasets. They use the same
# normalisation switches as the training set; `is_normalize_rotation` is not
# passed, so each dataset uses its own default for it.
common_dataset_kwargs = dict(
    image_width=1280,
    image_height=1024,
    exclude_ankle=True,
    exclude_knee=True,
    bbox_format='xyxy',
    is_center_to_neck=True,
    is_normalize_to_bbox=False,
    is_normalize_to_pose=True,
)

# Validation: synthetic data. The annotation file doubles as the "prediction"
# file and `is_gt_2d_pose=True` is set.
synthetic_val_annotation_file = (
    synthetic_cabin_ir_1m_root_path / 'annotations' / 'person_keypoints_val.json'
).as_posix()
synthetic_val_bbox_file = (
    synthetic_cabin_ir_1m_root_path / 'person_detection_results' / 'ground_truth_human_detection_val.json'
).as_posix()
val_dataset = SyntheticCabinIR1MKeypointDataset(
    prediction_file=synthetic_val_annotation_file,
    annotation_file=synthetic_val_annotation_file,
    bbox_file=synthetic_val_bbox_file,
    is_gt_2d_pose=True,
    included_view=views,
    subset_percentage=100,
    **common_dataset_kwargs,
)

# Test: Drive&Act detections, restricted to the listed actors.
drive_and_act_prediction_file = (
    drive_and_act_root_path / 'keypoint_detection_results' / 'keypoint_detection_train.json'
).as_posix()
drive_and_act_annotation_file = (
    drive_and_act_root_path / 'annotations' / 'person_keypoints_train.json'
).as_posix()
drive_and_act_bbox_file = (
    drive_and_act_root_path / 'person_detection_results' / 'human_detection_train.json'
).as_posix()
test_dataset = DriveAndActKeypointDataset(
    prediction_file=drive_and_act_prediction_file,
    annotation_file=drive_and_act_annotation_file,
    bbox_file=drive_and_act_bbox_file,
    actors=['vp11', 'vp12', 'vp13', 'vp14'],
    **common_dataset_kwargs,
)
all_activities = test_dataset.activities

# One-line summary of the split sizes.
split_sizes = []
for split_name, split in (
    ('train_dataset', train_dataset),
    ('val_dataset', val_dataset),
    ('test_dataset', test_dataset),
):
    split_sizes += [split_name, len(split)]
print(*split_sizes)

train_dataset 125000 val_dataset 62500 test_dataset 10959


In [8]:
class DataModule(pl.LightningDataModule):
    """Bundle the train/val/test datasets and build their DataLoaders.

    Args:
        train_dataset: Dataset used for training. It must provide a
            ``shuffle()`` method, which is called every time the training
            loader is (re)built.
        val_dataset: Dataset used for validation.
        test_dataset: Dataset used by ``trainer.test``.
        batch_size: Batch size for all three loaders.
        num_workers: Number of worker processes for all three loaders.
    """

    def __init__(self, train_dataset, val_dataset, test_dataset, batch_size=64, num_workers=24):
        super().__init__()
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.test_dataset = test_dataset
        self.batch_size = batch_size
        self.num_workers = num_workers

    def train_dataloader(self):
        """Return a shuffling training loader that drops the last partial batch."""
        # Dataset-level shuffle, in addition to the DataLoader's own shuffling.
        # NOTE(review): presumably this re-pairs the unpaired 2D/3D samples --
        # confirm against GANKeypointDataset.shuffle.
        self.train_dataset.shuffle()
        return DataLoader(
            self.train_dataset,
            batch_size=self.batch_size,
            drop_last=True,
            shuffle=True,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return the validation loader (fixed order, last partial batch dropped)."""
        return DataLoader(
            self.val_dataset,
            batch_size=self.batch_size,
            drop_last=True,
            num_workers=self.num_workers,
        )

    def test_dataloader(self):
        """Return the test loader (fixed order, every sample kept)."""
        # Use the dataset stored on this instance. The previous code read the
        # notebook-global `test_dataset`, ignoring the constructor argument.
        return DataLoader(
            self.test_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )
# Data module passed to both `trainer.fit` and `trainer.test`.
dm = DataModule(
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    test_dataset=test_dataset,
)

In [9]:

# train_loader = DataLoader(train_dataset, batch_size=64, drop_last=True, shuffle=True, num_workers=24)
# val_loader = DataLoader(val_dataset, batch_size=64, drop_last=True, num_workers=24)
# test_loader = DataLoader(test_dataset, batch_size=64, num_workers=24)

# Both callbacks watch the logged 'mpjpe' metric (lower is better): keep only
# the best checkpoint and stop after 5 checks without improvement.
model_checkpoint = ModelCheckpoint(monitor='mpjpe', mode='min', save_top_k=1)
early_stopping = EarlyStopping(monitor='mpjpe', mode='min', patience=5)

# ------------
# model
# ------------
lifter_2D_3D = BaselineModel(exclude_ankle=True, exclude_knee=True)
lit_model = LitRepNet(
    lifter_2D_3D=lifter_2D_3D,
    all_activities=all_activities,
)

# ------------
# training
# ------------
# NOTE(review): the directory is named 'co_driver_pillar', but viewpoint_idx = 1
# selects the 'a_column_driver' view -- confirm the run is saved where intended.
saved_model_path = './saved_lifter_2d_3d_model/rq3/repnet/synthetic_and_real/co_driver_pillar'
# exist_ok=True creates the directory if needed, without a separate existence
# check that could race with another process.
os.makedirs(saved_model_path, exist_ok=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('device', device)

# For a quick smoke test, add `max_steps=10` to the arguments below.
trainer = pl.Trainer(
    max_epochs=100,
    callbacks=[model_checkpoint, early_stopping],
    accelerator=device,
    check_val_every_n_epoch=1,
    default_root_dir=saved_model_path,
    # Rebuild the loaders each epoch so DataModule.train_dataloader (and its
    # dataset-level shuffle) runs again.
    reload_dataloaders_every_n_epochs=1,
    log_every_n_steps=1,
)
trainer.fit(lit_model, dm)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


device cuda


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | lifter_2D_3D  | BaselineModel      | 4.3 M 
1 | camera_net    | CameraNet          | 4.0 M 
2 | generator     | RepNet             | 8.3 M 
3 | discriminator | DiscriminatorModel | 89.2 K
-----------------------------------------------------
8.4 M     Trainable params
0         Non-trainable params
8.4 M     Total params
33.650    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

check #0
val MPJPE from: 128 samples : 2342.7369594573975
val P-MPJPE from: 128 samples : 2189.4767198425075


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

check #1
training loss from 1953 batches:
d_loss = 3.985507103338403
g_loss = -0.27661477044583344
c_loss = 1.6952360658479604
pose_2d_loss = 21.46005831934279
total_g_loss = 22.878679626121073
val MPJPE from: 62464 samples : 510.2905035018921
val P-MPJPE from: 62464 samples : 396.8697757853684


Validation: 0it [00:00, ?it/s]

check #2
training loss from 1953 batches:
d_loss = 3.0675375420071807
g_loss = -0.2948867012248305
c_loss = 1.696363891202611
pose_2d_loss = 1.237800030725404
total_g_loss = 2.6392772167203855
val MPJPE from: 62464 samples : 393.5927748680115
val P-MPJPE from: 62464 samples : 299.9650555106294


Validation: 0it [00:00, ?it/s]

check #3
training loss from 1953 batches:
d_loss = 5.766446107757195
g_loss = -0.5234622515211549
c_loss = 0.7678103607950977
pose_2d_loss = 0.4770758644440695
total_g_loss = 0.7214239721443491
val MPJPE from: 62464 samples : 353.2557487487793
val P-MPJPE from: 62464 samples : 286.2947673510532


Validation: 0it [00:00, ?it/s]

check #4
training loss from 1953 batches:
d_loss = 7.587106237999611
g_loss = -0.7933859027658923
c_loss = 0.09477574096041738
pose_2d_loss = 0.10714095579703466
total_g_loss = -0.5914692054772096
val MPJPE from: 62464 samples : 375.97140669822693
val P-MPJPE from: 62464 samples : 268.7540032820038


Validation: 0it [00:00, ?it/s]

check #5
training loss from 1953 batches:
d_loss = 4.333091296236514
g_loss = -0.8141501553726697
c_loss = 0.05271088082583681
pose_2d_loss = 0.20492013984231525
total_g_loss = -0.5565191349019408
val MPJPE from: 62464 samples : 188.78977000713348
val P-MPJPE from: 62464 samples : 121.90026924603055


Validation: 0it [00:00, ?it/s]

check #6
training loss from 1953 batches:
d_loss = 0.33119918507487117
g_loss = -0.4956655337323119
c_loss = 0.07355767573552809
pose_2d_loss = 0.14843692107882428
total_g_loss = -0.27367093627323447
val MPJPE from: 62464 samples : 302.42034792900085
val P-MPJPE from: 62464 samples : 220.07248751747602


Validation: 0it [00:00, ?it/s]

check #7
training loss from 1953 batches:
d_loss = 0.2275008742477701
g_loss = -0.3654157374830534
c_loss = 0.07484577813734626
pose_2d_loss = 0.15283145857769834
total_g_loss = -0.13773850067740395
val MPJPE from: 62464 samples : 182.20078945159912
val P-MPJPE from: 62464 samples : 130.7664123555565


Validation: 0it [00:00, ?it/s]

check #8
training loss from 1953 batches:
d_loss = 0.3390498112293921
g_loss = -0.17998615284562416
c_loss = 0.0381711909411064
pose_2d_loss = 0.09823062093866464
total_g_loss = -0.04358434103166087
val MPJPE from: 62464 samples : 199.67179000377655
val P-MPJPE from: 62464 samples : 129.84275398453502


Validation: 0it [00:00, ?it/s]

check #9
training loss from 1953 batches:
d_loss = 0.4195115970580229
g_loss = -0.2642977972001341
c_loss = 0.032855488766219396
pose_2d_loss = 0.10083726025675269
total_g_loss = -0.13060504837458523
val MPJPE from: 62464 samples : 430.0924837589264
val P-MPJPE from: 62464 samples : 388.7584454442843


Validation: 0it [00:00, ?it/s]

check #10
training loss from 1953 batches:
d_loss = 0.5429928530278462
g_loss = -0.7783652955730085
c_loss = 0.03292336591833671
pose_2d_loss = 0.08954106252144543
total_g_loss = -0.6559008676396598
val MPJPE from: 62464 samples : 207.16160535812378
val P-MPJPE from: 62464 samples : 136.60371188738557


Validation: 0it [00:00, ?it/s]

check #11
training loss from 1953 batches:
d_loss = 0.17352165929597355
g_loss = -0.31585634941939994
c_loss = 0.027524625785225364
pose_2d_loss = 0.10265849689779927
total_g_loss = -0.18567322666866012
val MPJPE from: 62464 samples : 209.10583436489105
val P-MPJPE from: 62464 samples : 143.81021852867244


Validation: 0it [00:00, ?it/s]

check #12
training loss from 1953 batches:
d_loss = 0.12008395920475659
g_loss = -0.2551837022556016
c_loss = 0.02812352515346978
pose_2d_loss = 0.1010264526428898
total_g_loss = -0.1260337244225232
val MPJPE from: 62464 samples : 200.4631906747818
val P-MPJPE from: 62464 samples : 137.57903674102582


In [10]:
# Evaluate on the test split using the best checkpoint recorded by the
# ModelCheckpoint callback; the returned metrics are the cell's output.
best_checkpoint_path = model_checkpoint.best_model_path
trainer.test(
    datamodule=dm,
    ckpt_path=best_checkpoint_path,
)

Restoring states from the checkpoint path at saved_lifter_2d_3d_model/rq3/repnet/synthetic_and_real/co_driver_pillar/lightning_logs/version_1/checkpoints/epoch=6-step=27342.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at saved_lifter_2d_3d_model/rq3/repnet/synthetic_and_real/co_driver_pillar/lightning_logs/version_1/checkpoints/epoch=6-step=27342.ckpt


Testing: 0it [00:00, ?it/s]

[{'mpjpe': 247.72658944129944,
  'p_mpjpe': 130.28864511497096,
  'activity_macro_mpjpe': 257.7551603317261,
  'p_activity_macro_mpjpe': 144.90475432319326}]