In [1]:
import os
import torch
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch.utils.data import DataLoader
from modules.lifter_2d_3d.dataset.drive_and_act_keypoint_dataset import DriveAndActKeypointDataset
from modules.lifter_2d_3d.model.graformer.lit_graformer import LitGraformer
from modules.utils.visualization import (
    plot_samples
)
from IPython.display import display
from pathlib import Path

pl.seed_everything(1234)

# ------------
# dataset path
# ------------
dataset_root_path = Path('/root/data/processed/drive_and_act/')
keypoint_2d_path = dataset_root_path / 'keypoint_detection_results'
keypoint_3d_path = dataset_root_path / 'annotations'
bbox_file = dataset_root_path / 'person_detection_results'
# ------------
# model
# ------------
image_width = 1280
image_height = 1024
batch_size = 64
max_epoch = 200
val_check_period = 5
early_stopping_patience = 5
# ------------
# saved model path
# ------------
saved_model_path = './saved_lifter_2d_3d_model/graformer/drive_and_act/A_Pillar_Codriver/predicted_2d/all_actors/'


train_dataset = DriveAndActKeypointDataset(
    prediction_file=(keypoint_2d_path / 'keypoint_detection_train.json').as_posix(),
    annotation_file=(keypoint_3d_path / 'person_keypoints_train.json').as_posix(),
    bbox_file=(bbox_file / 'human_detection_train.json').as_posix(),
    image_width=image_width,
    image_height=image_height,
    # actors=['vp1', 'vp4', 'vp5', 'vp6', 'vp7', 'vp8', 'vp9', 'vp10', 'vp15'],
    actors=['vp1', 'vp2', 'vp3', 'vp4', 'vp5', 'vp6', 'vp7', 'vp8'],
    exclude_ankle=True,
    exclude_knee=True,
    # is_normalize_to_bbox=True,
    # bbox_format='xyxy'

    is_center_to_neck=True,
    is_normalize_to_bbox=False,
    is_normalize_to_pose=True,
    is_normalize_rotation=True
)
val_dataset = DriveAndActKeypointDataset(
    prediction_file=(keypoint_2d_path / 'keypoint_detection_train.json').as_posix(),
    annotation_file=(keypoint_3d_path / 'person_keypoints_train.json').as_posix(),
    bbox_file=(bbox_file / 'human_detection_train.json').as_posix(),
    image_width=image_width,
    image_height=image_height,
    # actors=['vp2', 'vp3'],
    actors=['vp9', 'vp10', 'vp15'],
    exclude_ankle=True,
    exclude_knee=True,
    # is_normalize_to_bbox=True,
    # bbox_format='xyxy'

    is_center_to_neck=True,
    is_normalize_to_bbox=False,
    is_normalize_to_pose=True,
    is_normalize_rotation=True
)
test_dataset = DriveAndActKeypointDataset(
    prediction_file=(keypoint_2d_path / 'keypoint_detection_train.json').as_posix(),
    annotation_file=(keypoint_3d_path / 'person_keypoints_train.json').as_posix(),
    bbox_file=(bbox_file / 'human_detection_train.json').as_posix(),
    image_width=image_width,
    image_height=image_height,
    actors=['vp11', 'vp12', 'vp13', 'vp14'],
    # actors=['vp13', 'vp14', 'vp15'],
    exclude_ankle=True,
    exclude_knee=True,
    # is_normalize_to_bbox=True,
    # bbox_format='xyxy' 
    is_center_to_neck=True,
    is_normalize_to_bbox=False,
    is_normalize_to_pose=True,
    is_normalize_rotation=True
)
all_activities = train_dataset.activities.union(val_dataset.activities).union(test_dataset.activities)
lit_model = LitGraformer(exclude_ankle=True, exclude_knee=True, all_activities=all_activities,
                        #  is_silence=False,
                        #  learning_rate=1e-3
                         )
print(
    'train_dataset', len(train_dataset),
    'val_dataset', len(val_dataset),
    'test_dataset', len(test_dataset)
)
train_loader = DataLoader(train_dataset, batch_size=batch_size, drop_last=True, shuffle=True, num_workers=24)
val_loader = DataLoader(val_dataset, batch_size=batch_size, drop_last=True, num_workers=24)
test_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=24)


model_checkpoint = ModelCheckpoint(monitor='val_loss',mode='min', save_top_k=1)
early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=early_stopping_patience)

if not os.path.exists(saved_model_path):
    os.makedirs(saved_model_path)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
trainer = pl.Trainer(
    # max_steps=10,
    max_epochs=max_epoch,
    callbacks=[model_checkpoint, early_stopping],
    accelerator=device,
    check_val_every_n_epoch=val_check_period,
    default_root_dir=saved_model_path,
    gradient_clip_val=1.0
)
trainer.fit(lit_model, train_loader, val_loader)

Global seed set to 1234


skipping problematic image 3628
skipping problematic image 5874
skipping problematic image 14835
skipping problematic image 15783
skipping problematic image 17258
skipping problematic image 17259
skipping problematic image 21271
skipping problematic image 21272
skipping problematic image 21273
skipping problematic image 21274
skipping problematic image 21275
skipping problematic image 21276
skipping problematic image 32959
skipping problematic image 33527
skipping problematic image 28113


  adj = torch.tensor(
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


train_dataset 22881 val_dataset 6240 test_dataset 11017


Missing logger folder: saved_lifter_2d_3d_model/graphmlp/drive_and_act/A_Pillar_Codriver/predicted_2d/all_actors/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | model | GraFormer | 926 K 
------------------------------------
926 K     Trainable params
0         Non-trainable params
926 K     Total params
3.708     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

check #0
val MPJPE from: 128 samples : 1114.497184753418


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

check #1
training loss from 1785 batches: 129.96534166716728
val MPJPE from: 6208 samples : 76.69416069984436


Validation: 0it [00:00, ?it/s]

check #2
training loss from 1785 batches: 108.94380931009431
val MPJPE from: 6208 samples : 85.85063368082047


Validation: 0it [00:00, ?it/s]

check #3
training loss from 1785 batches: 106.52381805729132
val MPJPE from: 6208 samples : 76.0408565402031


Validation: 0it [00:00, ?it/s]

check #4
training loss from 1785 batches: 104.1338769828572
val MPJPE from: 6208 samples : 75.0146135687828


Validation: 0it [00:00, ?it/s]

check #5
training loss from 1785 batches: 101.52614308672459
val MPJPE from: 6208 samples : 83.07494968175888


Validation: 0it [00:00, ?it/s]

check #6
training loss from 1785 batches: 99.07019022299129
val MPJPE from: 6208 samples : 75.17120242118835


Validation: 0it [00:00, ?it/s]

check #7
training loss from 1785 batches: 96.3573038911953
val MPJPE from: 6208 samples : 72.94146716594696


Validation: 0it [00:00, ?it/s]

check #8
training loss from 1785 batches: 93.89798633482943
val MPJPE from: 6208 samples : 77.20399647951126


Validation: 0it [00:00, ?it/s]

check #9
training loss from 1785 batches: 91.70257975073422
val MPJPE from: 6208 samples : 75.33077150583267


Validation: 0it [00:00, ?it/s]

check #10
training loss from 1785 batches: 89.6321643321287
val MPJPE from: 6208 samples : 80.35537600517273


Validation: 0it [00:00, ?it/s]

check #11
training loss from 1785 batches: 87.74142477776157
val MPJPE from: 6208 samples : 77.18906551599503


Validation: 0it [00:00, ?it/s]

check #12
training loss from 1785 batches: 86.19076237917281
val MPJPE from: 6208 samples : 79.65315133333206


In [8]:
LitGraformer.load_from_checkpoint(model_checkpoint.best_model_path)

  adj = torch.tensor(


LitGraformer(
  (model): GraFormer(
    (gconv_input): ChebConv()
    (gconv_layers): ModuleList(
      (0-3): 4 x _ResChebGC(
        (gconv1): _GraphConv(
          (gconv): ChebConv()
          (relu): ReLU()
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (gconv2): _GraphConv(
          (gconv): ChebConv()
          (relu): ReLU()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (atten_layers): ModuleList(
      (0-3): 4 x GraAttenLayer(
        (self_attn): MultiHeadedAttention(
          (linears): ModuleList(
            (0-3): 4 x Linear(in_features=128, out_features=128, bias=True)
          )
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (feed_forward): GraphNet(
          (gconv1): LAM_Gconv(
            (fc): Linear(in_features=128, out_features=256, bias=True)
            (activation): ReLU(inplace=True)
          )
          (gconv2): LAM_Gconv(
            (fc): Linear(in_features=256, out_features

In [2]:
with open(f'{saved_model_path}/best_model_path.txt', 'w') as f:
    f.writelines(model_checkpoint.best_model_path)
best_checkpoint_path = model_checkpoint.best_model_path
trainer.test(ckpt_path=best_checkpoint_path, dataloaders=test_loader)

Restoring states from the checkpoint path at saved_lifter_2d_3d_model/graphmlp/drive_and_act/A_Pillar_Codriver/predicted_2d/all_actors/lightning_logs/version_0/checkpoints/epoch=34-step=12495.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at saved_lifter_2d_3d_model/graphmlp/drive_and_act/A_Pillar_Codriver/predicted_2d/all_actors/lightning_logs/version_0/checkpoints/epoch=34-step=12495.ckpt


Testing: 0it [00:00, ?it/s]

MPJPE: 66.88190251588821
PJPE
                      PJPE
nose             55.406219
left_eye         55.342503
right_eye        53.822598
left_ear         13.151522
right_ear        38.542057
left_shoulder    30.467731
right_shoulder   27.312246
left_elbow       74.249596
right_elbow      66.055267
left_wrist       83.904182
right_wrist     105.228134
left_hip         67.982887
right_hip        61.749561
activities_mpjpe:
{'sitting_still': 63.07036429643631, 'closing_door_inside': 133.79251956939697, 'entering_car': 211.49703860282898, 'opening_bottle': 76.51782780885696, 'closing_bottle': 82.39523321390152, 'drinking': 73.47695529460907, 'pressing_automation_button': 57.17076361179352, 'fetching_an_object': 150.4868119955063, 'eating': 60.81360951066017, 'placing_an_object': 141.84750616550446, 'preparing_food': 132.28167593479156, 'opening_backpack': 192.77071952819824, 'reading_newspaper': 85.44767647981644, 'taking_off_sunglasses': 104.37838733196259, 'using_multimedia_display': 98

[{'mpjpe': 66.88190251588821}]

In [3]:
pd.DataFrame(lit_model.test_history[0]['activities_mpjpe'], index=['mpjpe']).T.sort_values('mpjpe')

Unnamed: 0,mpjpe
pressing_automation_button,57.170764
eating,60.81361
sitting_still,63.070364
reading_magazine,69.709174
working_on_laptop,72.73417
drinking,73.476955
opening_bottle,76.517828
interacting_with_phone,77.377588
closing_bottle,82.395233
reading_newspaper,85.447676


In [4]:
pd.DataFrame(lit_model.test_history[0]['activities_mpjpe'], index=['mpjpe']).T.mean()

mpjpe    128.497498
dtype: float64

In [5]:
plot_samples(
    dataset_root_path,
    trainer.model,
    test_loader,
    'train',
    img_figsize=(20, 10),
    plot_figsize=(20.5, 10),
    sample_idices=[1000, 2500, 6000],
    is_plot_gt_skeleton=False
)

[347.3514, 286.7826, 657.2451, 620.2427]


TypeError: LitGraformer.forward() missing 1 required positional argument: 'batch_idx'