In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# !rm -rf saved_lifter_2d_3d_model/synthetic_cabin_ir/A_Pillar_Codriver/prediction/linear_model/lightning_logs

In [3]:
import pandas as pd
import numpy as np
import torch
import lightning.pytorch as pl
import matplotlib.pyplot as plt
# import plotly
import plotly.express as px

In [4]:
import os
import torch
import pandas as pd
import numpy as np
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from modules.lifter_2d_3d.model.linear_model.linear_model import BaselineModel
from modules.lifter_2d_3d.dataset.simple_keypoint_dataset import SimpleKeypointDataset
from modules.lifter_2d_3d.dataset.drive_and_act_keypoint_dataset import DriveAndActKeypointDataset
from modules.lifter_2d_3d.model.linear_model.lit_linear_model import LitSimpleBaselineLinear
from modules.utils.visualization import generate_connection_line, get_sample_from_loader, visualize_pose
from IPython.display import display

# Seed the RNGs up front so that shuffling and weight initialisation are
# repeatable between runs of this cell.
pl.seed_everything(1234)

# All three synthetic-cabin splits live under the same root and differ only in
# the split name embedded in the two JSON file names.
_synthetic_cabin_root = "/root/data/processed/synthetic_cabin_ir/A_Pillar_Codriver"


def _make_synthetic_cabin_dataset(split):
    """Build the SimpleKeypointDataset for one split of the synthetic cabin data.

    Args:
        split: Split name used in the file names: 'train', 'val' or 'test'.

    Returns:
        A SimpleKeypointDataset reading the 2D keypoint detections and the
        annotations of that split, configured for 1280x1024 images with the
        ankle and hip keypoints excluded.
    """
    return SimpleKeypointDataset(
        prediction_file=f"{_synthetic_cabin_root}/keypoint_detection_results/keypoint_detection_{split}.json",
        annotation_file=f"{_synthetic_cabin_root}/annotations/person_keypoints_{split}.json",
        image_width=1280,
        image_height=1024,
        exclude_ankle=True,
        exclude_hip=True,
    )


# Same construction order as before: train, then val, then test.
train_dataset = _make_synthetic_cabin_dataset("train")
val_dataset = _make_synthetic_cabin_dataset("val")
test_dataset = _make_synthetic_cabin_dataset("test")

# Report how many samples each split contains.
n_train, n_val, n_test = len(train_dataset), len(val_dataset), len(test_dataset)
print('train_dataset', n_train, 'val_dataset', n_val, 'test_dataset', n_test)

# Settings common to all three loaders.
loader_options = dict(batch_size=64, num_workers=24)

# Training batches are reshuffled every epoch; an incomplete final batch is
# dropped for both the training and the validation loader.
train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, **loader_options)
val_loader = DataLoader(val_dataset, drop_last=True, **loader_options)
# The test loader keeps every sample, including a short last batch.
test_loader = DataLoader(test_dataset, **loader_options)

# Keep only the single checkpoint with the lowest monitored 'val_loss'.
# NOTE(review): assumes LitSimpleBaselineLinear logs a metric named 'val_loss' — confirm.
model_checkpoint = ModelCheckpoint(monitor='val_loss', mode='min', save_top_k=1)
# `patience` counts validation checks, not epochs. Validation only runs every
# 5 epochs (see check_val_every_n_epoch below), so training stops after roughly
# 25 epochs without improvement.
early_stopping = EarlyStopping(monitor="val_loss", mode="min", patience=5)

# ------------
# model
# ------------
lit_model = LitSimpleBaselineLinear(exclude_ankle=True, exclude_hip=True)
# ------------
# training
# ------------
saved_model_path = './saved_lifter_2d_3d_model/synthetic_cabin_ir/A_Pillar_Codriver/prediction/linear_model/'
# exist_ok=True replaces the separate existence check: it is idempotent on
# re-runs and has no check-then-create race.
os.makedirs(saved_model_path, exist_ok=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# `pl` is pytorch_lightning here: the later `import pytorch_lightning as pl`
# shadows the earlier `import lightning.pytorch as pl`.
trainer = pl.Trainer(
    # max_steps=10,  # uncomment for a quick smoke run
    max_epochs=200,
    callbacks=[model_checkpoint, early_stopping],
    accelerator=device,
    check_val_every_n_epoch=5,
    default_root_dir=saved_model_path,  # lightning_logs/ and checkpoints are written below this directory
    gradient_clip_val=1.0
)
trainer.fit(lit_model, train_loader, val_loader)

Global seed set to 1234
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


train_dataset 37499 val_dataset 6250 test_dataset 6251


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type          | Params
----------------------------------------
0 | model | BaselineModel | 4.3 M 
----------------------------------------
4.3 M     Trainable params
0         Non-trainable params
4.3 M     Total params
17.105    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

check #0
val MPJPE from: 0 batches : 4060.011863708496


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

check #1
training loss from 2925 batches: 314.13409610079907
val MPJPE from: 0 batches : 66.44697487354279


Validation: 0it [00:00, ?it/s]

check #2
training loss from 2925 batches: 81.27195040767009
val MPJPE from: 0 batches : 43.831028044223785


Validation: 0it [00:00, ?it/s]

check #3
training loss from 2925 batches: 67.71235816371747
val MPJPE from: 0 batches : 40.517374873161316


Validation: 0it [00:00, ?it/s]

check #4
training loss from 2925 batches: 62.41780182362622
val MPJPE from: 0 batches : 40.90750217437744


Validation: 0it [00:00, ?it/s]

check #5
training loss from 2925 batches: 58.928358222429566
val MPJPE from: 0 batches : 38.017261773347855


Validation: 0it [00:00, ?it/s]

check #6
training loss from 2925 batches: 56.7404765705777
val MPJPE from: 0 batches : 38.377758115530014


Validation: 0it [00:00, ?it/s]

check #7
training loss from 2925 batches: 54.40114759609231
val MPJPE from: 0 batches : 36.95473074913025


Validation: 0it [00:00, ?it/s]

check #8
training loss from 2925 batches: 52.96515759367209
val MPJPE from: 0 batches : 37.222638726234436


Validation: 0it [00:00, ?it/s]

check #9
training loss from 2925 batches: 51.790922582149506
val MPJPE from: 0 batches : 35.978712141513824


Validation: 0it [00:00, ?it/s]

check #10
training loss from 2925 batches: 50.70438422453709
val MPJPE from: 0 batches : 34.77229177951813


Validation: 0it [00:00, ?it/s]

check #11
training loss from 2925 batches: 49.85574061289812
val MPJPE from: 0 batches : 35.15169024467468


Validation: 0it [00:00, ?it/s]

check #12
training loss from 2925 batches: 49.21045241829677
val MPJPE from: 0 batches : 35.57996824383736


Validation: 0it [00:00, ?it/s]

check #13
training loss from 2925 batches: 48.56195956595943
val MPJPE from: 0 batches : 34.47902947664261


Validation: 0it [00:00, ?it/s]

check #14
training loss from 2925 batches: 48.07855745283966
val MPJPE from: 0 batches : 34.771256148815155


Validation: 0it [00:00, ?it/s]

check #15
training loss from 2925 batches: 47.658584161191925
val MPJPE from: 0 batches : 34.59235653281212


Validation: 0it [00:00, ?it/s]

check #16
training loss from 2925 batches: 47.326024973239655
val MPJPE from: 0 batches : 34.452758729457855


Validation: 0it [00:00, ?it/s]

check #17
training loss from 2925 batches: 47.12204026615518
val MPJPE from: 0 batches : 34.212540835142136


Validation: 0it [00:00, ?it/s]

check #18
training loss from 2925 batches: 46.84729359470881
val MPJPE from: 0 batches : 34.317199140787125


Validation: 0it [00:00, ?it/s]

check #19
training loss from 2925 batches: 46.641576514284836
val MPJPE from: 0 batches : 34.15295481681824


Validation: 0it [00:00, ?it/s]

check #20
training loss from 2925 batches: 46.577424222332795
val MPJPE from: 0 batches : 34.54241156578064


Validation: 0it [00:00, ?it/s]

check #21
training loss from 2925 batches: 46.43396802692332
val MPJPE from: 0 batches : 34.417524933815


Validation: 0it [00:00, ?it/s]

check #22
training loss from 2925 batches: 46.316350494694504
val MPJPE from: 0 batches : 33.95784646272659


Validation: 0it [00:00, ?it/s]

check #23
training loss from 2925 batches: 46.09921671513818
val MPJPE from: 0 batches : 34.55303981900215


Validation: 0it [00:00, ?it/s]

check #24
training loss from 2925 batches: 46.16929894329136
val MPJPE from: 0 batches : 34.19467434287071


Validation: 0it [00:00, ?it/s]

check #25
training loss from 2925 batches: 46.094795760945374
val MPJPE from: 0 batches : 34.159719944000244


Validation: 0it [00:00, ?it/s]

check #26
training loss from 2925 batches: 46.106135603199654
val MPJPE from: 0 batches : 34.05671939253807


Validation: 0it [00:00, ?it/s]

check #27
training loss from 2925 batches: 45.88579015217276
val MPJPE from: 0 batches : 34.07639265060425


In [7]:
# Drive&Act keypoint data. The three splits read different files and are given
# non-overlapping actor lists.
# To run on the synthetic_cabin_bw data instead, point the root at
# /root/data/processed/synthetic_cabin_bw/A_Pillar_Codriver (same file layout).
_drive_and_act_root = "/root/data/processed/drive_and_act"


def _make_drive_and_act_dataset(split, actors):
    """Build the DriveAndActKeypointDataset for one split.

    Args:
        split: Split name used in the file names: 'train', 'val' or 'test'.
        actors: Actor ids (e.g. 'vp1') passed through to the dataset.
            NOTE(review): presumably used to filter samples by participant —
            confirm against DriveAndActKeypointDataset.

    Returns:
        A DriveAndActKeypointDataset configured for 1280x1024 images with the
        ankle and hip keypoints excluded.
    """
    return DriveAndActKeypointDataset(
        prediction_file=f"{_drive_and_act_root}/keypoint_detection_results/keypoint_detection_{split}.json",
        # Every split now uses the same single-slash root; the validation
        # annotation path previously started with '//root/...'.
        annotation_file=f"{_drive_and_act_root}/annotations/person_keypoints_{split}.json",
        image_width=1280,
        image_height=1024,
        actors=actors,
        exclude_ankle=True,
        exclude_hip=True,
    )


# Alternative training actors (disabled): 'vp1' through 'vp10'.
train_dataset = _make_drive_and_act_dataset('train', ['vp1', 'vp2', 'vp3', 'vp4', 'vp5'])
val_dataset = _make_drive_and_act_dataset('val', ['vp11', 'vp12'])
test_dataset = _make_drive_and_act_dataset('test', ['vp13', 'vp14', 'vp15'])

# Report how many samples each split contains.
n_train, n_val, n_test = len(train_dataset), len(val_dataset), len(test_dataset)
print('train_dataset', n_train, 'val_dataset', n_val, 'test_dataset', n_test)

# Settings common to all three loaders.
loader_options = dict(batch_size=64, num_workers=24)

# Training batches are reshuffled every epoch; an incomplete final batch is
# dropped for both the training and the validation loader.
train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True, **loader_options)
val_loader = DataLoader(val_dataset, drop_last=True, **loader_options)
# The test loader keeps every sample, including a short last batch.
test_loader = DataLoader(test_dataset, **loader_options)

# Fresh callbacks for this run: keep the single best checkpoint by 'val_loss'.
model_checkpoint = ModelCheckpoint(monitor='val_loss', mode='min', save_top_k=1)
# `patience` counts validation checks, not epochs. Validation only runs every
# 5 epochs (see check_val_every_n_epoch below), so training stops after roughly
# 25 epochs without improvement.
early_stopping = EarlyStopping(monitor="val_loss", mode="min", patience=5)

# ------------
# model
# ------------
# NOTE(review): no model is created in this cell. `lit_model` is whatever the
# kernel still holds from the earlier training cell, so this fit() continues
# training that same instance on the Drive&Act loaders. If training from
# scratch or from a saved checkpoint is intended, re-enable one of the options
# below.
# lit_model = LitSimpleBaselineLinear(exclude_ankle=True, exclude_hip=True)
# loaded_lit_model = lit_model.load_from_checkpoint(
#     # checkpoint_path="saved_lifter_2d_3d_model/drive_and_act/prediction/linear_model/lightning_logs/version_15/checkpoints/epoch=94-step=11780.ckpt"
#     checkpoint_path="saved_lifter_2d_3d_model/synthetic_cabin_ir/A_Pillar_Codriver/prediction/linear_model/lightning_logs/version_1/checkpoints/epoch=69-step=40950.ckpt"
# )
# ------------
# training
# ------------
# saved_model_path = './saved_lifter_2d_3d_model/synthetic_cabin_ir/A_Pillar_Codriver/prediction/linear_model/'
saved_model_path = './saved_lifter_2d_3d_model/drive_and_act/prediction/linear_model'
# exist_ok=True replaces the separate existence check: it is idempotent on
# re-runs and has no check-then-create race.
os.makedirs(saved_model_path, exist_ok=True)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# `pl` is pytorch_lightning here: the later `import pytorch_lightning as pl`
# shadows the earlier `import lightning.pytorch as pl`.
trainer = pl.Trainer(
    # max_steps=10,  # uncomment for a quick smoke run
    max_epochs=100,
    callbacks=[model_checkpoint, early_stopping],
    accelerator=device,
    check_val_every_n_epoch=5,
    default_root_dir=saved_model_path,  # lightning_logs/ and checkpoints are written below this directory
    gradient_clip_val=1.0
)
trainer.fit(lit_model, train_loader, val_loader)

skipping problematic image 15783
skipping problematic image 17258
skipping problematic image 17259
skipping problematic image 33527


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type          | Params
----------------------------------------
0 | model | BaselineModel | 4.3 M 
----------------------------------------
4.3 M     Trainable params
0         Non-trainable params
4.3 M     Total params
17.105    Total estimated model params size (MB)


train_dataset 14792 val_dataset 5103 test_dataset 8414


Sanity Checking: 0it [00:00, ?it/s]

check #49
val MPJPE from: 0 batches : 45.08065804839134


Training: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7f7f4fb8e200>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1478, in __del__
    self._shutdown_workers()
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/dataloader.py", line 1442, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/opt/conda/lib/python3.10/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/opt/conda/lib/python3.10/multiprocessing/popen_fork.py", line 40, in wait
    if not wait([self.sentinel], timeout):
  File "/opt/conda/lib/python3.10/multiprocessing/connection.py", line 931, in wait
    ready = selector.select(timeout)
  File "/opt/conda/lib/python3.10/selectors.py", line 416, in select
    fd_event_list = self._selector.poll(timeout)
KeyboardInterrupt: 


RuntimeError: DataLoader worker (pid(s) 189665, 189689, 189713, 189737, 189738, 189739, 189809, 189810, 189834, 189858, 189905, 189929, 189953, 189954, 189978, 189979, 190003, 190050, 190074, 190075, 190122, 190146, 190147, 190217) exited unexpectedly

In [None]:
import json
# Read the raw training-split keypoint detections and index them by image id.
with open("/root/data/processed/drive_and_act/keypoint_detection_results/keypoint_detection_train.json") as f:
    raw_text = f.read()
data = json.loads(raw_text)
# If several entries share an 'image_id', the last one in the file wins.
predictions = {item['image_id']: item for item in data}

In [None]:
# best_checkpoint_path = model_checkpoint.best_model_path
# trainer.test(ckpt_path=best_checkpoint_path, dataloaders=test_loader)

In [None]:
# Fetch a sample from the validation loader for the visual checks in the
# following cells.
# NOTE(review): later cells feed sample[0] to the model and pass sample[1] to
# the plotting helper — confirm the return layout of get_sample_from_loader.
sample = get_sample_from_loader(val_loader)

In [None]:
# Boolean mask over the rows of sample[1]: True where the row's values do not
# sum to zero.
# NOTE(review): presumably missing keypoints are stored as all-zero rows —
# confirm. A row whose coordinates cancel to exactly zero would also be masked.
valid_keypoints = sample[1].sum(axis=1) != 0

In [None]:
# Plot the pose stored in sample[1], restricted to the keypoints marked valid.
# argwhere yields an (N, 1) array of indices; flatten it to shape (N,).
valid_keypoint_indices = np.argwhere(valid_keypoints).ravel()
results = generate_connection_line(sample[1], valid_keypoint_indices)
pose_df = pd.DataFrame(results)
visualize_pose(pose_df)

In [None]:
# Run the trainer's current model on the sampled input and plot the predicted pose.
# NOTE(review): this uses the weights currently held by the trainer, not
# model_checkpoint.best_model_path — confirm that is intended.
model = trainer.model.to(device)
model.eval()  # put any dropout / batch-norm layers into evaluation behaviour

# Flatten sample[0] to a single vector and add a leading batch dimension of 1.
model_input = torch.flatten(torch.tensor(sample[0])).unsqueeze(0).float().to(device)

# This is inference only, so skip autograd bookkeeping: no graph is built and
# no activations are retained for a backward pass.
with torch.no_grad():
    # The second positional argument (0) is kept as in the original call.
    # NOTE(review): presumably a batch index expected by the module's forward — confirm.
    estimated_pose = model(model_input, 0)

estimated_pose_df = pd.DataFrame(
    generate_connection_line(
        # Regroup the flat prediction into rows of three values (one row per keypoint).
        estimated_pose[0].cpu().reshape([-1, 3]).detach().numpy(),
        # np.argwhere(valid_keypoints).reshape(-1)
    )
)
visualize_pose(estimated_pose_df)