In [None]:
import os
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

import argparse
import sys
import json
import math
import numpy as np
import copy
from tqdm import tqdm
import wandb

from config import config
from model import siMLPe as Model
from datasets.epfl_sk30 import EPFLSK30Dataset
from utils.logger import get_logger, print_and_log_info
from utils.pyt_utils import link_file, ensure_dir
from datasets.epfl_sk30_eval import EPFLSK30Eval

from test import test

import torch
from torch.utils.data import DataLoader

In [2]:
# Name of this run; used both as the wandb run name and as the accuracy-log file name.
exp_name = "epfl_sk30_baseline_downsample2"

#E https://docs.pytorch.org/docs/stable/generated/torch.use_deterministic_algorithms.html
#E Raises a RuntimeError when an operation only has a nondeterministic implementation (AvgPool3D, MaxPool3D, ...).
#E Uses the deterministic implementation when one exists (Conv1D, Conv2D, Conv3D, ...; see the page above for more).
torch.use_deterministic_algorithms(True)

# Line-buffered (buffering=1) so every record reaches the file as soon as its
# newline is written: nothing visible in this notebook closes the file, so
# default block buffering could lose the tail of the log if the kernel dies.
acc_log = open(exp_name, 'a', buffering=1)
torch.manual_seed(config.seed)

# Initialize wandb
wandb.init(
    project="siMLPe-EPFL-SK30",
    name=exp_name,
    config={
        "exp_name": exp_name,
        "seed": config.seed,
        "with_normalization": config.motion_mlp.with_normalization,
        "spatial_fc": config.motion_mlp.spatial_fc_only,
        "num_layers": config.motion_mlp.num_layers,
        # Model config
        "motion_input_length": config.motion.epfl_input_length,
        "motion_target_length_train": config.motion.epfl_target_length_train,
        "motion_target_length_eval": config.motion.epfl_target_length_eval,
        "motion_dim": config.motion.dim,
        "data_aug": config.data_aug,
        "deriv_input": config.deriv_input,
        "deriv_output": config.deriv_output,
        "use_relative_loss": config.use_relative_loss,
        # Training config
        "batch_size": config.batch_size,
        "num_workers": config.num_workers,
        "cos_lr_max": config.cos_lr_max,
        "cos_lr_min": config.cos_lr_min,
        "cos_lr_total_iters": config.cos_lr_total_iters,
        "weight_decay": config.weight_decay,
        "print_every": config.print_every,
        "save_every": config.save_every,
    }
)


# Record the seed at the top of this run's section of the accuracy log.
acc_log.write('Seed : ' + str(config.seed) + '\n')

[34m[1mwandb[0m: Currently logged in as: [33memredmrcx[0m ([33memredmrcx-itu-edu-tr[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


11

In [3]:
#E Compute Discrete Cosine Transform (DCT) matrix and its inverse (IDCT) for a given size N. 
#E DCT matrix is used to transform data into the frequency domain, and its inverse allows transformation back to the original domain. 
def get_dct_matrix(N):
    """Build the N x N DCT matrix and its inverse.

    Entry ``[k, i]`` of the DCT matrix is ``w_k * cos(pi * (i + 1/2) * k / N)``
    with ``w_0 = sqrt(1 / N)`` and ``w_k = sqrt(2 / N)`` for ``k > 0``.

    Args:
        N: Size of the transform (number of rows and columns).

    Returns:
        Tuple ``(dct_m, idct_m)`` of float64 arrays of shape ``(N, N)``, where
        ``idct_m`` is ``np.linalg.inv(dct_m)``.

    Raises:
        ValueError: If ``N`` is negative.
    """
    if N < 0:
        raise ValueError("negative dimensions are not allowed")

    freq = np.arange(N)[:, None]    # k: row index, one per frequency
    sample = np.arange(N)[None, :]  # i: column index, one per sample

    # Row 0 is scaled by sqrt(1/N), every other row by sqrt(2/N).
    scale = np.sqrt(np.where(freq == 0, 1.0, 2.0) / N)

    # Same expression and operation order as the scalar formula, broadcast
    # over the whole (N, N) grid instead of filled element by element.
    dct_m = scale * np.cos(np.pi * (sample + 1 / 2) * freq / N)
    idct_m = np.linalg.inv(dct_m)
    return dct_m, idct_m


#E Build the DCT / IDCT matrices sized by config.motion.epfl_input_length_dct,
#E then keep them on the GPU as float32 tensors with a leading axis of size 1.
dct_np, idct_np = get_dct_matrix(config.motion.epfl_input_length_dct)
dct_m = torch.tensor(dct_np).float().cuda()[None]
idct_m = torch.tensor(idct_np).float().cuda()[None]
 

In [4]:
def update_lr_multistep(nb_iter, total_iter, max_lr, min_lr, optimizer,
                        decay_after=30000, base_lr=3e-4, decayed_lr=1e-5) :
    """Apply a two-stage step learning-rate schedule to ``optimizer``.

    The learning rate is ``base_lr`` while ``nb_iter <= decay_after`` and
    ``decayed_lr`` afterwards; it is written into every parameter group.

    Args:
        nb_iter: Current iteration index.
        total_iter: Accepted for interface compatibility; not used.
        max_lr: Accepted for interface compatibility; not used.
        min_lr: Accepted for interface compatibility; not used.
        optimizer: Object exposing ``param_groups`` (a list of dicts with an
            ``"lr"`` entry), updated in place.
        decay_after: Last iteration index that still uses ``base_lr``.
        base_lr: Learning rate before the decay point.
        decayed_lr: Learning rate after the decay point.

    Returns:
        Tuple ``(optimizer, current_lr)``.
    """
    current_lr = decayed_lr if nb_iter > decay_after else base_lr

    for param_group in optimizer.param_groups:
        param_group["lr"] = current_lr

    return optimizer, current_lr

#E Compute velocity for the loss function.
def gen_velocity(m):
    """Return the difference between consecutive entries of ``m`` along axis 1.

    The result has one entry fewer than ``m`` along axis 1; every other axis
    is unchanged.
    """
    later = m[:, 1:]
    earlier = m[:, :-1]
    return later - earlier

In [5]:
def train_step(epfl_motion_input, epfl_motion_target, model, optimizer, nb_iter, total_iter, max_lr, min_lr) :
    """Run one training iteration: forward pass, loss, backward pass, optimizer and LR update.

    Relies on the notebook-level ``config``, ``dct_m`` and ``idct_m``.

    Args:
        epfl_motion_input: Input motion batch. The original notebook comments
            describe it as (b, n, 17, 3); every axis after the first two is
            flattened before the model sees it.
        epfl_motion_target: Target motion batch. Must be 4-D, because the
            losses take the L2 norm over axis 3.
        model: Network called on the (optionally DCT-transformed) input.
        optimizer: Optimizer stepped once per call.
        nb_iter: Current iteration index; logged and passed to the LR schedule.
        total_iter: Forwarded to ``update_lr_multistep``.
        max_lr: Forwarded to ``update_lr_multistep``.
        min_lr: Forwarded to ``update_lr_multistep``.

    Returns:
        Tuple ``(loss_value, optimizer, current_lr)``; ``loss_value`` is a
        Python float.
    """
    # Flatten the per-frame axes, (b, n, ...) -> (b, n, d), and move each batch
    # to the GPU exactly once; both tensors are reused several times below.
    b, n = epfl_motion_input.shape[:2]
    motion_input = epfl_motion_input.reshape(b, n, -1).cuda()
    motion_target = epfl_motion_target.cuda()

    #E config.deriv_input = True: Use DCT matrix
    #E config.deriv_input = False: Use original input
    if config.deriv_input:
        # matmul allocates a new tensor, so no defensive copy is needed here.
        model_input = torch.matmul(dct_m[:, :, :config.motion.epfl_input_length], motion_input)
    else:
        # Copy so the model never receives the tensor that also provides the
        # offset below.
        model_input = motion_input.clone()

    #E Predict the motion and use IDCT to get the original motion.
    motion_pred = model(model_input)
    motion_pred = torch.matmul(idct_m[:, :config.motion.epfl_input_length, :], motion_pred)
    motion_pred = motion_pred[:, :config.motion.epfl_target_length_train]

    #E config.deriv_output = True: the model output is treated as an offset
    #E from the last input frame, which is added back to get absolute values.
    #E config.deriv_output = False: the model output is used directly.
    if config.deriv_output:
        motion_pred = motion_pred + motion_input[:, -1:]

    #E Bring the prediction into the target's layout (the original notebook
    #E comments describe it as (b, n, 17, 3)) instead of hard-coding 17 x 3.
    motion_pred = motion_pred.reshape(motion_target.shape)

    # Position loss: mean L2 norm over axis 3 of the prediction error.
    position_loss = torch.mean(torch.norm(motion_pred - motion_target, 2, 3))

    #E Optional velocity loss on frame-to-frame differences, added to the position loss.
    velocity_value = 0.0
    if config.use_relative_loss:
        dmotion_pred = gen_velocity(motion_pred)
        dmotion_gt = gen_velocity(motion_target)
        velocity_loss = torch.mean(torch.norm(dmotion_pred - dmotion_gt, 2, 3))
        velocity_value = velocity_loss.item()
        loss = position_loss + velocity_loss
    else:
        loss = position_loss

    loss_value = loss.item()
    position_value = position_loss.item()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    optimizer, current_lr = update_lr_multistep(nb_iter, total_iter, max_lr, min_lr, optimizer)

    # One wandb.log call per iteration: every call advances wandb's internal
    # step, so logging the losses and the learning rate separately would put
    # them on different steps.
    wandb.log({
        "train/loss": loss_value,
        "train/position_loss": position_value,
        "train/velocity_loss": velocity_value,
        "train/learning_rate": current_lr,
        "train/iteration": nb_iter
    })

    return loss_value, optimizer, current_lr

In [6]:
# Build the network from the shared config, switch it to training mode and
# move it to the GPU. The value of the last call is what the cell displays.
model = Model(config)
model.train()
model.cuda()

siMLPe(
  (arr0): Rearrange('b n d -> b d n')
  (arr1): Rearrange('b d n -> b n d')
  (motion_mlp): TransMLP(
    (mlps): Sequential(
      (0): MLPblock(
        (fc0): Temporal_FC(
          (fc): Linear(in_features=50, out_features=50, bias=True)
        )
        (norm0): LN()
      )
      (1): MLPblock(
        (fc0): Temporal_FC(
          (fc): Linear(in_features=50, out_features=50, bias=True)
        )
        (norm0): LN()
      )
      (2): MLPblock(
        (fc0): Temporal_FC(
          (fc): Linear(in_features=50, out_features=50, bias=True)
        )
        (norm0): LN()
      )
      (3): MLPblock(
        (fc0): Temporal_FC(
          (fc): Linear(in_features=50, out_features=50, bias=True)
        )
        (norm0): LN()
      )
      (4): MLPblock(
        (fc0): Temporal_FC(
          (fc): Linear(in_features=50, out_features=50, bias=True)
        )
        (norm0): LN()
      )
      (5): MLPblock(
        (fc0): Temporal_FC(
          (fc): Linear(in_features=50

In [7]:
# Training split; the third argument is config.data_aug (presumably toggles
# augmentation -- confirm against EPFLSK30Dataset).
dataset = EPFLSK30Dataset(config, 'train', config.data_aug)

# Shuffled batches with no custom sampler; an incomplete final batch is dropped.
sampler = None
shuffle = True
dataloader = DataLoader(
    dataset,
    batch_size=config.batch_size,
    shuffle=shuffle,
    sampler=sampler,
    num_workers=config.num_workers,
    pin_memory=True,
    drop_last=True,
)

In [8]:
# Evaluation works on its own deep copy of the config.
eval_config = copy.deepcopy(config)
eval_dataset = EPFLSK30Eval(eval_config, 'test')

# Fixed order, no sampler, and the final partial batch is kept.
sampler = None
shuffle = False
eval_dataloader = DataLoader(
    eval_dataset,
    batch_size=128,
    shuffle=shuffle,
    sampler=sampler,
    num_workers=1,
    pin_memory=True,
    drop_last=False,
)

In [9]:
# Initialize the optimizer. The learning rate given here only applies to the
# first step: train_step overwrites it through update_lr_multistep after
# every optimizer step.
optimizer = torch.optim.Adam(model.parameters(),
                             lr=config.cos_lr_max,
                             weight_decay=config.weight_decay)

ensure_dir(config.snapshot_dir)
logger = get_logger(config.log_file, 'train')
link_file(config.log_file, config.link_log_file)

print_and_log_info(logger, json.dumps(config, indent=4, sort_keys=True))

if config.model_pth is not None :
    # NOTE(review): torch.load unpickles the checkpoint file; only load
    # checkpoints that come from a trusted source.
    state_dict = torch.load(config.model_pth)
    model.load_state_dict(state_dict, strict=True)
    print_and_log_info(logger, "Loading model path from {} ".format(config.model_pth))

# Let wandb track the model (log="all", every 1000 steps)
wandb.watch(model, log="all", log_freq=1000)

##### ------ training ------- #####
nb_iter = 0
avg_loss = 0.
avg_lr = 0.

#E Training loop
print("training loop")
pbar = tqdm(total=config.cos_lr_total_iters, desc="Training", unit="iter")

# An explicit flag ends training instead of re-testing (nb_iter + 1) in the
# while condition. With that condition, an epoch ending exactly one iteration
# before the budget would exit the outer loop early and skip the final
# iteration, including its checkpoint and evaluation.
training_done = config.cos_lr_total_iters < 1

try:
    while not training_done:

        for (epfl_motion_input, epfl_motion_target) in dataloader:

            loss, optimizer, current_lr = train_step(epfl_motion_input, epfl_motion_target, model, optimizer, nb_iter, config.cos_lr_total_iters, config.cos_lr_max, config.cos_lr_min)
            avg_loss += loss
            avg_lr += current_lr

            if (nb_iter + 1) % config.print_every ==  0 :
                avg_loss = avg_loss / config.print_every
                avg_lr = avg_lr / config.print_every

                print_and_log_info(logger, "Iter {} Summary: ".format(nb_iter + 1))
                print_and_log_info(logger, f"\t lr: {avg_lr} \t Training loss: {avg_loss}")

                # Log averaged metrics to wandb
                wandb.log({
                    "train/avg_loss": avg_loss,
                    "train/avg_lr": avg_lr,
                    "train/iteration": nb_iter + 1
                })

                avg_loss = 0
                avg_lr = 0

            if (nb_iter + 1) % config.save_every ==  0 :

                # Save model checkpoint
                model_path = config.snapshot_dir + '/model-iter-' + str(nb_iter + 1) + '.pth'
                torch.save(model.state_dict(), model_path)

                # Save model artifact to wandb
                wandb.save(model_path)

                # Evaluate model
                model.eval()
                acc_tmp = test(eval_config, model, eval_dataloader)
                print(acc_tmp)

                # Log evaluation metrics to wandb
                eval_metrics = {"eval/iteration": nb_iter + 1}

                # Log all accuracy values from acc_tmp
                for i, acc_value in enumerate(acc_tmp):
                    eval_metrics[f"eval/accuracy_{i}"] = acc_value

                wandb.log(eval_metrics)

                # Two lines per evaluation: the iteration number, then the
                # space-separated metric values.
                acc_log.write(str(nb_iter + 1) + '\n')
                acc_log.write(''.join(str(ii) + ' ' for ii in acc_tmp) + '\n')
                # The log file is never closed in this notebook, so push each
                # record to disk right away.
                acc_log.flush()
                model.train()

            # Update progress bar
            pbar.update(1)
            pbar.set_postfix({
                'loss': f'{loss:.4f}',
                'lr': f'{current_lr:.2e}',
                'iter': nb_iter + 1
            })

            if (nb_iter + 1) >= config.cos_lr_total_iters :
                training_done = True
                break
            nb_iter += 1
finally:
    # Runs on errors and interrupts too, so the progress bar is released and
    # nothing written to the accuracy log stays in the buffer.
    pbar.close()
    acc_log.flush()

wandb.finish()


training loop


Training:   0%|          | 0/40000 [00:00<?, ?iter/s]

Training:  13%|█▎        | 5003/40000 [06:34<126:14:05, 12.99s/iter, loss=0.0367, lr=3.00e-04, iter=5003]

[5.2, 10.9, 23.7, 30.2, 44.1, 57.7, 70.9, 80.3]


Training:  25%|██▌       | 10004/40000 [13:08<114:00:54, 13.68s/iter, loss=0.0415, lr=3.00e-04, iter=1e+4]

[4.7, 10.0, 22.1, 28.4, 41.1, 54.0, 66.8, 76.0]


Training:  38%|███▊      | 15004/40000 [19:41<87:32:16, 12.61s/iter, loss=0.0353, lr=3.00e-04, iter=15004] 

[4.5, 9.7, 21.8, 28.3, 41.1, 54.0, 66.9, 76.0]


Training:  50%|█████     | 20003/40000 [26:18<78:58:08, 14.22s/iter, loss=0.0325, lr=3.00e-04, iter=2e+4] 

[4.3, 9.2, 20.3, 26.0, 37.7, 50.0, 62.4, 71.6]


Training:  63%|██████▎   | 25004/40000 [32:53<52:27:53, 12.59s/iter, loss=0.0373, lr=3.00e-04, iter=25004]

[4.2, 9.0, 20.0, 25.6, 37.3, 49.7, 62.5, 71.8]


Training:  75%|███████▌  | 30004/40000 [39:30<38:04:46, 13.71s/iter, loss=0.0351, lr=1.00e-05, iter=3e+4] 

[3.8, 8.4, 18.8, 24.1, 34.8, 46.2, 58.2, 67.1]


Training:  88%|████████▊ | 35004/40000 [46:05<17:58:40, 12.95s/iter, loss=0.0342, lr=1.00e-05, iter=35004]

[3.6, 8.1, 18.2, 23.3, 33.4, 44.0, 55.3, 64.0]


Training: 100%|██████████| 40000/40000 [52:41<00:00, 12.65iter/s, loss=0.0319, lr=1.00e-05, iter=4e+4]    

[3.6, 8.0, 18.1, 23.2, 33.6, 44.5, 56.1, 64.9]





0,1
eval/accuracy_0,█▆▅▄▄▂▁▁
eval/accuracy_1,█▆▅▄▃▂▁▁
eval/accuracy_2,█▆▆▄▃▂▁▁
eval/accuracy_3,█▆▆▄▃▂▁▁
eval/accuracy_4,█▆▆▄▄▂▁▁
eval/accuracy_5,█▆▆▄▄▂▁▁
eval/accuracy_6,█▆▆▄▄▂▁▁
eval/accuracy_7,█▆▆▄▄▂▁▁
eval/iteration,▁▂▃▄▅▆▇█
train/avg_loss,█▆▆▆▆▄▄▅▄▄▄▄▃▃▃▄▃▃▃▂▃▂▂▂▂▂▂▂▂▃▁▁▁▁▁▁▂▁▁▁

0,1
eval/accuracy_0,3.6
eval/accuracy_1,8
eval/accuracy_2,18.1
eval/accuracy_3,23.2
eval/accuracy_4,33.6
eval/accuracy_5,44.5
eval/accuracy_6,56.1
eval/accuracy_7,64.9
eval/iteration,40000
train/avg_loss,0.03186
