# Pose Error Project
### (Transformer-Encoder)

In [1]:
import logging

# Configure logging once for the whole notebook: "<timestamp> | <LEVEL> : <message>"
LOG_FORMAT = '%(asctime)s | %(levelname)s : %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
logger = logging.getLogger(__name__)

# Emitted before the heavy imports below so slow package loading is visible in the log
logger.info("Loading packages ...")
import json
import os
import pickle
import shlex
import sys
import time

# 3rd party packages

#from tqdm import tqdm
# since we are using it in jupyter notebook
from tqdm.notebook import tqdm

import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Project modules
from options import Options
from running import setup, pipeline_factory, validate, check_progress, NEG_METRICS
from utils import utils
from datasets.data import data_factory, Normalizer
from datasets.datasplit import split_dataset
from models.ts_transformer import model_factory
from models.loss import get_loss_module
from optimizers import get_optimizer

import parser


2023-05-25 11:10:12,536 | INFO : Loading packages ...
2023-05-25 11:10:13,799 | INFO : Note: NumExpr detected 32 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2023-05-25 11:10:13,800 | INFO : NumExpr defaulting to 8 threads.


# Setup Configurations

In [2]:
# Setting 1 - Single Stage
# Training From Scratch
# NOTE: this preset is disabled by wrapping it in a triple-quoted string, so `text`
# is NOT assigned here. Because the string is the cell's last expression, the
# notebook echoes its repr as the cell output.
'''
text = "--output_dir ../experiments/ --comment 'poseErrorPred_from_Scratch_smooth24' \
        --name poseErrorPred_fromScratch_Regression_Selective_4_64 --records_file Regression_records.xls \
        --data_dir ../data/SenseTimeV4_Selective/ --data_class pose \
        --epochs 25 --lr 0.0001 --optimizer RAdam --batch_size 128 \
        --pos_encoding learnable --task regression --print_interval 1\
        --num_layers 4  --num_heads 8 --d_model 64 --dim_feedforward 256"
'''

'\ntext = "--output_dir ../experiments/ --comment \'poseErrorPred_from_Scratch_smooth24\'         --name poseErrorPred_fromScratch_Regression_Selective_4_64 --records_file Regression_records.xls         --data_dir ../data/SenseTimeV4_Selective/ --data_class pose         --epochs 25 --lr 0.0001 --optimizer RAdam --batch_size 128         --pos_encoding learnable --task regression --print_interval 1        --num_layers 4  --num_heads 8 --d_model 64 --dim_feedforward 256"\n'

In [3]:
# Setting 2 - Two Stages
# Pretrain
# (disabled: kept inside a triple-quoted string for reference only)
'''
text = "--output_dir ../experiments/ --comment 'poseErrorPred_pretrain' \
        --name poseErrorPred_preTrain --records_file Regression_records.xls \
        --data_dir ../data/Hall_LivingRoom_Pretrain/ --data_class pose \
        --val_ratio 0.2 --epochs 50 --lr 0.0001 --optimizer RAdam --batch_size 128 \
        --pos_encoding learnable --task regression --print_interval 1"
'''
# Finetune
# Active preset: `text` is the command-line style option string parsed by the next cell.
# The trailing backslashes are line continuations inside the string literal, so the
# value is one long line of whitespace-separated options.
# `--load_model` points at the best checkpoint of an earlier pretraining run; the path
# embeds that run's timestamp and must be updated when pretraining is repeated.
text = "--output_dir ../experiments --comment 'poseErrorPred_finetune' \
        --name poseErrorPred_finetuned --records_file Regression_records.xls \
        --data_dir ../data/SenseTimeV4_Selective/ --data_class pose \
        --epochs 25 --lr 0.0001 --optimizer RAdam \
        --pos_encoding learnable --d_model 64 \
        --load_model ../experiments/poseErrorPred_preTrain_2023-05-25_10-45-43_HallnLivingRoom/checkpoints/model_best.pth \
        --task regression --change_output --batch_size 128"


In [4]:
# Process the setting string
# Generate the config variable
# The option string is written shell-style (e.g. --comment 'some text'), so it is
# tokenized with shell quoting rules. A plain str.split() would keep the quote
# characters as part of the value and would break quoted values containing spaces.
input_text = shlex.split(text)
args = Options().parse(input_text)  # parsed command-line style options
config = setup(args)  # resolved run configuration; logs where the config file was stored

2023-05-25 11:10:17,587 | INFO : Stored configuration file in '../experiments/poseErrorPred_finetuned_2023-05-25_11-10-17_jyO'


In [5]:
# Display the resolved configuration for a visual sanity check before running anything
config

{'config_filepath': None,
 'output_dir': '../experiments/poseErrorPred_finetuned_2023-05-25_11-10-17_jyO',
 'data_dir': '../data/SenseTimeV4_Selective/',
 'load_model': '../experiments/poseErrorPred_preTrain_2023-05-25_10-45-43_HallnLivingRoom/checkpoints/model_best.pth',
 'resume': False,
 'change_output': True,
 'save_all': False,
 'experiment_name': 'poseErrorPred_finetuned',
 'comment': "'poseErrorPred_finetune'",
 'no_timestamp': False,
 'records_file': 'Regression_records.xls',
 'console': False,
 'print_interval': 1,
 'gpu': '0',
 'n_proc': -1,
 'num_workers': 0,
 'seed': None,
 'limit_size': None,
 'test_only': None,
 'data_class': 'pose',
 'labels': None,
 'test_from': None,
 'test_ratio': 0,
 'val_ratio': 0.2,
 'pattern': None,
 'val_pattern': None,
 'test_pattern': None,
 'normalization': 'standardization',
 'norm_from': None,
 'subsample_factor': None,
 'task': 'regression',
 'masking_ratio': 0.15,
 'mean_mask_length': 3,
 'mask_mode': 'separate',
 'mask_distribution': 'geo

# Setup Logger

In [6]:
# Timing accumulators, both starting from zero
total_epoch_time = total_eval_time = 0

# Wall-clock reference point used to report the total runtime at the end
total_start_time = time.time()

In [7]:
# Mirror log records into <output_dir>/output.log in addition to the console
log_file_path = os.path.join(config['output_dir'], 'output.log')
file_handler = logging.FileHandler(log_file_path)
logger.addHandler(file_handler)

# Record the command line of the running process (inside a notebook this is the
# ipykernel launcher invocation, not a training script)
launch_command = ' '.join(sys.argv)
logger.info('Running:\n{}\n'.format(launch_command))

2023-05-25 11:10:20,607 | INFO : Running:
/home/tianyi/anaconda3/envs/transformer/lib/python3.8/site-packages/ipykernel_launcher.py -f /home/tianyi/.local/share/jupyter/runtime/kernel-8a69a3c6-5009-4686-9eef-8d745fcb5b09.json



# Setup Training Device

In [8]:
# Seed PyTorch's RNG only when a seed was configured
if config['seed'] is not None:
    torch.manual_seed(config['seed'])

# Use CUDA when it is available, unless the user disabled it with gpu == '-1'
device = torch.device('cuda' if (torch.cuda.is_available() and config['gpu'] != '-1') else 'cpu')
logger.info("Using device: {}".format(device))
# `device` is a torch.device object, not a string: comparing it to 'cuda' is always
# False, so the device type has to be checked through its `.type` attribute.
if device.type == 'cuda':
    logger.info("Device index: {}".format(torch.cuda.current_device()))

2023-05-25 11:10:21,698 | INFO : Using device: cuda


# Prepare Data

In [9]:
 # Build data
logger.info("Loading and preprocessing data ...")
# Look up the dataset class registered for the configured data type and load it
data_class = data_factory[config['data_class']]
dataset_kwargs = {key: config[key] for key in ('pattern', 'n_proc', 'limit_size')}
my_data = data_class(config['data_dir'], **dataset_kwargs, config=config)
feat_dim = my_data.feature_df.shape[1]  # number of feature columns

# Labels are only needed (for stratification) when the task is classification
is_classification = config['task'] == 'classification'
labels = my_data.labels_df.values.flatten() if is_classification else None
validation_method = 'StratifiedShuffleSplit' if is_classification else 'ShuffleSplit'

# Pose-error prediction always overrides the generic choice above
validation_method = 'PoseErrorTimeSplit'

2023-05-25 11:10:22,965 | INFO : Loading and preprocessing data ...
2023-05-25 11:10:22,967 | INFO : Loading 69 datasets files using 32 parallel processes ...


# Split dataset

In [10]:
# Split dataset
# By default validation and test samples are drawn from the same dataset object as
# the training samples; the patterns / ID file below can redirect them.
test_data = my_data
test_indices = None  # will be converted to empty list in `split_dataset`, if also test_set_ratio == 0
val_data = my_data
val_indices = []
if config['test_pattern']:  # used if test data come from different files / file patterns
    test_data = data_class(config['data_dir'], pattern=config['test_pattern'], n_proc=-1, config=config)
    test_indices = test_data.all_IDs
if config['test_from']:  # load test IDs directly from file, if available, otherwise use `test_set_ratio`. Can work together with `test_pattern`
    # Read the IDs inside a context manager so the file handle is always closed;
    # the set removes duplicate lines (order is therefore not preserved).
    with open(config['test_from']) as ids_file:
        test_indices = list({line.rstrip() for line in ids_file})
    try:
        test_indices = [int(ind) for ind in test_indices]  # integer indices
    except ValueError:
        pass  # in case indices are non-integers
    logger.info("Loaded {} test IDs from file: '{}'".format(len(test_indices), config['test_from']))
if config['val_pattern']:  # used if val data come from different files / file patterns
    val_data = data_class(config['data_dir'], pattern=config['val_pattern'], n_proc=-1, config=config)
    val_indices = val_data.all_IDs

In [11]:
# A validation set is required: it comes either from `val_pattern` (in which case
# `val_ratio` and `test_ratio` are expected to be 0) or from `val_ratio`.
if config['val_ratio'] > 0:
    split_kwargs = dict(
        data_indices=my_data.all_IDs,
        validation_method=validation_method,
        n_splits=1,
        validation_ratio=config['val_ratio'],
        test_set_ratio=config['test_ratio'],  # only used when no explicit test_indices are given
        test_indices=test_indices,
        random_seed=1337,
        labels=labels,
    )
    train_folds, val_folds, test_indices = split_dataset(**split_kwargs)
    # `split_dataset` returns one list of indices per fold; a single split was requested
    train_indices = train_folds[0]
    val_indices = val_folds[0]
else:
    # No ratio-based split: every sample of `my_data` is a training sample
    train_indices = my_data.all_IDs
    if test_indices is None:
        test_indices = []

In [12]:
# Report how many samples ended up in each partition
n_train, n_val, n_test = len(train_indices), len(val_indices), len(test_indices)
logger.info("{} \t samples may be used for training".format(n_train))
logger.info("{} \t samples will be used for validation".format(n_val))
logger.info("{} \t samples will be used for testing".format(n_test))

2023-05-25 11:11:10,608 | INFO : 88355 	 samples may be used for training
2023-05-25 11:11:10,609 | INFO : 22125 	 samples will be used for validation
2023-05-25 11:11:10,609 | INFO : 0 	 samples will be used for testing


In [13]:
# Persist the partition membership next to the other run artifacts
index_sets = {'train_indices': train_indices,
              'val_indices': val_indices,
              'test_indices': test_indices}
indices_path = os.path.join(config['output_dir'], 'data_indices.json')
with open(indices_path, 'w') as f:
    try:
        # Preferred form: plain Python ints
        json.dump({name: list(map(int, ids)) for name, ids in index_sets.items()}, f, indent=4)
    except ValueError:  # in case indices are non-integers
        json.dump({name: list(ids) for name, ids in index_sets.items()}, f, indent=4)


In [None]:
# Pre-process features
# Normalizes the feature frames IN PLACE. Re-running this cell without reloading the
# data would normalize values that have already been normalized.
normalizer = None
if config['norm_from']:
    # Reuse normalization parameters saved earlier.
    # SECURITY: pickle.load can execute arbitrary code; only point `norm_from` at trusted files.
    # NOTE(review): in this branch the training rows are not normalized anywhere in this
    # cell (only val/test below) — confirm that this is intended.
    with open(config['norm_from'], 'rb') as f:
        norm_dict = pickle.load(f)
    normalizer = Normalizer(**norm_dict)
elif config['normalization'] is not None:
    # Build a normalizer of the configured type and apply it to the training rows
    normalizer = Normalizer(config['normalization'])
    my_data.feature_df.loc[train_indices] = normalizer.normalize(my_data.feature_df.loc[train_indices])
    if not config['normalization'].startswith('per_sample'):
        # Store the normalizer's attributes (after it processed the training rows) in
        # the run's output directory for future use
        norm_dict = normalizer.__dict__
        with open(os.path.join(config['output_dir'], 'normalization.pickle'), 'wb') as f:
            pickle.dump(norm_dict, f, pickle.HIGHEST_PROTOCOL)
if normalizer is not None:
    # Apply the same normalizer object to validation and test rows, when there are any
    if len(val_indices):
        val_data.feature_df.loc[val_indices] = normalizer.normalize(val_data.feature_df.loc[val_indices])
    if len(test_indices):
        test_data.feature_df.loc[test_indices] = normalizer.normalize(test_data.feature_df.loc[test_indices])


# Create model

In [None]:
# Build the network described by the configuration
logger.info("Creating model ...")
model = model_factory(config, my_data)

if config['freeze']:
    # Only parameters whose name starts with 'output_layer' remain trainable
    for name, param in model.named_parameters():
        param.requires_grad = name.startswith('output_layer')

logger.info("Model:\n{}".format(model))
total_params = utils.count_parameters(model)
logger.info("Total number of parameters: {}".format(total_params))
trainable_params = utils.count_parameters(model, trainable=True)
logger.info("Trainable parameters: {}".format(trainable_params))


# Initialize optimizer

In [None]:
# Initialize optimizer
# The configured L2 coefficient is used in exactly one of two ways: as the optimizer's
# weight decay (global_reg), or kept aside in `output_reg` with weight decay disabled.
if config['global_reg']:
    weight_decay = config['l2_reg']
    output_reg = None
else:
    weight_decay = 0
    output_reg = config['l2_reg']

optim_class = get_optimizer(config['optimizer'])
optimizer = optim_class(model.parameters(), lr=config['lr'], weight_decay=weight_decay)

start_epoch = 0
lr_step = 0  # current step index of `lr_step`
lr = config['lr']  # current learning step
# Load model and optimizer state
# Gate on the same `config` entry that supplies the checkpoint path below, so the
# condition and the arguments cannot disagree (the original tested `args.load_model`).
if config['load_model']:
    model, optimizer, start_epoch = utils.load_model(model, config['load_model'], optimizer, config['resume'],
                                                     config['change_output'],
                                                     config['lr'],
                                                     config['lr_step'],
                                                     config['lr_factor'])
model.to(device)

loss_module = get_loss_module(config)

In [None]:
# Evaluation-only mode: score the model on the test set and log a summary
if config['test_only'] == 'testset':
    dataset_class, collate_fn, runner_class = pipeline_factory(config)
    test_dataset = dataset_class(test_data, test_indices)

    test_loader = DataLoader(
        dataset=test_dataset,
        batch_size=config['batch_size'],
        shuffle=False,
        num_workers=config['num_workers'],
        pin_memory=True,
        collate_fn=lambda x: collate_fn(x, max_len=model.max_len),
    )
    evaluator_options = dict(print_interval=config['print_interval'], console=config['console'])
    test_evaluator = runner_class(model, test_loader, device, loss_module, **evaluator_options)
    aggr_metrics_test, per_batch_test = test_evaluator.evaluate(keep_all=True)

    # One "<metric>: <value> | " fragment per aggregated metric
    summary_parts = ['{}: {:8f} | '.format(k, v) for k, v in aggr_metrics_test.items()]
    print_str = 'Test Summary: ' + ''.join(summary_parts)
    logger.info(print_str)
    #return

In [None]:
# Initialize data generators
if config['test_only'] != 'testset':  # training mode: build loaders, runners and bookkeeping
    dataset_class, collate_fn, runner_class = pipeline_factory(config)

    # Options shared by the validation and training loaders
    loader_kwargs = dict(
        batch_size=config['batch_size'],
        num_workers=config['num_workers'],
        pin_memory=True,
        collate_fn=lambda x: collate_fn(x, max_len=model.max_len),
    )
    # Options shared by the trainer and the validation evaluator
    runner_kwargs = dict(print_interval=config['print_interval'], console=config['console'])

    val_dataset = dataset_class(val_data, val_indices)
    val_loader = DataLoader(dataset=val_dataset, shuffle=False, **loader_kwargs)

    train_dataset = dataset_class(my_data, train_indices)
    train_loader = DataLoader(dataset=train_dataset, shuffle=True, **loader_kwargs)

    trainer = runner_class(model, train_loader, device, loss_module, optimizer,
                           l2_reg=output_reg, **runner_kwargs)
    val_evaluator = runner_class(model, val_loader, device, loss_module, **runner_kwargs)

    tensorboard_writer = SummaryWriter(config['tensorboard_dir'])

    # Start from a very large value when the key metric is listed in NEG_METRICS,
    # otherwise from a very small one
    if config['key_metric'] in NEG_METRICS:
        best_value = 1e16
    else:
        best_value = -1e16
    metrics = []  # (for validation) one list of metric values per evaluated epoch
    best_metrics = {}

In [None]:
# Disabled debugging snippet (kept as a string literal, so nothing below is executed):
# it would print the shape, first element and dtype of one training batch.
'''
print(config["batch_size"])
for batch in train_loader:
    X, targets, padding_masks, IDs = batch
    print(X.shape)
    print(X[0])
    print(X.dtype)
    print("-"*20)
    print(targets.shape)
    print(targets[0])
    print(targets.dtype)
    print("-"*20)
    print(padding_masks.shape)
    print(padding_masks[0])
    print("-"*20)
    print(IDs)
    break
'''

# Evaluate on validation before training

In [None]:
# Baseline validation pass, recorded as epoch 0, before any training step is taken
pre_training_results = validate(val_evaluator, tensorboard_writer, config, best_metrics, best_value, epoch=0)
aggr_metrics_val, best_metrics, best_value = pre_training_results

# Split the aggregated metrics into parallel (names, values) tuples and record the values
metrics_names, metrics_values = zip(*aggr_metrics_val.items())
metrics.append(list(metrics_values))

# Starting training...

In [None]:
# Main training loop: one pass per epoch with periodic validation, checkpointing,
# step-wise learning-rate decay and optional difficulty scheduling.
logger.info('Starting training...')
for epoch in tqdm(range(start_epoch + 1, config["epochs"] + 1), desc='Training Epoch', leave=False):
    # Checkpoint name suffix: the epoch number when every epoch is kept, else 'last' (overwritten)
    mark = epoch if config['save_all'] else 'last'
    epoch_start_time = time.time()
    # Training
    aggr_metrics_train = trainer.train_epoch(epoch)  # dictionary of aggregate epoch metrics
    epoch_runtime = time.time() - epoch_start_time
    print()
    print_str = 'Epoch {} Training Summary: '.format(epoch)
    for k, v in aggr_metrics_train.items():
        tensorboard_writer.add_scalar('{}/train'.format(k), v, epoch)
        print_str += '{}: {:8f} | '.format(k, v)
    logger.info(print_str)
    logger.info("Epoch runtime: {} hours, {} minutes, {} seconds\n".format(*utils.readable_time(epoch_runtime)))
    # Running averages over the epochs completed in this session
    total_epoch_time += epoch_runtime
    avg_epoch_time = total_epoch_time / (epoch - start_epoch)
    avg_batch_time = avg_epoch_time / len(train_loader)
    avg_sample_time = avg_epoch_time / len(train_dataset)
    logger.info("Avg epoch train. time: {} hours, {} minutes, {} seconds".format(*utils.readable_time(avg_epoch_time)))
    logger.info("Avg batch train. time: {} seconds".format(avg_batch_time))
    logger.info("Avg sample train. time: {} seconds".format(avg_sample_time))

    # evaluate if first or last epoch or at specified interval
    if (epoch == config["epochs"]) or (epoch == start_epoch + 1) or (epoch % config['val_interval'] == 0):
        aggr_metrics_val, best_metrics, best_value = validate(val_evaluator, tensorboard_writer, config,
                                                              best_metrics, best_value, epoch)
        metrics_names, metrics_values = zip(*aggr_metrics_val.items())
        metrics.append(list(metrics_values))

    utils.save_model(os.path.join(config['save_dir'], 'model_{}.pth'.format(mark)), epoch, model, optimizer)

    # Learning rate scheduling
    if epoch == config['lr_step'][lr_step]:
        # Keep a checkpoint of the model as it was right before the learning rate changes
        utils.save_model(os.path.join(config['save_dir'], 'model_{}.pth'.format(epoch)), epoch, model, optimizer)
        lr = lr * config['lr_factor'][lr_step]
        if lr_step < len(config['lr_step']) - 1:  # so that this index does not get out of bounds
            lr_step += 1
        # The value is formatted into the message: passing it as an extra positional
        # argument without a placeholder makes logging fail to format the record.
        logger.info('Learning rate updated to: {}'.format(lr))
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    # Difficulty scheduling
    if config['harden'] and check_progress(epoch):
        train_loader.dataset.update()
        val_loader.dataset.update()


In [None]:
# Export evolution of metrics over epochs
# `metrics_names` comes from the most recent validation pass; `metrics` holds one row
# of validation metric values per evaluated epoch.
header = metrics_names
metrics_filepath = os.path.join(config["output_dir"], "metrics_" + config["experiment_name"] + ".xls")
book = utils.export_performance_metrics(metrics_filepath, metrics, header, sheet_name="metrics")

# Export record metrics to a file accumulating records from all experiments
utils.register_record(config["records_file"], config["initial_timestamp"], config["experiment_name"],
                      best_metrics, aggr_metrics_val, comment=config['comment'])

logger.info('Best {} was {}. Other metrics: {}'.format(config['key_metric'], best_value, best_metrics))
logger.info('All Done!')

# Total wall-clock time since `total_start_time` was taken
total_runtime = time.time() - total_start_time
logger.info("Total runtime: {} hours, {} minutes, {} seconds\n".format(*utils.readable_time(total_runtime)))

# Leftover from a function-style script: the notebook prints the value instead of returning it
#return best_value
print(best_value)

# Evaluation

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Path to the saved best predictions of one specific, earlier experiment run.
# The pieces are concatenated verbatim (no path normalization).
base_path = "../experiments/"
experiment = 'poseErrorPred_finetuned_2023-05-16_12-04-46_ia1'
file_path = '/predictions/best_predictions.npz'
total_path = "".join((base_path, experiment, file_path))
total_path
#total_path = config['pred_dir']

In [None]:
# Point `total_path` at the predictions file of the CURRENT run instead; this
# overwrites any value assigned to `total_path` earlier.
total_path = config['pred_dir'] + '/best_predictions.npz'
total_path

In [None]:
# Inspect the predictions directory of the current run
config['pred_dir']

In [None]:
# Inspect the output directory of the current run
config['output_dir']

In [None]:
# Open the saved predictions archive and list the arrays it contains.
# SECURITY: allow_pickle=True lets the archive unpickle arbitrary objects, which can
# execute code; only load files produced by trusted runs.
pred = np.load(total_path, allow_pickle=True)
pred.files

In [None]:
# Concatenate each stored sequence of arrays along axis 0 into one flat array:
# ground truth, model output and the matching sample IDs
y, y_pred, IDs = (np.concatenate(pred[field], axis=0)
                  for field in ("targets", "predictions", "IDs"))
y_pred.shape

In [None]:
def get_mape(y, y_pred):
    """Return the mean absolute percentage error of `y_pred` against `y`.

    The result is a fraction (it is not multiplied by 100).

    Each absolute error is divided by the magnitude of its target, so negative
    targets contribute a positive term instead of cancelling against positive ones.
    For non-negative targets the result is unchanged from dividing by `y` directly.
    Targets equal to zero still produce inf/nan terms, as with any MAPE.

    Args:
        y: ground-truth values (array-like supporting element-wise arithmetic).
        y_pred: predicted values, broadcastable against `y`.

    Returns:
        The mean of |y - y_pred| / |y| over all elements.
    """
    err = y - y_pred
    return np.mean(np.abs(err) / np.abs(y))

def get_mse(y, y_pred):
    """Return the mean squared error between targets `y` and predictions `y_pred`."""
    squared_residuals = np.square(y - y_pred)
    return np.mean(squared_residuals)

In [None]:
get_mse(y, y_pred)

In [None]:
get_mape(y, y_pred)

In [None]:
def visualize_output(y, y_pred, title=' '):
    """Plot the ground truth alone and, below it, the ground truth with the predictions.

    The lower panel's title additionally reports the MSE and MAPE of the predictions.

    Args:
        y: ground-truth series.
        y_pred: predicted series.
        title: text used as the title of the upper panel and as the prefix of the lower one.
    """
    def _decorate(ax, heading):
        # Title, axis labels, grid and legend shared by both panels
        ax.set_title(heading)
        ax.set_xlabel('Step')
        ax.set_ylabel('Error')
        ax.grid()
        ax.legend()

    fig, (truth_ax, overlay_ax) = plt.subplots(nrows=2, ncols=1, figsize=(14, 6))
    fig.tight_layout(pad=3.0)

    # Upper panel: ground truth only
    truth_ax.plot(y, color='g', label='truth')
    _decorate(truth_ax, title)

    # Lower panel: ground truth with the predictions drawn on top
    overlay_ax.plot(y, color='g', label='truth')
    overlay_ax.plot(y_pred, color='b', alpha=0.7, label='predict')
    mse = get_mse(y, y_pred)
    mape = get_mape(y, y_pred)
    _decorate(overlay_ax, title+"- mse: {:.5f} | mape: {:.5f}".format(mse, mape))
    plt.show()

In [None]:
# Plot the ground truth against the predictions loaded above
visualize_output(y, y_pred, title='Prediction on SenseTime Dataset')

In [None]:
# Spot-check the sample ID stored at position 10000 (hard-coded index; this fails
# with an IndexError when fewer IDs were loaded)
IDs[10000]