In [None]:
# Enable IPython's autoreload extension in mode 2, so edited modules are
# re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import numpy as np
import torch
import torch.distributions as dist
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib_inline.backend_inline import set_matplotlib_formats
import matplotlib
import seaborn as sns
from tqdm.auto import tqdm
from signatureshape.animation import fetch_animations
import extratorch as etorch
import shapeflow as sf

In [None]:
# Seed torch's global RNG so sampling and weight initialisation are reproducible.
seed = torch.manual_seed(0)

# Plot styling: vector formats for inline figures, a larger font, a
# colour-blind-friendly palette and seaborn's plain white background.
# (The inline formats only need to be configured once.)
set_matplotlib_formats("pdf", "svg")
matplotlib.rcParams.update({"font.size": 12})
plt.style.use("tableau-colorblind10")
sns.set_style("white")

In [None]:
# Fetch the motion-capture recordings (as so3) and convert them to Euler angles.
# We assume all subjects share the same skeleton.
print("Loading mocap data:")

# Walking recordings: keep every entry whose description starts with "walk".
# Each fetched entry is indexed as (skeleton, animation, description).
walk_subjects = ["07", "08", "35", "16"]
walk_entries = [
    entry
    for subject in walk_subjects
    for entry in fetch_animations(100, subject_file_name=f"{subject}.asf")
    if entry[2][:4] == "walk"
]
walk_animations = [entry[1] for entry in walk_entries]
walk_desc = [entry[2] for entry in walk_entries]

# Total number of frames in the first 18 walking animations.
walk_animations_train_frame = sum(
    len(animation.get_frames()) for animation in walk_animations[:18]
)

# Running recordings: keep every entry whose description starts with "run".
run_subjects = ["09", "16", "35"]
run_entries = [
    entry
    for subject in run_subjects
    for entry in fetch_animations(100, subject_file_name=f"{subject}.asf")
    if entry[2][:3] == "run"
]
run_skeletons = [entry[0] for entry in run_entries]
run_animations = [entry[1] for entry in run_entries]

print("Convert to array:")
# Both motion types are converted with identical settings and with the
# skeleton of the first running recording (see the shared-skeleton assumption).
euler_options = dict(
    reduce_shape=True,
    remove_root=True,
    deg2rad=True,
    skeleton=run_skeletons[0],
    max_frame_count=240,
)
walk_angle_array = sf.utils.animation_to_eulers(walk_animations, **euler_options)
run_angle_array = sf.utils.animation_to_eulers(run_animations, **euler_options)

In [None]:
# Display the shapes of the converted run and walk angle arrays.
run_angle_array.shape, walk_angle_array.shape

In [None]:
# Convert the angle arrays to float32 tensors; the combined tensor stacks the
# walk samples first and the run samples after them.
walk_angle_tensor_ = torch.tensor(walk_angle_array, dtype=torch.float32)
run_angle_tensor_ = torch.tensor(run_angle_array, dtype=torch.float32)
wr_angle_tensor_ = torch.cat([walk_angle_tensor_, run_angle_tensor_])

# Standardise every tensor with the column-wise statistics of the combined data.
std, mean = torch.std_mean(wr_angle_tensor_, dim=0)
walk_angle_tensor_norm, run_angle_tensor_norm, wr_angle_tensor_norm = (
    (angles - mean) / std
    for angles in (walk_angle_tensor_, run_angle_tensor_, wr_angle_tensor_)
)

# Columns that change at least once between consecutive rows of the combined data.
frame_to_frame_change = wr_angle_tensor_.diff(dim=0).abs().sum(dim=0)
nonzero = torch.argwhere(frame_to_frame_change > 0.0).flatten()
# Hand-picked positions into `nonzero`, mapped to column indices of the angle tensors.
choosen = nonzero[[1, 33, 14, 17, 26]]

# Restrict the normalised tensors to the chosen columns.
walk_angle_tensor = walk_angle_tensor_norm[:, choosen]
run_angle_tensor = run_angle_tensor_norm[:, choosen]
wr_angle_tensor = wr_angle_tensor_norm[:, choosen]

In [None]:
# Build the per-sample class priors.
# 0 disables supervision; k > 0 overwrites every k-th prior with the true label.
superivsed_iterval = 0
walk_len = walk_angle_tensor.shape[0]
run_len = run_angle_tensor.shape[0]

# Indicator of the run class, in the same order as the combined data (walk, then run).
prior_run = torch.cat((torch.zeros(walk_len), torch.ones(run_len)))
# True labels as one-hot rows: column 0 = p(run), column 1 = p(walk).
q = torch.stack((prior_run, 1 - prior_run), dim=1)

# Near-uniform priors: a random offset in [0, 0.1) is moved from column 0 to column 1.
eps = torch.rand(len(q)) * 0.1
priors = torch.stack((0.5 - eps, 0.5 + eps), dim=1)
if superivsed_iterval > 0:
    priors[::superivsed_iterval] = q[::superivsed_iterval]

In [None]:
# Single-tensor datasets for the two motion types.
data_walk, data_run = (
    torch.utils.data.TensorDataset(angles)
    for angles in (walk_angle_tensor, run_angle_tensor)
)
# Combined dataset: features, an independent copy of the priors, and the priors tensor itself.
data = torch.utils.data.TensorDataset(wr_angle_tensor, priors.detach().clone(), priors)

Do clustering experiments:

In [None]:
# ---- output location of this experiment's figures ----
DIR = "../figures/cluster_frames/"
SET_NAME = "cont_euler_2"
PATH_FIGURES = os.path.join(DIR, SET_NAME)

# Standard-normal base distribution with the event shape of one feature vector.
event_shape = data[0][0].shape
base_dist = dist.Independent(
    dist.Normal(torch.zeros(event_shape), torch.ones(event_shape)), 1
)


def lr_scheduler(optim):
    """Return a ReduceLROnPlateau scheduler (halve the rate after 5 stalled epochs)."""
    return torch.optim.lr_scheduler.ReduceLROnPlateau(
        optim, mode="min", factor=0.5, patience=5, verbose=True
    )


# Fixed model settings.
MODEL_PARAMS = dict(
    model=sf.nf.get_flow,
    get_transform=sf.transforms.NDETransform,
    base_dist=base_dist,
    get_net=etorch.models.FFNN,
    activation="tanh",
    inverse_model=True,
    num_flows=2,
    sensitivity="autograd",
)
# Network-size settings that are combined with MODEL_PARAMS in the grid.
EXTRA_M_PARAMS = dict(
    neurons=[16],
    n_hidden_layers=[4],
)

# Fixed training settings.
TRAINING_PARAMS = dict(
    batch_size=[5000],
    compute_loss=[sf.nf.get_monte_carlo_conditional_dkl_loss()],
    verbose=True,
)
# extend the previous dict with the zip of this
EXTRA_T_PARAMS = dict(
    optimizer=["ADAM"],
    num_epochs=[200],
    learning_rate=[0.01],
    lr_scheduler=[lr_scheduler],
)

In [None]:
# Model grid: the fixed model settings combined with the extra network-size settings.
model_params_iter = etorch.add_dictionary_iterators(
    etorch.create_subdictionary_iterator(MODEL_PARAMS, product=True),
    etorch.create_subdictionary_iterator(EXTRA_M_PARAMS, product=False),
    product=True,
)

# Training grid: built the same way from the two training dictionaries.
training_params_iter = etorch.add_dictionary_iterators(
    etorch.create_subdictionary_iterator(TRAINING_PARAMS, product=True),
    etorch.create_subdictionary_iterator(EXTRA_T_PARAMS, product=False),
    product=True,
)

# Fit every model/training combination on the combined walk + run dataset.
cv_results = etorch.k_fold_cv_grid(
    model_params=model_params_iter,
    fit=etorch.fit_module,
    training_params=training_params_iter,
    data=data,
    verbose=True,
    copy_data=True,
)

In [None]:
# Plot the cross-validation results, passing PATH_FIGURES as the figure path.
etorch.plotting.plot_result(path_figures=PATH_FIGURES, **cv_results)

In [None]:
# One "<bone> <dof>" label per degree of freedom, in the skeleton's bone order.
s = run_skeletons[0]
bonelist = [
    bone_name + " " + dof
    for bone_name, bone in s.bones.items()
    for dof in bone.dof
]

print("Chosen angles:")
[bonelist[column] for column in choosen]

In [None]:
# Positions (within the chosen columns) shown on the y- and x-axis.
y_dim, x_dim = 3, 1
models = cv_results["models"][0]

# Draw 1000 samples from each flow and tag them with the flow's index.
flow_frames = []
for flow_id in (0, 1):
    drawn = models[flow_id].sample([1000]).detach()
    flow_frames.append(
        pd.DataFrame(
            {
                "x": drawn[:, x_dim],
                "y": drawn[:, y_dim],
                "Flow": [flow_id] * len(drawn),
            }
        )
    )
df = pd.concat(flow_frames, axis=0, ignore_index=True)

# KDE of the flow samples, with the real walk and run samples scattered on top.
sns.displot(df, x="x", y="y", hue="Flow", kind="kde")
walk_points = data_walk[:][0]
run_points = data_run[:][0]
plt.scatter(
    walk_points[:, x_dim], walk_points[:, y_dim], color="green", label="Walk samples"
)
plt.scatter(
    run_points[:, x_dim],
    run_points[:, y_dim],
    marker="x",
    color="grey",
    label="Run samples",
)
plt.xlabel(bonelist[choosen[x_dim]])
plt.ylabel(bonelist[choosen[y_dim]])
plt.legend()

plt.savefig(f"{PATH_FIGURES}/axis_distribution.pdf")
plt.show()

In [None]:
def count_cluster_assignments(flows, samples):
    """Count which of the first two flows assigns each sample the higher log-probability.

    Each flow's ``log_prob`` is evaluated once and reused for both comparisons.

    Args:
        flows: sequence whose first two items expose ``log_prob(samples)``.
        samples: batch passed unchanged to both ``log_prob`` calls.

    Returns:
        Tuple ``(class_1, class_2)`` of ints: ``class_1`` counts samples with
        ``flows[0].log_prob < flows[1].log_prob``, ``class_2`` counts the
        opposite strict inequality (ties are counted in neither).
    """
    log_prob_0 = flows[0].log_prob(samples)
    log_prob_1 = flows[1].log_prob(samples)
    return (
        torch.sum(log_prob_0 < log_prob_1).item(),
        torch.sum(log_prob_0 > log_prob_1).item(),
    )


motion_data = data_walk[:][0]
print("Walk data:")
class_1, class_2 = count_cluster_assignments(models, motion_data)
print("Class 1:", class_1, "Class 2:", class_2)

motion_data = data_run[:][0]
print("Run data:")
class_1, class_2 = count_cluster_assignments(models, motion_data)
print("Class 1 :", class_1, "Class 2:", class_2)

### Variable selection

Here we test which single dimension clusters the data best.
The result of the cell below is the indices of the dimensions chosen for the model.
**Note**: a bit of manual work might be necessary.

In [None]:
# Output location: one directory per tested dimension is derived from this prefix.
DIR = "../figures/cluster_frames"
SET_NAME = "cont_euler_3"
PATH_FIGURES_TRIAL = os.path.join(DIR, "trial", SET_NAME)

# One-dimensional standard-normal base distribution (a single angle per model).
event_shape = (1,)
base_dist = dist.Independent(
    dist.Normal(torch.zeros(event_shape), torch.ones(event_shape)), 1
)


def lr_scheduler(optim):
    """Return a quiet ReduceLROnPlateau scheduler (halve the rate after 5 stalled epochs)."""
    return torch.optim.lr_scheduler.ReduceLROnPlateau(
        optim, mode="min", factor=0.5, patience=5, verbose=False
    )


# Model settings, including the network size.
MODEL_PARAMS = dict(
    model=sf.nf.get_flow,
    get_transform=sf.transforms.NDETransform,
    base_dist=base_dist,
    get_net=etorch.models.FFNN,
    activation="tanh",
    inverse_model=True,
    num_flows=2,
    sensitivity="autograd",
    neurons=[16],
    n_hidden_layers=[4],
)

# Training settings; `compute_log` supplies the cluster logging callback.
TRAINING_PARAMS = dict(
    batch_size=[5000],
    compute_loss=[sf.nf.get_monte_carlo_conditional_dkl_loss()],
    optimizer=["ADAM"],
    num_epochs=[50],
    learning_rate=[0.01],
    compute_log=sf.nf.get_cluster_log,
    lr_scheduler=[lr_scheduler],
)

In [None]:
# Fit the clustering model on every non-constant angle column on its own and
# write the plots of each run to "<PATH_FIGURES_TRIAL>_<joint>".
for joint in tqdm(nonzero):
    # Parameter iterators are rebuilt for each joint
    # (presumably they are consumed by the grid search — confirm).
    model_params_iter = etorch.create_subdictionary_iterator(MODEL_PARAMS, product=True)
    training_params_iter = etorch.create_subdictionary_iterator(
        TRAINING_PARAMS, product=True
    )
    # Every 10th sample of a single column; the slice keeps the feature axis.
    # A dedicated name is used so the multi-angle `data` dataset is not overwritten.
    joint_data = torch.utils.data.TensorDataset(
        wr_angle_tensor_norm[::10, joint : joint + 1],
        priors[::10].clone().detach(),
        priors[::10],
    )
    cv_results_j = etorch.k_fold_cv_grid(
        model_params=model_params_iter,
        fit=etorch.fit_module,
        training_params=training_params_iter,
        data=joint_data,
        verbose=False,
        copy_data=True,
    )
    etorch.plotting.plot_result(
        path_figures=PATH_FIGURES_TRIAL + f"_{joint}",
        **cv_results_j,
    )

In [None]:
# Read the training history written for every joint and keep row 49 (and every
# 50th row after it) — presumably the final epoch of the 50-epoch runs; confirm.
dfs = [
    pd.read_csv(PATH_FIGURES_TRIAL + f"_{joint}/history_plot_t0_c0_f0.csv")[49::50]
    for joint in nonzero
]
loss_df = pd.concat(dfs, axis=0)
# One row per joint is expected; the index is the joint's position in `nonzero`.
loss_df.index = range(len(nonzero))

In [None]:
# Rank the tested dimensions by their "Conditional entropy" column, ascending.
loss_df.sort_values(by="Conditional entropy")

In [None]:
# Best-clustering dimensions, picked by hand from the sorted loss table.
# NOTE(review): these look like positions into `nonzero` (the same list is used
# to index `nonzero` when `choosen` is built) — confirm.
choosen_ = [1, 33, 14, 17, 26]

In [None]:
# Display the column indices of the non-constant angles.
# (`extratorch` is already imported in the import cell at the top of the notebook.)
nonzero

In [None]:
print("Chosen angles:")
# `choosen_` holds positions into `nonzero`; map them back to angle-column
# indices before looking up the bone/dof label.
[bonelist[nonzero[i]] for i in choosen_]