In [None]:
import pandas as pd
import torch
import numpy as np
import sys
import matplotlib.pyplot as plt
import warnings

sys.path.append("../../")
from src.vae_architectures.lstm import LSTMVariationalAutoEncoder
from src.vae_architectures.signal_cnn import SignalCNNVariationalAutoEncoder
from src.vae_architectures.graph_cnn import GraphVariationalAutoEncoder
from src.dataset import ExerciseDataset
from src.utils.constants import (
    EXERCISES,
    NUM_JOINTS,
    SEQUENCE_LENGTH,
    LATENT_SIZE,
    NUM_LAYERS,
    HIDDEN_SIZE,
    BATCH_SIZE,
)

In [None]:
# Install a global filter that ignores every FutureWarning raised from here on.
warnings.filterwarnings("ignore", category=FutureWarning)

In [None]:
def get_models(
    architecture: torch.nn.Module,
    architecture_name: str,
    models_path: str,
    exercises: "list[str] | None" = None,
) -> dict[str, torch.nn.Module]:
    """Load one trained model per exercise for a given architecture.

    Args:
        architecture: Template module whose structure matches the saved
            checkpoints. It is copied for every exercise and never modified.
        architecture_name: Short architecture tag; used both in the checkpoint
            file name (``dct_<architecture_name>.pt``) and in the result keys.
        models_path: Directory that contains one sub-directory per exercise.
        exercises: Exercise names to load. Defaults to the module-level
            ``EXERCISES`` constant when omitted.

    Returns:
        A dict mapping ``"<exercise>_<architecture_name>"`` to an independent
        module holding that exercise's weights (loaded onto the CPU).
    """
    import copy  # local import keeps this cell runnable on its own

    if exercises is None:
        exercises = EXERCISES

    models = {}
    for exercise in exercises:
        # load_state_dict() writes into the module in place. Loading straight
        # into the shared template would make every dict entry reference the
        # same object, holding only the weights of the last exercise.
        model = copy.deepcopy(architecture)
        model.load_state_dict(
            torch.load(
                f"{models_path}/{exercise}/dct_{architecture_name}.pt",
                map_location=torch.device("cpu"),
            )
        )
        models[f"{exercise}_{architecture_name}"] = model

    return models

In [None]:
# All three architectures read their per-exercise checkpoints from the same directory.
MODELS_DIR = "../../models"
# Width of one input frame: NUM_JOINTS * 3 (presumably three coordinates per joint;
# confirm against the dataset).
INPUT_SIZE = NUM_JOINTS * 3

# Build one untrained template per architecture ...
lstm_architecture = LSTMVariationalAutoEncoder(
    SEQUENCE_LENGTH, INPUT_SIZE, HIDDEN_SIZE, LATENT_SIZE, NUM_LAYERS
)
cnn_architecture = SignalCNNVariationalAutoEncoder(
    SEQUENCE_LENGTH, INPUT_SIZE, HIDDEN_SIZE, LATENT_SIZE
)
graph_architecture = GraphVariationalAutoEncoder(
    SEQUENCE_LENGTH, INPUT_SIZE, HIDDEN_SIZE, LATENT_SIZE
)

# ... then load the trained weights for every exercise into each of them.
lstm_models = get_models(lstm_architecture, "lstm", MODELS_DIR)
cnn_models = get_models(cnn_architecture, "cnn", MODELS_DIR)
graph_models = get_models(graph_architecture, "graph", MODELS_DIR)

In [None]:
# Per-exercise datasets built from the DCT representation of each split.
train_datasets, test_datasets = {}, {}

for exercise in EXERCISES:
    train_datasets[exercise] = ExerciseDataset(
        pd.read_csv(f"../../data/train/{exercise}/dct.csv"), representation="dct"
    )
    test_datasets[exercise] = ExerciseDataset(
        pd.read_csv(f"../../data/test/{exercise}/dct.csv"), representation="dct"
    )

In [None]:
def _stack_reps(dataset):
    """Stack every item of ``dataset.data`` into a single tensor."""
    return torch.stack(list(dataset.data))


def _binary_targets(dataset):
    """Return a list with 1 where the encoded label equals 0 and 0 otherwise.

    The later plots draw the 1-valued points as "Correct", so encoded label 0
    is presumably the correct-execution class (confirm against ExerciseDataset).
    """
    encoded = torch.stack(list(dataset.labels_encoded))
    return [int(label == 0) for label in encoded]


# Features and binary targets for each split, keyed by exercise name.
X_train = {exercise: _stack_reps(train_datasets[exercise]) for exercise in EXERCISES}
y_train = {
    exercise: _binary_targets(train_datasets[exercise]) for exercise in EXERCISES
}

X_test = {exercise: _stack_reps(test_datasets[exercise]) for exercise in EXERCISES}
y_test = {
    exercise: _binary_targets(test_datasets[exercise]) for exercise in EXERCISES
}

In [None]:
from sklearn.manifold import TSNE

# Fixed seed so the 2-D projections are reproducible across kernel restarts.
TSNE_SEED = 42
tsne = TSNE(n_components=2, random_state=TSNE_SEED)
fig, axes = plt.subplots(ncols=3, nrows=3, figsize=(16, 12))

# One panel per (exercise, architecture) model.
# NOTE: zip() stops at the shorter input, so with this 3x3 grid only the first
# nine models are drawn; enlarge the grid if more models are loaded.
for (name, model), axis in zip(
    [*lstm_models.items(), *cnn_models.items(), *graph_models.items()], axes.flatten()
):
    # Keys are "<exercise>_<architecture>"; split on the LAST underscore so that
    # exercise names containing underscores still unpack into two parts.
    exercise_name, model_name = name.rsplit("_", 1)

    # Inference only: no autograd graph is needed for the embeddings.
    with torch.no_grad():
        train_embedded = model.encoder(X_train[exercise_name])[0].detach().numpy()
        test_embedded = model.encoder(X_test[exercise_name])[0].detach().numpy()

    # Project train and test embeddings together so they share one 2-D space.
    latent_2d = tsne.fit_transform(np.concatenate([train_embedded, test_embedded]))
    all_y = np.concatenate([y_train[exercise_name], y_test[exercise_name]])

    correct_mask = all_y == 1
    incorrect_mask = all_y == 0
    # Explicit labels keep the legend right even when one class has no points.
    axis.scatter(
        latent_2d[correct_mask, 0],
        latent_2d[correct_mask, 1],
        c="green",
        label="Correct",
    )
    axis.scatter(
        latent_2d[incorrect_mask, 0],
        latent_2d[incorrect_mask, 1],
        c="red",
        label="Incorrect",
    )

    axis.legend()
    axis.set_title(f"{exercise_name} {model_name}")

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, f1_score, roc_auc_score

from sklearn.utils.class_weight import compute_class_weight

# Number of independent decision-tree fits used to estimate mean/std of the metrics.
N_RUNS = 50

for name, model in [*lstm_models.items(), *cnn_models.items(), *graph_models.items()]:
    # Keys are "<exercise>_<architecture>"; split on the LAST underscore so that
    # exercise names containing underscores still unpack into two parts.
    exercise_name, model_name = name.rsplit("_", 1)

    # Balanced class weights must come from the labels of THIS exercise
    # (`exercise_name`), not from a loop variable left over from an earlier cell.
    # They depend only on the training labels, so compute them once per model.
    train_labels = np.asarray(y_train[exercise_name])
    classes = np.unique(train_labels)
    class_weights = compute_class_weight("balanced", classes=classes, y=train_labels)
    # Key the weights by the actual class values rather than by their position.
    class_weight = dict(zip(classes.tolist(), class_weights))

    f1 = []
    auc = []
    for _ in range(N_RUNS):
        # Embeddings are recomputed on every run.
        # NOTE(review): if the encoder is deterministic, hoist this above the loop.
        with torch.no_grad():
            X_train_embedded = model.encoder(X_train[exercise_name])[0].detach().numpy()
            X_test_embedded = model.encoder(X_test[exercise_name])[0].detach().numpy()

        clf = DecisionTreeClassifier(class_weight=class_weight).fit(
            X_train_embedded, y_train[exercise_name]
        )
        y_pred = clf.predict(X_test_embedded)
        y_pred_proba = clf.predict_proba(X_test_embedded)
        f1.append(f1_score(y_test[exercise_name], y_pred))
        # Column 1 holds the probability of class 1 (assumes both classes occur in training).
        auc.append(roc_auc_score(y_test[exercise_name], y_pred_proba[:, 1]))

    print(
        f"{exercise_name}: {model_name} mean f1-score: {np.mean(f1)}, std: {np.std(f1)}, mean auc: {np.mean(auc)}, std: {np.std(auc)}"
    )

### Counterfactual generation

In [None]:
# The generation section works with the LSTM-based model trained on squats.
model = lstm_models["squat_lstm"]

# Encode the train split, then the test split, keeping the first encoder output of each.
X_train_embedded_squat, X_test_embedded_squat = (
    model.encoder(split["squat"])[0].detach().numpy() for split in (X_train, X_test)
)

# Train rows first, test rows second.
latent_space_squat = np.concatenate((X_train_embedded_squat, X_test_embedded_squat))

In [None]:
fig, axis = plt.subplots(figsize=(12, 6))

# 2-D projection of the squat latent space, plus the matching binary targets
# in the same train-then-test order.
latent_space_squat_reduced = tsne.fit_transform(latent_space_squat)
y_squat_all = np.concatenate([y_train["squat"], y_test["squat"]])

# Split the projected points by target value: 1 is drawn green, 0 is drawn red.
points_target_1 = latent_space_squat_reduced[y_squat_all == 1]
points_target_0 = latent_space_squat_reduced[y_squat_all == 0]

axis.scatter(points_target_1[:, 0], points_target_1[:, 1], c="green")
axis.scatter(points_target_0[:, 0], points_target_0[:, 1], c="red")

In [None]:
# NOTE(review): the other project imports use the "src." prefix; confirm that
# "utils.data" resolves on a fresh kernel.
from utils.data import get_random_sample
from torch.utils.data import DataLoader

# Batched view over the squat test set, from which one sample is drawn.
test_squat_dl = DataLoader(test_datasets["squat"], batch_size=8)
sample, sample_label = get_random_sample(test_squat_dl, desired_label="feet_too_wide")

# The encoder is called on a batch, so give the single sample a leading axis first.
sample_batch = sample.unsqueeze(dim=0)
sample_encoded = model.encoder(sample_batch)[0].detach().numpy()

In [None]:
fig, axis = plt.subplots(figsize=(12, 6))

# Find the row of the selected sample. The t-SNE output keeps the row order of
# `latent_space_squat`, so the row whose encoding is nearest to `sample_encoded`
# is the sample itself. `sample_label` is a class label, not a row index, and
# must not be used to index the projection.
# Assumes get_random_sample returns an item of test_datasets["squat"] unchanged.
latent_rows = latent_space_squat.reshape(len(latent_space_squat), -1)
sample_row = int(
    np.argmin(np.linalg.norm(latent_rows - sample_encoded.reshape(1, -1), axis=1))
)

axis.scatter(
    latent_space_squat_reduced[y_squat_all == 1][:, 0],
    latent_space_squat_reduced[y_squat_all == 1][:, 1],
    c="green",
    label="Correct",
)
axis.scatter(
    latent_space_squat_reduced[y_squat_all == 0][:, 0],
    latent_space_squat_reduced[y_squat_all == 0][:, 1],
    c="red",
    label="Incorrect",
)
# Highlight the selected incorrect sample on top of the class scatters.
axis.scatter(
    latent_space_squat_reduced[sample_row, 0],
    latent_space_squat_reduced[sample_row, 1],
    c="orange",
    label="Incorrect sample",
)
axis.legend()

In [None]:
from src.explainer import Explainer
import pickle

# Restore the classifier that was pickled for the squat LSTM model.
with open("../../models/squat/lstm_clf.pkl", "rb") as clf_file:
    clf = pickle.load(clf_file)

# The explainer receives the model, the classifier and a loader over the squat training set.
train_squat_dl = DataLoader(train_datasets["squat"], batch_size=8)
explainer = Explainer(lstm_models["squat_lstm"], clf, train_squat_dl, exercise="squat")

# Counterfactual ("fixed") version of the previously selected sample.
fixed_sample = explainer.generate_cf(sample)

In [None]:
fig, axis = plt.subplots(figsize=(10, 8))

# Re-project the latent space with the counterfactual appended as the last row.
latent_space_squat_reduced = tsne.fit_transform(
    np.concatenate([latent_space_squat, fixed_sample])
)
# 2 is a sentinel outside {0, 1}: it keeps the counterfactual row out of both
# class scatters below.
y_squat_all = np.concatenate([y_train["squat"], y_test["squat"], [2]])

# Find the row of the selected sample. The t-SNE output keeps the row order of
# its input, so the row of `latent_space_squat` nearest to `sample_encoded` is
# the sample itself. `sample_label` is a class label, not a row index, and must
# not be used to index the projection.
# Assumes get_random_sample returns an item of test_datasets["squat"] unchanged.
latent_rows = latent_space_squat.reshape(len(latent_space_squat), -1)
sample_row = int(
    np.argmin(np.linalg.norm(latent_rows - sample_encoded.reshape(1, -1), axis=1))
)

axis.scatter(
    latent_space_squat_reduced[y_squat_all == 1][:, 0],
    latent_space_squat_reduced[y_squat_all == 1][:, 1],
    c="green",
    label="Correct",
)
axis.scatter(
    latent_space_squat_reduced[y_squat_all == 0][:, 0],
    latent_space_squat_reduced[y_squat_all == 0][:, 1],
    c="red",
    label="Incorrect",
)
# Original (incorrect) sample.
axis.scatter(
    latent_space_squat_reduced[sample_row, 0],
    latent_space_squat_reduced[sample_row, 1],
    c="orange",
    label="Incorrect sample",
    s=150,
)
# Counterfactual: the last row, appended above.
axis.scatter(
    latent_space_squat_reduced[-1, 0],
    latent_space_squat_reduced[-1, 1],
    c="blue",
    label="Fixed sample",
    s=150,
)
axis.legend()
axis.set_title("LSTM-based VAE's latent space for squat")

In [None]:
# Query the explainer for the closest correct counterpart of `sample`.
# NOTE(review): the distance measure and the return type are defined by
# Explainer.get_closest_correct, which is not shown here.
closest_correct_sample = explainer.get_closest_correct(sample)