In [None]:
import pandas as pd
import numpy as np
import umap.umap_ as umap
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import plotly.express as px
from AQSM_SW1PerS.utils.paths import get_data_path
from Classification_experiments.classification_experiments import compress_features


In [None]:

def explained_variance_plots(X_features):
    '''
    Plot the cumulative PCA explained-variance curve of a feature matrix.

    Fits a PCA with at most 50 components and draws the cumulative explained
    variance ratio against the number of retained components, together with a
    horizontal guide at 95% so the minimum number of dimensions needed to
    reach that threshold can be read off the figure.

    Parameters
    ----------
    X_features : 2-D array with a ``.shape`` of (n_samples, n_features)
        Feature matrix handed to ``PCA.fit``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    '''
    n_samples, n_features = X_features.shape[0], X_features.shape[1]
    # PCA cannot extract more components than min(n_samples, n_features).
    n_components = min(n_samples, n_features, 50)
    # Only the fitted variance ratios are needed, so the projection is not kept.
    pca = PCA(n_components=n_components).fit(X_features)

    cumulative_variance = np.cumsum(pca.explained_variance_ratio_)
    # Entry i of the cumulative sum corresponds to keeping i + 1 components,
    # so the x axis must start at 1 to match its "Number of Components" label.
    component_counts = np.arange(1, len(cumulative_variance) + 1)

    plt.figure(figsize=(8,5))
    plt.plot(component_counts, cumulative_variance, marker='o')
    plt.axhline(y=0.95, color='r', linestyle='--', label="95% Variance Threshold")
    plt.xlabel("Number of Components")
    plt.ylabel("Cumulative Explained Variance")
    plt.title("PCA Explained Variance")
    plt.legend()
    plt.grid(True)
    plt.show()


def plot_interactive(df_umap_tda, save_html=False, color_by_anno=True, output_name = 'anno_tda'):
    '''
    Show an interactive 3-D scatter of the UMAP embedding.

    Parameters
    ----------
    df_umap_tda : DataFrame
        Must provide the columns 'UMAP1', 'UMAP2', 'UMAP3' plus 'Annotations'
        and/or 'PID' (whichever one is used for coloring).
    save_html : bool
        When truthy, the figure is also written to ``<output_name>.html``.
    color_by_anno : bool
        Color points by the 'Annotations' column when truthy, otherwise by 'PID'.
    output_name : str
        File stem for the optional HTML export.
    '''
    hue_column = 'Annotations' if color_by_anno else 'PID'
    # Cast to str so plotly treats the values as discrete categories.
    point_colors = df_umap_tda[hue_column].astype(str)

    scatter = px.scatter_3d(
        df_umap_tda,
        x='UMAP1',
        y='UMAP2',
        z='UMAP3',
        color=point_colors,
        title="Interactive TDA Features UMAP",
        opacity=0.2,
    )
    scatter.update_traces(marker={'size': 2})
    scatter.show()

    if not save_html:
        return
    scatter.write_html(f'{output_name}.html')


In [None]:

# Resolve and load the pose feature table.
data_file = get_data_path("Periodicity_Scores", "exp234_PS/pose_exp234.csv")
df = pd.read_csv(data_file)

# Feature columns are named TDA_1 ... TDA_120.
tda_cols = [f"TDA_{n}" for n in range(1, 121)]
tda_features = df.loc[:, tda_cols].to_numpy()

# Per-row metadata kept alongside the features for later coloring.
full_person_ids = df["Person_ID"].to_numpy()
full_annotations = df["Annotation_1"].to_numpy()

# Describe the feature vector as consecutive groups of 10 columns.
num_feature_groups_10 = len(tda_features[0]) // 10
group_sizes = [10 for _ in range(num_feature_groups_10)]

# NOTE(review): compress_features is a project helper; presumably it reduces
# each group of columns - confirm against its implementation.
X_tda = compress_features(tda_features, group_sizes)

explained_variance_plots(X_tda)


In [None]:

# A fixed random_state makes the embedding reproducible, but umap-learn then
# runs single-threaded: it overrides n_jobs to 1 and emits a warning when a
# seed is combined with n_jobs=-1. Request one job explicitly so the call says
# what actually happens and the warning disappears.
umap_reducer = umap.UMAP(n_components=3, random_state=42, n_jobs=1)
X_umap_tda = umap_reducer.fit_transform(X_tda)


In [None]:

# Wrap the 3-D embedding in a frame and attach the per-row person id and
# annotation so the plots can color by either one.
df_umap_tda = pd.DataFrame(
    X_umap_tda,
    columns=['UMAP1', 'UMAP2', 'UMAP3'],
).assign(
    PID=full_person_ids,
    Annotations=full_annotations,
)

plot_interactive(
    df_umap_tda,
    save_html=False,
    color_by_anno=True,
    output_name='anno_tda',
)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.api.types import CategoricalDtype
import plotly.express as px

# --- Define Color Palettes ---
def rgb_string_to_tuple(rgb_string):
    """
    Convert a CSS-style color string into a tuple of floats in [0, 1].

    Accepted forms:
      * ``'rgb(r, g, b)'`` with integer channels 0-255
      * ``'#RRGGBB'`` or shorthand ``'#RGB'`` hex notation

    Parameters
    ----------
    rgb_string : str
        Color specification; surrounding whitespace is ignored.

    Returns
    -------
    tuple of float
        One value per channel, each divided by 255.

    Raises
    ------
    ValueError
        If the string cannot be parsed as one of the accepted forms.
    """
    text = rgb_string.strip()

    if text.startswith('#'):
        digits = text[1:]
        if len(digits) == 3:
            # Expand shorthand '#abc' to 'aabbcc'.
            digits = ''.join(ch * 2 for ch in digits)
        if len(digits) != 6:
            raise ValueError(f"Unsupported hex color: {rgb_string!r}")
        return tuple(int(digits[i:i + 2], 16) / 255 for i in (0, 2, 4))

    # str.strip with a character set peels the 'rgb(' prefix and ')' suffix.
    parts = text.strip('rgb()').split(',')
    return tuple(int(p) / 255 for p in parts)

# Plotly's "Safe" qualitative palette, converted to matplotlib-style tuples.
plotly_safe_rgb = px.colors.qualitative.Safe
plotly_safe_mpl = list(map(rgb_string_to_tuple, plotly_safe_rgb))

# Display labels in legend order; the integer annotation code of each label
# is its position in this list.
label_order = ['None', 'Rock', 'Flap', 'Flap Rock']
label_map = {code: name for code, name in enumerate(label_order)}

# One color per label, in the same order as label_order.
color_sequence = ['#000000', '#E69F00', '#56B4E9', '#009E73']

# Ordered categorical dtype so legends and plots follow label_order.
cat_type = CategoricalDtype(categories=label_order, ordered=True)

# --- Subsample Once ---
# Draw 10% of each child's rows. GroupBy.sample keeps the 'PID' column in the
# result (groupby.apply on the grouping column is deprecated and drops it in
# newer pandas), and the fixed random_state makes the saved figure reproducible.
df_sampled = (
    df_umap_tda
    .groupby('PID')
    .sample(frac=0.1, random_state=42)
    .reset_index(drop=True)
)

# Add necessary columns
df_sampled['PID'] = df_sampled['PID'].astype(str)
df_sampled['Label'] = df_sampled['Annotations'].map(label_map).astype(cat_type)

# Create color maps
unique_pids = sorted(df_sampled['PID'].unique())
# Cycle through the palette so every child gets a color even when there are
# more children than palette entries; a palette dict with missing keys would
# make seaborn raise.
color_map_pid = {
    pid: plotly_safe_mpl[idx % len(plotly_safe_mpl)]
    for idx, pid in enumerate(unique_pids)
}
color_map_label = dict(zip(label_order, color_sequence))

# --- Create Side-by-Side Figure ---
def _draw_umap_panel(ax, data, hue, palette, title, panel_tag, legend_title,
                     legend_size, font_family="Times New Roman", **legend_kwargs):
    """
    Draw one UMAP2-vs-UMAP3 scatter panel on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on.
    data : DataFrame
        Must contain 'UMAP2', 'UMAP3' and the ``hue`` column.
    hue : str
        Column used to color the points.
    palette : dict
        Maps each value of the ``hue`` column to a color.
    title : str
        Axes title.
    panel_tag : str
        Panel letter drawn above the top-left corner, e.g. "(a)".
    legend_title : str
        Title shown on the legend.
    legend_size : int
        Font size (points) of the legend entries.
    font_family : str
        Font family applied to every text element of the panel.
    **legend_kwargs
        Extra keyword arguments forwarded to ``ax.legend`` (e.g. ``loc``).
    """
    sns.scatterplot(
        data=data,
        x='UMAP2',
        y='UMAP3',
        hue=hue,
        palette=palette,
        s=30,
        alpha=0.6,
        linewidth=0,
        ax=ax,
    )
    ax.set_title(title, fontsize=14, family=font_family)
    ax.set_xlabel("UMAP2", fontsize=12, family=font_family)
    ax.set_ylabel("UMAP3", fontsize=12, family=font_family)
    # UMAP coordinates carry no interpretable scale, so hide the ticks.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.text(-0.1, 1.05, panel_tag, transform=ax.transAxes, fontsize=14,
            fontweight="bold", family=font_family)
    # matplotlib ignores ``fontsize=`` whenever ``prop=`` is supplied, so the
    # entry size has to be carried inside ``prop`` to take effect.
    ax.legend(
        title=legend_title,
        title_fontsize=10,
        prop={'family': font_family, 'size': legend_size},
        **legend_kwargs,
    )


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

# Panel (a): Colored by Child
_draw_umap_panel(
    ax1,
    df_sampled,
    hue='PID',
    palette=color_map_pid,
    title=r"2D UMAP Pose Feature Space $PS_{10}$ – Colored by Child",
    panel_tag="(a)",
    legend_title='Child',
    legend_size=8,
    loc='center left',
    bbox_to_anchor=(1.02, 0.5),
)

# Panel (b): Colored by Stereotypy
_draw_umap_panel(
    ax2,
    df_sampled,
    hue='Label',
    palette=color_map_label,
    title="2D UMAP Pose Feature Space $PS_{10}$  – Colored by Stereotypy",
    panel_tag="(b)",
    legend_title='Class',
    legend_size=9,
)

plt.tight_layout()
plt.savefig("Supplementary_Figure_14.pdf", bbox_inches='tight')
plt.show()
