# Experiment 1: Tweak the contrast function

This experiment removes the second `exp` in `ConstrastManagerV2`. Here, we test on the `Apartment_release_work_skeleton_seq131` dataset.

In [None]:
import os

# Experiment configuration. Each value is kept as a Python variable (the `!`
# shell cells interpolate them via `{NAME}`) and is also exported to the
# process environment so that child processes such as the training script
# can read it.
SCENE_NAME = 'Apartment_release_work_skeleton_seq131'
ADT_PROCESSED_ROOT = '/home/ubuntu/cs-747-project/adt_processed'
OUT_PATH = '/home/ubuntu/cs-747-project/output/adt'
FOLDER_NAME = 'unc_2d_unet_egolifter_contrast_v2'

# Export one variable at a time; the keys match the Python names above.
os.environ["SCENE_NAME"] = SCENE_NAME
os.environ["ADT_PROCESSED_ROOT"] = ADT_PROCESSED_ROOT
os.environ["OUT_PATH"] = OUT_PATH
os.environ["FOLDER_NAME"] = FOLDER_NAME

In [None]:
!pwd
!ls /home/ubuntu/cs-747-project/adt_processed

In [None]:
# Run the training script. Use contrast v2 (`lift.name=v2`).
# IPython replaces {SCENE_NAME}, {ADT_PROCESSED_ROOT} and {OUT_PATH} with the
# values of the Python variables of the same name before the command is
# handed to the shell; the remaining `key=value` arguments are passed through
# to ../train_lightning.py unchanged.
!uv run python ../train_lightning.py \
	scene.scene_name={SCENE_NAME} \
    scene.data_root={ADT_PROCESSED_ROOT} \
    model=unc_2d_unet \
    model.unet_acti=sigmoid \
    model.dim_extra=16 \
    lift.use_contr=True \
    lift.name=v2 \
    exp_name=egolifter_contrast_v2 \
    output_root={OUT_PATH} \
    wandb.project=egolifter_adt

# View results

In [None]:
# Select one of the output folder from below
os.environ.update({
    "FOLDER_NAME": "unc_2d_unet_egolifter_contrast_v2",
})

!echo "Running with folder name: {OUT_PATH}/{SCENE_NAME}/{FOLDER_NAME}"

# This will start a local server 
# open the browser and go to the link for visualization
!uv run python ../viewer.py \
    {OUT_PATH}/{SCENE_NAME}/{FOLDER_NAME} \
    --data_root {ADT_PROCESSED_ROOT}  \
    --reorient disable \
    --feat_pca

# Load the model and explore learned features

In [None]:
import sys, os

# Make the parent of the current working directory importable (presumably the
# project root that contains the `utils` package — verify if the notebook is
# moved). The membership check keeps sys.path from accumulating duplicate
# entries when this cell is re-run.
parent_dir = os.path.dirname(os.getcwd())
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

from utils.routines import load_from_model_path

# Output folder of the training run, and the processed scene folder.
model_path = f'{OUT_PATH}/{SCENE_NAME}/{FOLDER_NAME}'
source_path = f'{ADT_PROCESSED_ROOT}/{SCENE_NAME}'

model, scene, cfg = load_from_model_path(
    model_path,
    source_path,
)

In [None]:
# Summary statistics and per-feature histograms of the model's extra features
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Pull the extra-feature tensor off the model as a NumPy array
data = model.gaussians.get_features_extra.detach().cpu().numpy()

# One DataFrame column per feature dimension (second axis of `data`)
feature_names = [f'Feature_{idx}' for idx in range(data.shape[1])]
df = pd.DataFrame(data, columns=feature_names)

# Count / mean / std / quantiles for every feature
print(df.describe())

# Grid layout: fixed column count, row count by ceiling division
num_features = len(df.columns)
num_cols = 4
num_rows = -(-num_features // num_cols)

fig, axes = plt.subplots(num_rows, num_cols, figsize=(20, 4 * num_rows))
axes = axes.flatten()

# One histogram (with KDE overlay) per feature, filling the grid in order
for panel, column in zip(axes, df.columns):
    sns.histplot(df[column], bins=30, kde=True, ax=panel)
    panel.set(title=f'Distribution of {column}', xlabel=column, ylabel='Frequency')

# Remove the grid cells that did not receive a feature
for spare in axes[num_features:]:
    fig.delaxes(spare)

plt.tight_layout()
plt.show()

In [None]:
# Project the standardized features onto their first two principal components
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Extra features of the model as a NumPy array
data = model.gaussians.get_features_extra.detach().cpu().numpy()

# Standardize, then reduce to two components
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)

pca = PCA(n_components=2)
data_pca = pca.fit_transform(data_scaled)
df_pca = pd.DataFrame(data_pca, columns=['PC1', 'PC2'])

# Scatter plot of the two components
pca_fig, pca_ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df_pca, x='PC1', y='PC2', ax=pca_ax)
pca_ax.set(
    title='PCA of Features',
    xlabel='Principal Component 1',
    ylabel='Principal Component 2',
)
pca_ax.grid()
plt.show()

# T-SNE

## T-SNE 2D

In [None]:
# Standardize the features before computing neighborhoods and embeddings
tsne_scaler = StandardScaler()
data_standardized = tsne_scaler.fit_transform(data)

In [None]:
from sklearn.neighbors import NearestNeighbors

# Neighborhoods in the (standardized) embedding space, using cosine distance
data_standardized = StandardScaler().fit_transform(data)

knn = NearestNeighbors(n_neighbors=15, metric="cosine")
knn.fit(data_standardized)

# Sparse matrix whose stored entries are the distances to each point's neighbors
knn_graph = knn.kneighbors_graph(mode="distance")

In [None]:
# Calculate a 2D t-SNE embedding of the standardized features
import inspect

from sklearn.manifold import TSNE

# Newer scikit-learn releases renamed TSNE's `n_iter` argument to `max_iter`
# and later dropped the old name. Pick whichever keyword the installed
# version accepts so the cell runs on both old and new releases.
tsne_iter_kwarg = "max_iter" if "max_iter" in inspect.signature(TSNE).parameters else "n_iter"
tsne = TSNE(n_components=2, perplexity=30, random_state=42, **{tsne_iter_kwarg: 300})
X_tsne = tsne.fit_transform(data_standardized)

# Plot the t-SNE results
plt.figure(figsize=(10, 6))
plt.scatter(X_tsne[:, 0], X_tsne[:, 1], s=5)
plt.title('t-SNE of Features')
plt.xlabel('t-SNE Component 1')
plt.ylabel('t-SNE Component 2')
plt.grid()
plt.show()

In [None]:
# 3D t-SNE
import inspect

import matplotlib.pyplot as plt
# Kept from the original cell: older Matplotlib releases need this import for
# projection='3d' to be available.
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler


def _plot_tsne_3d(embedding, title, color=None, color_label=None):
    """Draw a 3D scatter plot of the first three columns of `embedding`.

    Args:
        embedding: 2D array; columns 0, 1 and 2 are used as x, y and z.
        title: Title of the axes.
        color: Optional per-point values used to color the markers with the
            'viridis' colormap. When omitted, the default marker color is used.
        color_label: Label of the colorbar (only used when `color` is given).

    Returns:
        The `(fig, ax)` pair of the figure that was shown.
    """
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='3d')
    xs, ys, zs = embedding[:, 0], embedding[:, 1], embedding[:, 2]
    if color is None:
        ax.scatter(xs, ys, zs, s=5)
    else:
        points = ax.scatter(xs, ys, zs, c=color, cmap='viridis', s=5)
        plt.colorbar(points, ax=ax, label=color_label)
    ax.set_title(title)
    ax.set_xlabel('t-SNE Component 1')
    ax.set_ylabel('t-SNE Component 2')
    ax.set_zlabel('t-SNE Component 3')
    plt.show()
    return fig, ax


# Load the data
data = model.gaussians.get_features_extra.detach().cpu().numpy()
# Standardize the data
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)

# Perform t-SNE. Newer scikit-learn releases renamed `n_iter` to `max_iter`
# and later dropped the old name, so use whichever keyword the installed
# version accepts.
tsne_iter_kwarg = "max_iter" if "max_iter" in inspect.signature(TSNE).parameters else "n_iter"
tsne = TSNE(n_components=3, perplexity=30, random_state=42, **{tsne_iter_kwarg: 300})
X_tsne = tsne.fit_transform(data_scaled)

# Plot the t-SNE results in 3D
fig, ax = _plot_tsne_3d(X_tsne, '3D t-SNE of Features')

# Plot the t-SNE results in 3D, colored by the first feature column
fig, ax = _plot_tsne_3d(
    X_tsne,
    '3D t-SNE of Features with Color Coding',
    color=data[:, 0],
    color_label='Feature Value',
)

# Create an interactive 3D plot using Plotly



## T-SNE 3D

In [None]:
import plotly.express as px

# Interactive 3D scatter of the t-SNE embedding, colored by the first feature
# column (swap `data[:, 0]` for another column to color by a different feature).
axis_titles = {
    'x': 'TSNE Component 1',
    'y': 'TSNE Component 2',
    'z': 'TSNE Component 3',
}
fig = px.scatter_3d(
    x=X_tsne[:, 0],
    y=X_tsne[:, 1],
    z=X_tsne[:, 2],
    color=data[:, 0],
    labels=axis_titles,
    title='Interactive 3D t-SNE Plot of Features',
)
# Small markers and a taller canvas keep the point cloud readable
fig.update_traces(marker={'size': 1})
fig.update_layout(height=800)
fig.show()

# UMAP

Another way to visualize the groupings.

In [None]:
import umap

# 2D UMAP embedding of the standardized features.
# A fixed random_state makes the embedding reproducible from run to run,
# consistent with the seeded t-SNE cells in this notebook.
umap_reducer = umap.UMAP(
    n_neighbors=15,        # matches knn above
    min_dist=0.1,          # 0 → tighter clusters, >0.4 → looser
    metric="cosine",
    random_state=42,
)
umap2d = umap_reducer.fit_transform(data_standardized)

In [None]:
import hdbscan

# Cluster the 2D UMAP embedding with HDBSCAN to get labels for coloring.
# Ideally we would use ground-truth labels instead, but we did not have time
# to compute them. With more time, we would use the ARIA dataset to map our
# 3DGS points to ground-truth labels.
clusterer = hdbscan.HDBSCAN(min_cluster_size=20, metric="euclidean")
labels = clusterer.fit_predict(umap2d)

In [None]:
import plotly.express as px

# Scatter of the UMAP embedding, one color per HDBSCAN label; hovering a point
# shows its row index in `data`.
point_index = list(range(len(data)))
fig = px.scatter(
    x=umap2d[:, 0],
    y=umap2d[:, 1],
    color=labels.astype(str),
    hover_data={"index": point_index},
)
fig.show()