# ManifoldAgnostic: Toy Data Visualization

This notebook visualizes the toy point cloud datasets for testing the neural field diffusion model.

We generate point clouds on manifolds of different dimensions:
- **1D Manifolds**: Curves (circle, helix, trefoil knot, figure-8)
- **2D Manifolds**: Surfaces (sphere, torus, plane, cylinder, Möbius strip)
- **3D Manifolds**: Volumes (cube, ball, ellipsoid, shell)

In [None]:
import sys
sys.path.append('..')

import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
from data.toy_data import (
    get_all_generators, 
    get_shapes_by_dimension,
    compute_statistics,
    ManifoldDim
)

# Plot styling: apply the 'seaborn-v0_8-whitegrid' matplotlib style sheet to the figures below.
# NOTE(review): the 'seaborn-v0_8-*' style names are presumably only available in
# matplotlib >= 3.6 — confirm against the pinned matplotlib version.
plt.style.use('seaborn-v0_8-whitegrid')
# IPython line magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## 1. Generate All Shapes

In [None]:
# Sample every available toy shape at one common resolution and normalize it.
N_POINTS = 2048
generators = get_all_generators()

# shape name -> normalized point cloud, in the iteration order of `generators`.
shapes = {
    shape_name: make_cloud(n_points=N_POINTS).normalize()
    for shape_name, make_cloud in generators.items()
}

# Report what was generated, one line per shape with its manifold dimension.
print(f"Generated {len(shapes)} shapes with {N_POINTS} points each:")
for name, pc in shapes.items():
    print(f"  - {name}: {pc.manifold_dim.name}")

## 2. Visualize Shapes by Manifold Dimension

### 2.1 1D Manifolds (Curves)

In [None]:
def plot_point_cloud(ax, pc, title, color='b', size=1, alpha=0.6, max_range=1.2):
    """Scatter a single point cloud on a 3D axis inside a cubic, equal-aspect view.

    Args:
        ax: 3D matplotlib axis to draw on (e.g. created with ``projection='3d'``).
        pc: Object exposing ``points``; columns 0, 1 and 2 are used as x, y and z.
        title: Title shown above the axis.
        color: Marker color, forwarded to ``ax.scatter`` as ``c``.
        size: Marker size, forwarded to ``ax.scatter`` as ``s``.
        alpha: Marker opacity.
        max_range: Half-width of the symmetric ``[-max_range, max_range]`` limits
            applied to all three axes. Defaults to 1.2, the value that used to be
            hard-coded, so existing calls render with the same limits.
    """
    xyz = pc.points
    ax.scatter(xyz[:, 0], xyz[:, 1], xyz[:, 2], c=color, s=size, alpha=alpha)

    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title(title)

    # Identical data limits on every axis...
    ax.set_xlim([-max_range, max_range])
    ax.set_ylim([-max_range, max_range])
    ax.set_zlim([-max_range, max_range])
    # ...are not enough for an equal aspect ratio: a 3D axis is drawn in a
    # 4:4:3 box by default, which squashes z. Force a cubic box so one data
    # unit has the same on-screen length along x, y and z.
    ax.set_box_aspect((1, 1, 1))

In [None]:
# 1D manifolds: draw each curve in its own 3D panel, all panels in a single row.
curves_1d = get_shapes_by_dimension(1)
print(f"1D Manifolds (Curves): {curves_1d}")

fig = plt.figure(figsize=(16, 4))
n_panels = len(curves_1d)
for panel, curve_name in enumerate(curves_1d, start=1):
    ax = fig.add_subplot(1, n_panels, panel, projection='3d')
    # Turn the snake_case shape key into a readable title ('some_name' -> 'Some Name').
    panel_title = curve_name.replace('_', ' ').title()
    plot_point_cloud(ax, shapes[curve_name], panel_title,
                     color='darkblue', size=2, alpha=0.8)
    ax.view_init(elev=20, azim=45)

# y > 1 places the figure title above the row of panels.
plt.suptitle('1D Manifolds (Curves) - Points on 1-dimensional structures in 3D', 
             fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

### 2.2 2D Manifolds (Surfaces)

In [None]:
# 2D manifolds: draw each surface in its own 3D panel on a 3-column grid.
surfaces_2d = get_shapes_by_dimension(2)
print(f"2D Manifolds (Surfaces): {surfaces_2d}")

fig = plt.figure(figsize=(16, 8))
n_cols = 3
# Ceiling division: just enough rows to hold every surface.
n_rows = -(-len(surfaces_2d) // n_cols)

for panel, surface_name in enumerate(surfaces_2d, start=1):
    ax = fig.add_subplot(n_rows, n_cols, panel, projection='3d')
    # Turn the snake_case shape key into a readable title ('some_name' -> 'Some Name').
    panel_title = surface_name.replace('_', ' ').title()
    plot_point_cloud(ax, shapes[surface_name], panel_title,
                     color='darkgreen', size=1, alpha=0.5)
    ax.view_init(elev=25, azim=45)

# y > 1 places the figure title above the grid of panels.
plt.suptitle('2D Manifolds (Surfaces) - Points on 2-dimensional structures in 3D', 
             fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

### 2.3 3D Manifolds (Volumes)

In [None]:
# 3D manifolds: draw each volume in its own 3D panel, all panels in a single row.
volumes_3d = get_shapes_by_dimension(3)
print(f"3D Manifolds (Volumes): {volumes_3d}")

fig = plt.figure(figsize=(16, 4))
n_panels = len(volumes_3d)
for panel, volume_name in enumerate(volumes_3d, start=1):
    ax = fig.add_subplot(1, n_panels, panel, projection='3d')
    # Turn the snake_case shape key into a readable title ('some_name' -> 'Some Name').
    panel_title = volume_name.replace('_', ' ').title()
    # Low alpha so points in the interior of the volume remain visible.
    plot_point_cloud(ax, shapes[volume_name], panel_title,
                     color='darkred', size=1, alpha=0.3)
    ax.view_init(elev=25, azim=45)

# y > 1 places the figure title above the row of panels.
plt.suptitle('3D Manifolds (Volumes) - Points filling 3-dimensional regions', 
             fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

## 3. Statistics Table

In [None]:
# One statistics record per shape, in the iteration order of `shapes`.
all_stats = [compute_statistics(cloud) for cloud in shapes.values()]

# Tabulate the records; each record becomes one row.
df = pd.DataFrame(all_stats)

# Source column -> display header, listed in display order.
column_labels = {
    'name': 'Shape',
    'manifold_dim': 'Dim',
    'num_points': 'Points',
    'scale': 'Scale',
    'mean_dist_from_center': 'Mean Dist',
    'std_dist_from_center': 'Std Dist',
    'mean_nn_distance': 'Mean NN Dist',
}
display_cols = list(column_labels)

# Keep only the key columns, round to 4 decimals, and apply the short headers.
df_display = df[display_cols].round(4)
df_display.columns = list(column_labels.values())

print("Point Cloud Statistics:")
# Bare last expression so the notebook renders the table with its rich repr.
df_display

## 4. Resolution Independence Demonstration

A key property of our neural field approach is **resolution independence**.
The same manifold can be represented with different numbers of points.

In [None]:
# Demonstrate resolution independence: the same sphere sampled at several point counts.
from data.toy_data import generate_sphere

resolutions = [64, 256, 1024, 4096]

fig = plt.figure(figsize=(16, 4))
for i, n in enumerate(resolutions):
    # The column count follows the list, so editing `resolutions` cannot
    # desynchronize the subplot grid (it used to be hard-coded to 4).
    ax = fig.add_subplot(1, len(resolutions), i+1, projection='3d')
    pc = generate_sphere(n_points=n).normalize()
    # Shrink the markers from panel to panel (10, 8, 6, ...), never below 1,
    # so the denser clouds stay readable.
    plot_point_cloud(ax, pc, f'Sphere (N={n})',
                     color='purple', size=max(1, 10-i*2), alpha=0.6)
    ax.view_init(elev=20, azim=45)

# y > 1 places the figure title above the row of panels.
plt.suptitle('Resolution Independence: Same Manifold, Different Point Counts', 
             fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

print("Neural Field Advantage: The continuous field v_θ(x,t) can be queried at ANY resolution!")

## 5. Forward Diffusion Visualization

The forward process adds noise, moving points OFF the manifold.
The neural field learns to reverse this: moving points ONTO the manifold.

In [None]:
# Visualize forward diffusion by adding increasing amounts of noise to a torus.
from data.toy_data import generate_torus

# Generate clean torus
torus = generate_torus(n_points=2000).normalize()

# Noise magnitudes handed to `add_noise`, paired position-by-position with the
# panel labels below (the labels are illustrative diffusion times).
noise_levels = [0.0, 0.05, 0.15, 0.3, 0.5]
time_labels = ['t=0 (clean)', 't=0.2', 't=0.5', 't=0.8', 't=1.0 (noise)']

fig = plt.figure(figsize=(20, 4))
# The grid width follows the list instead of a hard-coded 5.
n_panels = len(noise_levels)
for i, (noise, label) in enumerate(zip(noise_levels, time_labels)):
    ax = fig.add_subplot(1, n_panels, i+1, projection='3d')

    # The zero-noise panel shows the clean torus itself; add_noise is skipped.
    if noise > 0:
        noisy = torus.add_noise(noise)
    else:
        noisy = torus

    # One flat viridis color per panel, indexed by panel position (not by each
    # point's distance to the surface). It is passed as `color=` because a bare
    # RGBA tuple given as `c=` is ambiguous to scatter (it can be mistaken for
    # per-point values) and makes matplotlib emit a warning.
    ax.scatter(
        noisy.points[:, 0], 
        noisy.points[:, 1], 
        noisy.points[:, 2],
        color=plt.cm.viridis(i / n_panels),
        s=2, alpha=0.5
    )
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title(label)
    # Limits are wider than the clean-shape plots to leave room for the noisy points.
    ax.set_xlim([-1.5, 1.5])
    ax.set_ylim([-1.5, 1.5])
    ax.set_zlim([-1.5, 1.5])
    ax.view_init(elev=25, azim=45)

# y > 1 places the figure title above the row of panels.
plt.suptitle('Forward Diffusion Process: Points Diffuse from Manifold to Noise', 
             fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

print("The reverse process (learned neural field) brings noisy points BACK to the manifold.")

## 6. Distribution Analysis by Manifold Dimension

Let's analyze how point distributions differ across manifold dimensions.

In [None]:
# One histogram panel per manifold dimension, overlaying every shape of that dimension.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Panel title -> names of the shapes with that manifold dimension (1, 2, 3).
dim_groups = {
    group_label: get_shapes_by_dimension(dim)
    for dim, group_label in enumerate(('1D Curves', '2D Surfaces', '3D Volumes'), start=1)
}

# NOTE(review): `colors` is not referenced by the plotting code in this cell;
# each histogram uses matplotlib's default color cycle instead.
colors = {'1D Curves': 'blue', '2D Surfaces': 'green', '3D Volumes': 'red'}

for ax, group_name in zip(axes, dim_groups):
    for name in dim_groups[group_name]:
        # Euclidean norm of each point, i.e. its distance from the origin.
        radii = np.linalg.norm(shapes[name].points, axis=1)
        ax.hist(radii, bins=50, alpha=0.5, label=name, density=True)

    ax.set(xlabel='Distance from Center', ylabel='Density', title=group_name)
    ax.legend(fontsize=8)

# y > 1 places the figure title above the row of panels.
plt.suptitle('Point Distance Distribution by Manifold Dimension', fontsize=14, y=1.02)
plt.tight_layout()
plt.show()

## 7. Nearest Neighbor Analysis

The nearest neighbor distance distribution tells us about point density and uniformity.

In [None]:
def compute_nn_distances(points, sample_size=500, seed=None):
    """Compute nearest-neighbor distances for a (sub)sample of points.

    If ``points`` holds more than ``sample_size`` rows, ``sample_size`` rows are
    drawn without replacement and each drawn point's nearest neighbor is searched
    *within that subsample only*. The result therefore describes the subsample's
    spacing, which is at least as large as the spacing in the full cloud.

    Args:
        points: Array-like of shape (n, d); one point per row.
        sample_size: Maximum number of points to evaluate.
        seed: Optional seed (or ``numpy.random.Generator``) for the subsampling.
            With the default ``None`` the global NumPy random state is used,
            exactly as before; pass a seed to make the subsample reproducible.

    Returns:
        1-D float array with one nearest-neighbor distance per evaluated point.
        A lone point has no neighbor and yields ``inf``; empty input yields an
        empty array.
    """
    points = np.asarray(points)  # also accept lists / nested sequences
    n = len(points)
    if n <= sample_size:
        sample = points
    else:
        if seed is None:
            # Legacy path: honors any np.random.seed() set by the caller.
            idx = np.random.choice(n, sample_size, replace=False)
        else:
            idx = np.random.default_rng(seed).choice(n, sample_size, replace=False)
        sample = points[idx]

    nn_dists = np.empty(len(sample))
    for i, p in enumerate(sample):
        dists = np.linalg.norm(sample - p, axis=1)
        dists[i] = np.inf  # exclude the point's zero distance to itself
        nn_dists[i] = dists.min()

    return nn_dists

# Overlay the nearest-neighbor distance histograms of a few hand-picked shapes.
selected_shapes = ['circle', 'sphere', 'torus', 'ball_volume']

fig, ax = plt.subplots(figsize=(10, 6))

for name in selected_shapes:
    pc = shapes[name]
    # The legend entry carries the shape's manifold dimension for comparison.
    ax.hist(
        compute_nn_distances(pc.points),
        bins=30,
        alpha=0.5,
        label=f"{name} (dim={pc.manifold_dim.value})",
    )

ax.set(
    xlabel='Nearest Neighbor Distance',
    ylabel='Count',
    title='Nearest Neighbor Distance Distribution',
)
ax.legend()

plt.tight_layout()
plt.show()

print("Observation: Lower-dimensional manifolds have smaller NN distances (points are 'closer' along the manifold).")

## 8. Summary: What Makes This Data Suitable for Neural Field Testing

### Key Properties:

1. **Diverse Manifold Dimensions**: 1D, 2D, and 3D manifolds test if the model learns the correct dimensionality

2. **Known Ground Truth**: We know the exact manifold equation, enabling precise evaluation

3. **Resolution Scalability**: Same manifold can be sampled at any resolution

4. **Simple Geometry**: Easy to verify if generated points lie on the target manifold

5. **Variable Topology**: Includes closed (sphere), open (plane), and non-orientable (Möbius) surfaces

### Testing Protocol:

1. Train neural field on noised point clouds
2. Generate by integrating the learned field from noise
3. Measure distance of generated points to ground-truth manifold
4. Test resolution independence by generating at different point counts

In [None]:
# Dataset summary: one row per manifold dimension plus a final total row.
name_groups = [curves_1d, surfaces_2d, volumes_3d]
summary_data = {
    'Category': ['1D Curves', '2D Surfaces', '3D Volumes', 'Total'],
    # Per-dimension shape counts, then the size of the whole `shapes` dict.
    'Count': [len(group) for group in name_groups] + [len(shapes)],
    # Comma-separated shape names per dimension; the total row has no list.
    'Shapes': [', '.join(group) for group in name_groups] + ['-'],
}

summary_df = pd.DataFrame(summary_data)
print("Dataset Summary:")
# Bare last expression so the notebook renders the table with its rich repr.
summary_df

## 9. Save Sample Data for Training

In [None]:
import os

# Make sure the output directory exists (no error if it is already there).
os.makedirs('../data/samples', exist_ok=True)

# Write each shape's points to '<name>_points.npy'; normals get their own
# '<name>_normals.npy' file, but only for shapes that actually carry normals.
for name, pc in shapes.items():
    np.save(f'../data/samples/{name}_points.npy', pc.points)
    if pc.normals is None:
        continue
    np.save(f'../data/samples/{name}_normals.npy', pc.normals)

print(f"Saved {len(shapes)} point cloud files to ../data/samples/")
print("\nFiles saved:")
# Lists everything currently in the directory, in sorted order.
for entry in sorted(os.listdir('../data/samples')):
    print(f"  {entry}")