# SV-SCN Production Training

**Train a production-quality 3D shape completion model**

- 500 training samples
- 150 epochs (~2-3 hours)
- Automatic checkpoint handling
- Quality validation


## Step 1: Check GPU

In [None]:
!nvidia-smi

import torch
print(f"\nPyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
else:
    print("‚ö†Ô∏è Enable GPU: Runtime ‚Üí Change runtime type ‚Üí GPU")

## Step 2: Clone Project

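**IMPORTANT:** The clone URL below points at `ashish-frozo/frozo-3d-model`. If you are working from your own fork, replace `ashish-frozo` in the URL with your GitHub username!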

In [None]:
# Clone repository
!git clone https://github.com/ashish-frozo/frozo-3d-model.git
%cd frozo-3d-model

# Verify structure
!ls -la svscn/ scripts/ 2>&1 || echo "‚ùå Project structure not found!"

## Step 3: Install Dependencies

In [None]:
!pip install -q open3d>=0.17.0 trimesh>=4.0.0 scipy>=1.10.0
!pip install -q objaverse>=0.1.7 tensorboard>=2.14.0

print("‚úÖ Dependencies installed")

## Step 4: Test Imports

In [None]:
# Import smoke test: an ImportError here means the project package or one of
# its dependencies is not importable from the current working directory.
from svscn.models import SVSCN
from svscn.config import default_config

print("✅ Imports successful")
print(f"Model version: {default_config.VERSION}")

## Step 5: Generate 500 Training Samples

This will create ~1500 training pairs (500 samples × 3 views)

In [None]:
# Generate placeholder data
!python -m svscn.data.shapenet \
    --placeholder \
    --output_dir data/shapenet_500 \
    --samples_per_category 167

print("\n‚úÖ Step 1/3: Generated meshes")

In [None]:
# Preprocess to point clouds
!python -m svscn.data.preprocess \
    --input_dir data/shapenet_500 \
    --output_dir data/processed_500 \
    --num_points 8192

print("\n‚úÖ Step 2/3: Converted to point clouds")

In [None]:
# Generate training pairs
!python -m svscn.data.augment \
    --input_dir data/processed_500 \
    --output_dir data/training_500 \
    --views 3

print("\n‚úÖ Step 3/3: Created training pairs")

# Verify: count every .npy file under the training directory (recursively).
from pathlib import Path

# Counting paths directly avoids the off-by-one of splitting empty `find`
# output: "".split("\n") == [""], which reported 1 file when there were none.
# A missing directory simply yields a count of 0.
num_files = sum(1 for _ in Path('data/training_500').rglob('*.npy'))
print(f"Total training files: {num_files}")

## Step 6: Create Data Splits

In [None]:
import numpy as np
from pathlib import Path

FULL_SUFFIX = '_full'


def split_samples(names, seed=42):
    """Shuffle sample names reproducibly and cut them 80/10/10.

    Args:
        names: iterable of sample identifiers.
        seed: value passed to ``np.random.seed`` before shuffling.

    Returns:
        ``(train, val, test)`` lists. The cuts are at ``int(0.8 * n)`` and
        ``int(0.9 * n)``, so very small inputs can produce empty lists.
    """
    ordered = sorted(names)
    # Sorting first and seeding the global RNG makes the split independent of
    # filesystem listing order and identical across runs.
    np.random.seed(seed)
    np.random.shuffle(ordered)
    n = len(ordered)
    train_end, val_end = int(0.8 * n), int(0.9 * n)
    return ordered[:train_end], ordered[train_end:val_end], ordered[val_end:]


training_dir = Path('data/training_500')
full_dir = training_dir / 'full'

# Get unique samples
samples = set()
for f in full_dir.glob(f'*{FULL_SUFFIX}.npy'):
    # Strip only the trailing suffix; str.replace would also delete any
    # '_full' that happens to occur inside the sample name itself.
    name = f.stem[:-len(FULL_SUFFIX)]
    # Drop the last '_'-separated token so several files collapse onto one
    # sample id (presumably the token is the view index - TODO confirm).
    base = '_'.join(name.split('_')[:-1])
    samples.add(base)

samples = sorted(samples)
print(f"Total unique samples: {len(samples)}")

if not samples:
    # Report the problem here rather than writing empty split files that
    # would only surface as a confusing failure during training.
    train, val, test = [], [], []
    print(f"❌ No '*{FULL_SUFFIX}.npy' files found in {full_dir} - run Step 5 first")
else:
    # 80/10/10 split
    train, val, test = split_samples(samples, seed=42)

    # Save splits (one sample id per line, no trailing newline)
    splits_dir = training_dir / 'splits'
    splits_dir.mkdir(parents=True, exist_ok=True)

    (splits_dir / 'train.txt').write_text('\n'.join(train))
    (splits_dir / 'val.txt').write_text('\n'.join(val))
    (splits_dir / 'test.txt').write_text('\n'.join(test))

    print(f"✅ Splits: {len(train)} train, {len(val)} val, {len(test)} test")

## Step 7: Train Production Model (150 epochs)

⏱️ **This will take 2-3 hours** - keep the tab open!

To run a quick test first (10 epochs, ~10 min), change `--epochs 150` to `--epochs 10`

In [None]:
# Production training - 150 epochs
!python scripts/train.py \
    --data_dir data/training_500 \
    --epochs 150 \
    --batch_size 32 \
    --checkpoint_dir checkpoints_prod \
    --log_dir logs_prod \
    --device cuda

print("\n‚úÖ Training complete!")

## Step 8: Monitor Training (TensorBoard)

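Notebook cells execute one at a time, so this cell cannot start while Step 7 is still running. To watch progress live, run it *before* starting Step 7; if you run it afterwards, it shows the finished training curves.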

In [None]:
# Load the TensorBoard notebook extension, then open the dashboard inline.
# The log directory matches the `--log_dir logs_prod` argument used in Step 7.
%load_ext tensorboard
%tensorboard --logdir logs_prod

## Step 9: Find Best Checkpoint

**Automatic checkpoint detection** - no hardcoding!

In [None]:
import glob
import os


def find_latest_file(pattern):
    """Return the most recently modified path matching ``pattern``.

    Args:
        pattern: glob pattern, e.g. ``'checkpoints_prod/*/best.pt'``.

    Returns:
        The matching path with the newest modification time, or ``None`` when
        nothing matches.
    """
    matches = glob.glob(pattern)
    if not matches:
        return None
    # Sorting the paths alphabetically only finds the newest run when the run
    # directory names happen to sort chronologically; mtime always does.
    return max(matches, key=os.path.getmtime)


# Find the newest best.pt across all run directories
CHECKPOINT_PATH = find_latest_file('checkpoints_prod/*/best.pt')

if CHECKPOINT_PATH is None:
    print("❌ No checkpoint found! Make sure training completed.")
else:
    print(f"✅ Found checkpoint: {CHECKPOINT_PATH}")

    # Get info
    size_mb = os.path.getsize(CHECKPOINT_PATH) / 1e6
    print(f"   Size: {size_mb:.1f} MB")

## Step 10: Test Inference

In [None]:
# Create test input: 2048 random 3-D points used only to smoke-test inference.
import numpy as np

# A dedicated seeded generator keeps the file identical on every run, even
# when this cell is executed on its own, and leaves the global RNG untouched.
rng = np.random.default_rng(0)
partial = rng.standard_normal((2048, 3)).astype(np.float32)
# Centre each axis on zero, then scale by one global standard deviation.
partial = (partial - partial.mean(axis=0)) / partial.std()
np.save('test_partial.npy', partial)

print(f"✅ Created test input: {partial.shape}")

In [None]:
# Run inference with automatic checkpoint
if CHECKPOINT_PATH:
    !python scripts/infer.py \
        --checkpoint {CHECKPOINT_PATH} \
        --input test_partial.npy \
        --output completed.npy \
        --class_id 0 \
        --device cuda
    
    print("\n‚úÖ Inference complete!")
else:
    print("‚ùå Cannot run inference - no checkpoint found")

## Step 11: Visualize Results

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (kept from the original cell; not referenced by name)

# Load the test input and the file passed as --output to the inference step.
partial = np.load('test_partial.npy')
completed = np.load('completed.npy')


def _scatter_points(ax, points, **style):
    """Scatter columns 0-2 of ``points`` as x, y, z using size-1 markers."""
    ax.scatter(points[:, 0], points[:, 1], points[:, 2], s=1, **style)


fig = plt.figure(figsize=(15, 5))
input_ax, output_ax, overlay_ax = (
    fig.add_subplot(1, 3, col, projection='3d') for col in (1, 2, 3)
)

# Left panel: the partial input cloud
_scatter_points(input_ax, partial, c='blue')
input_ax.set_title('Input (Partial)', fontsize=14)

# Middle panel: the completed output cloud
_scatter_points(output_ax, completed, c='green')
output_ax.set_title('Output (Completed)', fontsize=14)

# Right panel: both clouds, semi-transparent, labelled for the legend
_scatter_points(overlay_ax, partial, c='blue', alpha=0.5, label='Input')
_scatter_points(overlay_ax, completed, c='green', alpha=0.3, label='Output')
overlay_ax.set_title('Comparison', fontsize=14)
overlay_ax.legend()

# Equal box aspect on every panel so the shapes are not visually stretched.
for panel in (input_ax, output_ax, overlay_ax):
    panel.set_box_aspect([1, 1, 1])

plt.tight_layout()
plt.show()

print(f"Input: {len(partial)} points")
print(f"Output: {len(completed)} points")

## Step 12: Export as GLB (AR-ready)

In [None]:
# Export to GLB with automatic checkpoint
if CHECKPOINT_PATH:
    !python scripts/infer.py \
        --checkpoint {CHECKPOINT_PATH} \
        --input test_partial.npy \
        --output completed_prod.glb \
        --export_mesh \
        --class_id 0 \
        --device cuda
    
    print("\n‚úÖ GLB export complete!")
else:
    print("‚ùå Cannot export - no checkpoint found")

## Step 13: Download Files

In [None]:
import os

from google.colab import files

GLB_PATH = 'completed_prod.glb'

if CHECKPOINT_PATH:
    # Download checkpoint
    files.download(CHECKPOINT_PATH)
    print(f"Downloaded: {CHECKPOINT_PATH}")

    # Download GLB - only if the export step actually produced it, so a failed
    # export gives a clear message instead of an exception from the download.
    if os.path.exists(GLB_PATH):
        files.download(GLB_PATH)
        print("Downloaded: completed_prod.glb")

        print("\n✅ All files downloaded!")
    else:
        print(f"❌ {GLB_PATH} not found - run the GLB export step first")
else:
    print("❌ No files to download - training not complete")

## Step 14: Quality Metrics

Check if model meets production criteria

In [None]:
import glob
import json
import os

# Validation-loss thresholds used for the automatic verdict below.
TARGET_VAL_LOSS = 0.5
ACCEPTABLE_VAL_LOSS = 0.8


def quality_verdict(val_loss, target_loss=TARGET_VAL_LOSS, acceptable_loss=ACCEPTABLE_VAL_LOSS):
    """Return the one-line verdict for a best validation loss.

    Args:
        val_loss: best validation loss reported by training.
        target_loss: losses strictly below this are rated excellent.
        acceptable_loss: losses strictly below this (but not below
            ``target_loss``) are rated good.

    Returns:
        A human-readable message string.
    """
    if val_loss < target_loss:
        return f"✅ Val loss ({val_loss:.4f}) < {target_loss} - EXCELLENT!"
    if val_loss < acceptable_loss:
        return f"⚠️  Val loss ({val_loss:.4f}) - GOOD, could be better"
    return f"❌ Val loss ({val_loss:.4f}) - needs more training"


# Find training summary
summary_files = glob.glob('checkpoints_prod/*/training_summary.json')

if summary_files:
    # glob returns matches in arbitrary order, so pick the newest summary
    # explicitly instead of taking whatever happens to be last in the list.
    latest_summary = max(summary_files, key=os.path.getmtime)
    with open(latest_summary) as f:
        summary = json.load(f)

    print("=" * 50)
    print("TRAINING SUMMARY")
    print("=" * 50)
    print(f"Best val loss: {summary['best_val_loss']:.4f}")
    print(f"Final train loss: {summary['train_losses'][-1]:.4f}")
    print(f"Epochs completed: {summary['epochs_completed']}")
    print("\n" + "=" * 50)
    print("QUALITY CHECK")
    print("=" * 50)

    # Check criteria
    print(quality_verdict(summary['best_val_loss']))

    print("\n📋 Manual checks needed:")
    print("   - View GLB in 3D viewer")
    print("   - Check for holes at 1m distance")
    print("   - Verify backside geometry looks plausible")
    print("   - Test in AR on phone (iOS/Android)")
else:
    print("❌ No training summary found")

## Summary

### ✅ What You've Done:
1. Trained production-quality model (500 samples, 150 epochs)
2. Automatic checkpoint handling (no hardcoding!)
3. Quality validation and metrics
4. Downloaded model and 3D outputs

### 🚀 Next Steps:
- Test on real furniture data
- Deploy as REST API
- Integrate with your platform
- Compare with SAM-3D (optional)

### 💻 Local Inference:
```bash
python scripts/infer.py \
    --checkpoint best.pt \
    --input your_data.npy \
    --output result.glb \
    --export_mesh \
    --device cpu
```