# SV-SCN Production Training v2

**Improved with error handling and validation**

- 500 training samples
- 150 epochs (~2-3 hours)
- Automatic checkpoint detection
- Complete error checking


## Step 1: Check GPU

In [None]:
!nvidia-smi

import torch
print(f"\nPyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
else:
    print("⚠️ Enable GPU: Runtime → Change runtime type → GPU")

## Step 2: Clone Project

In [None]:
!git clone https://github.com/ashish-frozo/frozo-3d-model.git
%cd frozo-3d-model

!ls -la svscn/ scripts/ 2>&1 || echo "❌ Project structure not found!"

## Step 3: Install Dependencies

In [None]:
!pip install -q open3d>=0.17.0 trimesh>=4.0.0 scipy>=1.10.0
!pip install -q objaverse>=0.1.7 tensorboard>=2.14.0

from svscn.models import SVSCN
from svscn.config import default_config

print(f"✅ Setup complete - Version: {default_config.VERSION}")

## Step 4: Clean Previous Data (if any)

In [None]:
!rm -rf data/shapenet_500 data/processed_500 data/training_500
!mkdir -p data

print("✅ Clean slate ready")

## Step 5: Generate Training Data (WITH ERROR CHECKING)

This creates ~500 samples, validating the output of each sub-step (5a–5c) before moving on.

In [None]:
# Step 5a: Generate meshes
!python -m svscn.data.shapenet \
    --placeholder \
    --output_dir data/shapenet_500 \
    --samples_per_category 167

# VERIFY
import subprocess
import sys

result = subprocess.run(['find', 'data/shapenet_500', '-name', '*.obj'], 
                       capture_output=True, text=True)
obj_files = [f for f in result.stdout.strip().split('\n') if f]
num_obj = len(obj_files)

print(f"\n{'='*50}")
print(f"STEP 5a: Generate Meshes")
print(f"{'='*50}")
print(f"OBJ files: {num_obj}")
print(f"Expected: ~500")

if num_obj < 100:
    print(f"❌ ERROR: Only {num_obj} meshes!")
    raise ValueError(f"Expected 500+, got {num_obj}")
else:
    print(f"✅ SUCCESS")

In [None]:
# Step 5b: Preprocess
!python -m svscn.data.preprocess \
    --input_dir data/shapenet_500 \
    --output_dir data/processed_500 \
    --num_points 8192

# VERIFY
result = subprocess.run(['find', 'data/processed_500', '-name', '*.npy'], 
                       capture_output=True, text=True)
pc_files = [f for f in result.stdout.strip().split('\n') if f]
num_pc = len(pc_files)

print(f"\n{'='*50}")
print(f"STEP 5b: Preprocess")
print(f"{'='*50}")
print(f"Point clouds: {num_pc}")
print(f"Expected: ~{num_obj}")

if num_pc < num_obj * 0.9:  # At least 90% success rate
    print(f"❌ ERROR: Only {num_pc}/{num_obj} processed!")
    raise ValueError("Too many preprocessing failures")
else:
    print(f"✅ SUCCESS")

In [None]:
# Step 5c: Augment
!python -m svscn.data.augment \
    --input_dir data/processed_500 \
    --output_dir data/training_500 \
    --views 3

# VERIFY
result_full = subprocess.run(['find', 'data/training_500/full', '-name', '*.npy'], 
                             capture_output=True, text=True)
result_partial = subprocess.run(['find', 'data/training_500/partial', '-name', '*.npy'], 
                                capture_output=True, text=True)

full_files = [f for f in result_full.stdout.strip().split('\n') if f]
partial_files = [f for f in result_partial.stdout.strip().split('\n') if f]

num_full = len(full_files)
num_partial = len(partial_files)

print(f"\n{'='*50}")
print(f"STEP 5c: Create Training Pairs")
print(f"{'='*50}")
print(f"Full clouds: {num_full}")
print(f"Partial clouds: {num_partial}")
print(f"Expected: ~{num_pc * 3} each")
print(f"Unique samples: {num_full // 3}")

if num_full < 300 or num_partial < 300:
    print(f"❌ ERROR: Not enough training pairs!")
    raise ValueError(f"Expected 300+, got {num_full}/{num_partial}")
elif abs(num_full - num_partial) > 5:
    print(f"⚠️ WARNING: Count mismatch!")
else:
    print(f"✅ SUCCESS - Ready for training!")

## Step 6: Create Splits

In [None]:
import numpy as np
from pathlib import Path

training_dir = Path('data/training_500')
full_dir = training_dir / 'full'

# Marker that ends the stem of every file matched by the glob below.
FULL_SUFFIX = '_full'

# Collect one identifier per sample, collapsing the per-file variants.
samples = set()
for f in full_dir.glob('*_full.npy'):
    # Strip only the trailing marker; str.replace would also delete the same
    # text if it occurred earlier in the file name.
    name = f.stem[:-len(FULL_SUFFIX)]
    # Drop the last `_`-separated token (presumably the view index - confirm
    # against the augment script's naming). Yields '' when there is no `_`.
    base = name.rpartition('_')[0]
    samples.add(base)

# Fail loudly instead of silently writing three empty split files.
if not samples:
    raise ValueError(
        f"No '*_full.npy' files found in {full_dir} - run Step 5c first"
    )

# Sort before shuffling so the seeded shuffle is independent of directory order.
samples = sorted(samples)
np.random.seed(42)
np.random.shuffle(samples)

# 80% train / 10% validation / 10% test, split by sample identifier.
n = len(samples)
train = samples[:int(0.8*n)]
val = samples[int(0.8*n):int(0.9*n)]
test = samples[int(0.9*n):]

splits_dir = training_dir / 'splits'
splits_dir.mkdir(exist_ok=True)

# One identifier per line in each split file.
(splits_dir / 'train.txt').write_text('\n'.join(train))
(splits_dir / 'val.txt').write_text('\n'.join(val))
(splits_dir / 'test.txt').write_text('\n'.join(test))

print(f"✅ Splits: {len(train)} train, {len(val)} val, {len(test)} test")

## Step 7: Train (150 epochs, ~2-3 hours)

For a quick test, change `--epochs 150` to `--epochs 10`.

In [None]:
# Create directories
!mkdir -p checkpoints_prod logs_prod

# Train
!python scripts/train.py \
    --data_dir data/training_500 \
    --epochs 150 \
    --batch_size 32 \
    --checkpoint_dir checkpoints_prod \
    --log_dir logs_prod \
    --device cuda

print("\n✅ Training complete!")

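## Step 8: Monitor Training

The training cell in Step 7 blocks until it finishes, so running the cells in order shows the logs only after training ends. To watch progress live, run this cell before starting Step 7.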

In [None]:
# Load the TensorBoard notebook extension, which provides the %tensorboard magic.
%load_ext tensorboard
# Open the dashboard on the log directory passed to the training script in Step 7.
%tensorboard --logdir logs_prod

## Step 9: Auto-Find Checkpoint

In [None]:
import glob

checkpoint_files = glob.glob('checkpoints_prod/*/best.pt')

if not checkpoint_files:
    print("❌ No checkpoint found!")
    CP = None
else:
    CP = sorted(checkpoint_files)[-1]
    print(f"✅ Checkpoint: {CP}")
    !ls -lh {CP}

## Steps 10-13: Test, Visualize, Export, Download

In [None]:
# Test input
import numpy as np
partial = np.random.randn(2048, 3).astype(np.float32)
partial = (partial - partial.mean(axis=0)) / partial.std()
np.save('test.npy', partial)

# Inference
if CP:
    !python scripts/infer.py --checkpoint {CP} --input test.npy --output out.npy --class_id 0 --device cuda
    !python scripts/infer.py --checkpoint {CP} --input test.npy --output out.glb --export_mesh --class_id 0 --device cuda
    
    # Download
    from google.colab import files
    files.download(CP)
    files.download('out.glb')
    
    print("\n✅ All done!")