# Tactile Manipulation - BC Training on Colab

This notebook trains a Behavior Cloning (BC) policy from expert demonstrations on a Colab T4 or A100 GPU.

## 1. Setup Environment

In [None]:
# Check GPU
!nvidia-smi
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")

In [None]:
# Install dependencies
!pip install -q mujoco h5py tensorboard matplotlib tqdm

## 2. Clone Your Repository

In [ ]:
# Clone your repository
!git clone https://github.com/YOUR_USERNAME/TactileManipulation.git
%cd TactileManipulation

# Verify structure
!ls -la
!ls tactile-rl/scripts/
!ls tactile-rl/datasets/expert/

## 3. Mount Google Drive for Checkpoints

In [None]:
import os

from google.colab import drive

# Attach Google Drive so that checkpoints are written to Drive storage
drive.mount('/content/drive')

# All training runs in this notebook write underneath this Drive folder
checkpoint_dir = '/content/drive/MyDrive/tactile_manipulation_checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)
print(f"Checkpoints will be saved to: {checkpoint_dir}")

## 4. Verify Expert Demonstrations

### Optional: Upload Demonstrations if Not in Repository

If the demonstrations aren't in your repository, upload them with the cell below, then run the verification cell that follows it to select and inspect the file.

In [ ]:
# Optional: upload demonstrations if none are present in the repository checkout.
# This cell scans the dataset directory itself, so it does not depend on any
# variable defined by another cell and works on a fresh kernel.
import os
import shutil

demo_dir = 'tactile-rl/datasets/expert/'
os.makedirs(demo_dir, exist_ok=True)
demo_files = [name for name in os.listdir(demo_dir) if name.endswith('.hdf5')]

if not demo_files:
    from google.colab import files
    print("Please upload your expert demonstrations HDF5 file...")
    uploaded = files.upload()

    if uploaded:
        # Move the first uploaded file into the datasets directory.
        # shutil.move (instead of a shell `mv`) also handles names containing spaces.
        demo_filename = list(uploaded.keys())[0]
        demo_path = os.path.join(demo_dir, demo_filename)
        shutil.move(demo_filename, demo_path)

        print(f"Uploaded and moved to: {demo_path}")
    else:
        print("No file was uploaded.")

In [ ]:
# Find and verify expert demonstrations
import os
import h5py

# Look for expert demo files (path is relative to the repository root)
demo_dir = 'tactile-rl/datasets/expert/'
if os.path.isdir(demo_dir):
    demo_files = sorted(name for name in os.listdir(demo_dir) if name.endswith('.hdf5'))
else:
    # A missing directory is reported like an empty one instead of raising.
    demo_files = []

print(f"Found {len(demo_files)} demonstration files:")
for name in demo_files:
    print(f"  - {name}")

if demo_files:
    # Names are sorted lexicographically and the last one is used.
    # NOTE(review): this picks the most recent file only if the filenames embed
    # a sortable timestamp - confirm, or set demo_file explicitly.
    demo_file = demo_files[-1]
    demo_path = os.path.join(demo_dir, demo_file)
    print(f"\nUsing: {demo_file}")

    # Verify the file by reading its top-level attributes and the first demo group
    with h5py.File(demo_path, 'r') as demo_h5:
        print(f"\nLoaded {demo_h5.attrs['num_demos']} demonstrations")
        print(f"Environment: {demo_h5.attrs['env_name']}")
        print(f"Control frequency: {demo_h5.attrs['control_frequency']} Hz")

        # Check first demo
        demo0 = demo_h5['demo_0']
        print(f"\nDemo 0 length: {demo0.attrs['episode_length']} steps")
        print(f"Success: {demo0.attrs['success']}")
        print(f"Final reward: {demo0.attrs['final_reward']:.3f}")
else:
    print("\n⚠️ No demonstration files found!")
    print("Please upload them using the optional upload cell above, then re-run this cell.")

## 5. Train BC Policy

In [ ]:
# Create run directory with timestamp
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
run_dir = f"{checkpoint_dir}/bc_run_{timestamp}"

# Start training with the expert demonstrations from the repo
# Note: Training typically takes 30-45 minutes on T4, but may be faster on A100 or with smaller datasets
!cd tactile-rl && python scripts/train_bc_policy.py \
    --demos {demo_path} \
    --epochs 150 \
    --batch_size 256 \
    --lr 1e-3 \
    --hidden_dims 256 256 \
    --dropout 0.1 \
    --save_dir {run_dir} \
    --device cuda \
    --log_interval 10

## 6. Monitor Training Progress

In [None]:
# Plot training curves
import json
import os

import matplotlib.pyplot as plt

# Load training history. Each entry is read below as a dict with the keys
# 'epoch', 'train_loss', 'val_loss' and 'lr'.
history_path = f"{run_dir}/training_history.json"
if not os.path.exists(history_path):
    # Say so explicitly instead of silently producing no output.
    print(f"No training history found at {history_path} - run the training cell first.")
else:
    with open(history_path, 'r') as history_file:
        history = json.load(history_file)

    if not history:
        print("Training history is empty - nothing to plot.")
    else:
        epochs = [entry['epoch'] for entry in history]
        train_losses = [entry['train_loss'] for entry in history]
        val_losses = [entry['val_loss'] for entry in history]
        learning_rates = [entry['lr'] for entry in history]

        fig, (loss_ax, lr_ax) = plt.subplots(1, 2, figsize=(12, 4))

        # Left panel: train vs. validation loss
        loss_ax.plot(epochs, train_losses, label='Train', linewidth=2)
        loss_ax.plot(epochs, val_losses, label='Val', linewidth=2)
        loss_ax.set_xlabel('Epoch')
        loss_ax.set_ylabel('MSE Loss')
        loss_ax.legend()
        loss_ax.set_title('BC Training Loss')
        loss_ax.grid(True, alpha=0.3)

        # Right panel: learning rate per epoch on a log scale
        lr_ax.plot(epochs, learning_rates, linewidth=2)
        lr_ax.set_xlabel('Epoch')
        lr_ax.set_ylabel('Learning Rate')
        lr_ax.set_title('Cosine Annealing Schedule')
        lr_ax.grid(True, alpha=0.3)
        lr_ax.set_yscale('log')

        fig.tight_layout()
        plt.show()

        # Index of the first epoch that reached the lowest validation loss
        best_idx = min(range(len(val_losses)), key=val_losses.__getitem__)
        print(f"Final train loss: {train_losses[-1]:.4f}")
        print(f"Final val loss: {val_losses[-1]:.4f}")
        print(f"Best val loss: {val_losses[best_idx]:.4f} at epoch {epochs[best_idx]}")

## 7. Evaluate Trained Policy

In [ ]:
# Load and test the trained model
import sys

import numpy as np
import torch

# Add scripts to path so the training module can be imported.
# Guarded so that re-running this cell does not append the same entry again.
SCRIPTS_DIR = 'tactile-rl/scripts'
if SCRIPTS_DIR not in sys.path:
    sys.path.append(SCRIPTS_DIR)
from train_bc_policy import BCPolicy

# Model input/output sizes, defined once and reused for the smoke test below.
# NOTE(review): assumed to match the data the checkpoint was trained on - confirm.
OBS_DIM = 52
ACTION_DIM = 8

# Fall back to CPU so the cell also works on a runtime without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# weights_only=False unpickles arbitrary Python objects; this is acceptable only
# because the checkpoint was produced by this notebook's own training run.
# map_location lets a checkpoint saved on GPU be loaded on the current device.
checkpoint_path = f"{run_dir}/best_model.pt"
checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=False)

# Create model with SAME configuration as during training (including dropout):
# hidden_dims and dropout mirror the flags passed to train_bc_policy.py.
model = BCPolicy(
    obs_dim=OBS_DIM,
    action_dim=ACTION_DIM,
    hidden_dims=[256, 256],
    dropout=0.1  # MUST match training configuration
).to(device)

# Load weights
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # This will disable dropout during evaluation

print(f"Loaded model from epoch {checkpoint['epoch']}")
print(f"Validation loss: {checkpoint['val_loss']:.4f}")

# Print normalization stats if available
if 'norm_stats' in checkpoint:
    print("\nNormalization statistics loaded:")
    print(f"  Observation dims: {len(checkpoint['norm_stats']['obs_mean'])}")
    print(f"  Action dims: {len(checkpoint['norm_stats']['action_mean'])}")

# Smoke test on random inputs: this only checks that a forward pass runs and
# what shape/range it produces; it says nothing about task performance.
test_obs = torch.randn(5, OBS_DIM, device=device)
with torch.no_grad():
    actions = model(test_obs)
    print(f"\nSample action predictions:")
    print(f"Shape: {actions.shape}")
    print(f"Range: [{actions.min().item():.3f}, {actions.max().item():.3f}]")
    print(f"\nFirst action: {actions[0].cpu().numpy()}")

## 8. Download Trained Model

In [ ]:
# Download the best model and training history
from google.colab import files

print("Downloading trained model...")
files.download(f"{run_dir}/best_model.pt")
files.download(f"{run_dir}/training_history.json")

# Also save to permanent location on Drive
!cp {run_dir}/best_model.pt /content/drive/MyDrive/bc_policy_best.pt
print("\nModel saved to Google Drive as bc_policy_best.pt")

# Quick summary of the training
import json
with open(f"{run_dir}/training_history.json", 'r') as f:
    history = json.load(f)
    
print(f"\nTraining Summary:")
print(f"  Total epochs: {len(history)}")
print(f"  Best val loss: {min([h['val_loss'] for h in history]):.4f}")
print(f"  Final train loss: {history[-1]['train_loss']:.4f}")
print(f"  Final val loss: {history[-1]['val_loss']:.4f}")

## 9. Training Multiple Seeds (Optional)

In [ ]:
# Train multiple seeds for robustness
seeds = [42, 123, 456]

for seed in seeds:
    print(f"\nTraining with seed {seed}...")
    run_dir = f"{checkpoint_dir}/bc_seed_{seed}"
    
    !cd tactile-rl && python scripts/train_bc_policy.py \
        --demos {demo_path} \
        --epochs 150 \
        --batch_size 256 \
        --lr 1e-3 \
        --hidden_dims 256 256 \
        --dropout 0.1 \
        --save_dir {run_dir} \
        --device cuda \
        --log_interval 50  # Less verbose

## Next Steps

After BC training completes (~30-45 minutes on T4):

1. **Evaluate BC Success Rate**
   - Expected: 70-80% success on training distribution
   - Lower on new initial conditions

2. **Prepare for RL Fine-tuning**
   - BC provides good initialization
   - RL will improve robustness and success rate
   - Target: 85-95% success

3. **Save Everything**
   - Model checkpoint
   - Normalization statistics
   - Training curves

The trained BC policy is now ready for RL fine-tuning!