# Tactile Manipulation - BC Training on Colab

This notebook trains a Behavior Cloning (BC) policy from expert demonstrations on a Colab T4 or A100 GPU.

## 1. Setup Environment

In [None]:
# Check GPU
!nvidia-smi
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name()}")
    print(f"Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f}GB")

In [None]:
# Install dependencies
!pip install -q mujoco h5py tensorboard matplotlib tqdm

## 2. Clone Repository

In [None]:
# Clone your repository
# !git clone https://github.com/YOUR_USERNAME/TactileManipulation.git
# %cd TactileManipulation/tactile-rl

# For now, create directories
!mkdir -p tactile-rl/scripts
!mkdir -p tactile-rl/datasets/expert
%cd tactile-rl

## 3. Mount Google Drive

In [None]:
import os
from google.colab import drive

# Mount Google Drive into the Colab filesystem
drive_root = '/content/drive'
drive.mount(drive_root)

# Directory on Drive that the training runs write their checkpoints to
checkpoint_dir = f'{drive_root}/MyDrive/tactile_manipulation_checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)
print(f"Checkpoints will be saved to: {checkpoint_dir}")

## 4. Upload Training Script and Data

In [None]:
# Option 1: Upload files
import shutil
from google.colab import files

print("Upload train_bc_policy.py:")
uploaded = files.upload()
if not uploaded:
    raise RuntimeError("No training script was uploaded.")
# Move whatever was uploaded to the canonical script path. Using shutil instead
# of a shell `mv` keeps this working when the uploaded name differs from
# train_bc_policy.py or contains spaces/parentheses (e.g. "name (1).py").
script_filename = next(iter(uploaded))
shutil.move(script_filename, 'scripts/train_bc_policy.py')

print("\nUpload expert demonstrations HDF5:")
uploaded = files.upload()
if not uploaded:
    raise RuntimeError("No demonstration file was uploaded.")
# Only the first uploaded file is used as the demonstration dataset.
demo_filename = next(iter(uploaded))
demo_path = f'datasets/expert/{demo_filename}'
shutil.move(demo_filename, demo_path)

In [None]:
# Sanity-check the uploaded demonstration file by printing its metadata
import h5py

with h5py.File(demo_path, 'r') as demo_file:
    # File-level attributes describing the whole dataset
    file_attrs = demo_file.attrs
    print(f"Loaded {file_attrs['num_demos']} demonstrations")
    print(f"Environment: {file_attrs['env_name']}")
    print(f"Control frequency: {file_attrs['control_frequency']} Hz")

    # Per-episode attributes of the first demonstration group
    first_demo_attrs = demo_file['demo_0'].attrs
    print(f"\nDemo 0 length: {first_demo_attrs['episode_length']} steps")
    print(f"Success: {first_demo_attrs['success']}")
    print(f"Final reward: {first_demo_attrs['final_reward']:.3f}")

## 5. Train BC Policy

In [None]:
# Create run directory with timestamp
from datetime import datetime
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
run_dir = f"{checkpoint_dir}/bc_run_{timestamp}"

# Start training
!python scripts/train_bc_policy.py \
    --demos {demo_path} \
    --epochs 150 \
    --batch_size 256 \
    --lr 1e-3 \
    --hidden_dims 256 256 \
    --dropout 0.1 \
    --save_dir {run_dir} \
    --device cuda \
    --log_interval 10

## 6. Monitor Training Progress

In [None]:
# Plot training curves
import json
import os
import matplotlib.pyplot as plt

# Load training history (expected: a list of per-epoch records with the keys
# 'epoch', 'train_loss', 'val_loss' and 'lr', as read below)
history_path = f"{run_dir}/training_history.json"
history = []
if os.path.exists(history_path):
    with open(history_path, 'r') as f:
        history = json.load(f)

if not history:
    # Explain why nothing is drawn instead of finishing silently.
    print(f"No training history found at {history_path}; run the training cell first.")
else:
    epochs = [h['epoch'] for h in history]
    train_losses = [h['train_loss'] for h in history]
    val_losses = [h['val_loss'] for h in history]
    learning_rates = [h['lr'] for h in history]

    fig, (loss_ax, lr_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: train vs. validation loss
    loss_ax.plot(epochs, train_losses, label='Train', linewidth=2)
    loss_ax.plot(epochs, val_losses, label='Val', linewidth=2)
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('MSE Loss')
    loss_ax.legend()
    loss_ax.set_title('BC Training Loss')
    loss_ax.grid(True, alpha=0.3)

    # Right panel: learning rate per epoch on a log scale
    lr_ax.plot(epochs, learning_rates, linewidth=2)
    lr_ax.set_xlabel('Epoch')
    lr_ax.set_ylabel('Learning Rate')
    lr_ax.set_title('Cosine Annealing Schedule')
    lr_ax.grid(True, alpha=0.3)
    lr_ax.set_yscale('log')

    fig.tight_layout()
    plt.show()

    # First epoch that reaches the minimum validation loss
    best_idx = min(range(len(val_losses)), key=val_losses.__getitem__)

    print(f"Final train loss: {train_losses[-1]:.4f}")
    print(f"Final val loss: {val_losses[-1]:.4f}")
    print(f"Best val loss: {val_losses[best_idx]:.4f} at epoch {epochs[best_idx]}")

## 7. Evaluate Trained Policy

In [None]:
# Load and test the trained model
import torch
import numpy as np

# Import model class
import sys
# Guard the append so re-running this cell does not grow sys.path
if 'scripts' not in sys.path:
    sys.path.append('scripts')
from train_bc_policy import BCPolicy

# Use the GPU when present, otherwise fall back to CPU so the cell still runs
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load checkpoint; map_location places the stored tensors on the chosen device
checkpoint_path = f"{run_dir}/best_model.pt"
checkpoint = torch.load(checkpoint_path, map_location=device)

# Network dimensions -- assumed to match the demonstrations used for training;
# TODO confirm against the dataset / training script.
OBS_DIM = 52
ACTION_DIM = 8

# Create model
# NOTE(review): training was launched with --dropout 0.1. If BCPolicy only
# creates dropout layers when dropout > 0, the state-dict keys could differ
# from this dropout=0.0 model -- confirm against train_bc_policy.py.
model = BCPolicy(
    obs_dim=OBS_DIM,
    action_dim=ACTION_DIM,
    hidden_dims=[256, 256],
    dropout=0.0  # No dropout for evaluation
).to(device)

# Load weights
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

print(f"Loaded model from epoch {checkpoint['epoch']}")
print(f"Validation loss: {checkpoint['val_loss']:.4f}")

# Test on random inputs (smoke test of the forward pass, not a task evaluation)
test_obs = torch.randn(5, OBS_DIM).to(device)
with torch.no_grad():
    actions = model(test_obs)
    print(f"\nSample action predictions:")
    print(f"Shape: {actions.shape}")
    print(f"Range: [{actions.min().item():.3f}, {actions.max().item():.3f}]")
    print(f"\nFirst action: {actions[0].cpu().numpy()}")

## 8. Download Trained Model

In [None]:
# Download the best model and training history
from google.colab import files

print("Downloading trained model...")
files.download(f"{run_dir}/best_model.pt")
files.download(f"{run_dir}/training_history.json")

# Also save to permanent location on Drive
!cp {run_dir}/best_model.pt /content/drive/MyDrive/bc_policy_best.pt
print("\nModel saved to Google Drive as bc_policy_best.pt")

## 9. Training Multiple Seeds (Optional)

In [None]:
# Train multiple seeds for robustness
seeds = [42, 123, 456]

for seed in seeds:
    print(f"\nTraining with seed {seed}...")
    run_dir = f"{checkpoint_dir}/bc_seed_{seed}"
    
    !python scripts/train_bc_policy.py \
        --demos {demo_path} \
        --epochs 150 \
        --batch_size 256 \
        --lr 1e-3 \
        --hidden_dims 256 256 \
        --dropout 0.1 \
        --save_dir {run_dir} \
        --device cuda \
        --log_interval 50  # Less verbose

## Next Steps

After BC training completes (~30-45 minutes on a T4):

1. **Evaluate BC Success Rate**
   - Expected: 70-80% success on training distribution
   - Lower on new initial conditions

2. **Prepare for RL Fine-tuning**
   - BC provides good initialization
   - RL will improve robustness and success rate
   - Target: 85-95% success

3. **Save Everything**
   - Model checkpoint
   - Normalization statistics
   - Training curves

The trained BC policy is now ready for RL fine-tuning!