In [None]:
# Upload and extract dataset
from google.colab import files
import zipfile
import os

print("Please upload dataset_v2.zip...")
uploaded = files.upload()

# Extract
with zipfile.ZipFile('dataset_v2.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/')

print("\nExtracted contents:")
!ls -la /content/dataset_v2/

In [None]:
# Imports
import numpy as np
from pathlib import Path
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import matplotlib.pyplot as plt
import time
from tqdm import tqdm

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Configuration
# Root folder produced by extracting the dataset archive.
DATA_DIR = Path("/content/dataset_v2")

# Recording sessions used for training (sub-folders of DATA_DIR).
TRAIN_SESSIONS = [
    "4th_floor_hallway_20251206_132136",
    "4th_floor_lounge_20251206_154822",
    "5th_floor_hallway_20251206_161536",
    "3rd_floor_hallway_20251206_162223",
]

# Held-out session(s) used only for evaluation.
TEST_SESSIONS = [
    "Mlab_20251207_112819",
]

# Hyperparameters
BATCH_SIZE = 32
NUM_EPOCHS = 15
LEARNING_RATE = 0.001
IMAGE_SIZE = 224  # images are resized to IMAGE_SIZE x IMAGE_SIZE before entering the network

# Reproducibility: seed the RNGs that drive weight initialisation, shuffling and
# the random augmentations, so "Restart & Run All" gives comparable runs.
# (GPU kernels may still introduce small non-deterministic differences.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Dataset class
class CameraLiDARDataset(Dataset):
    """Dataset that pairs camera images with LiDAR-derived targets.

    Each session folder is expected to contain ``frames/<id>.png`` and
    ``velodyne/<id>.bin``. A sample is created for every image that has a
    matching scan with at least one finite point; its target is the mean
    Euclidean distance of the scan's points from the sensor origin.

    Args:
        sessions: Iterable of session folder names located under ``data_dir``.
        data_dir: Root directory containing the session folders.
        transform: Optional callable applied to the RGB ``PIL.Image`` in
            ``__getitem__``.

    Attributes:
        samples: List of ``(image_path, mean_distance)`` tuples.

    Raises:
        ValueError: If a scan file does not hold a whole number of points.
    """

    # float32 values stored per point in a .bin scan; the first three are read
    # as x, y, z. The meaning of the remaining two is not used here.
    VALUES_PER_POINT = 5

    def __init__(self, sessions, data_dir, transform=None):
        self.data_dir = Path(data_dir)
        self.transform = transform
        self.samples = []

        for session in sessions:
            session_dir = self.data_dir / session
            image_dir = session_dir / "frames"
            velodyne_dir = session_dir / "velodyne"

            if not image_dir.exists() or not velodyne_dir.exists():
                print(f"  Warning: Missing data in {session}")
                continue

            skipped = 0
            for img_path in sorted(image_dir.glob("*.png")):
                lidar_path = velodyne_dir / f"{img_path.stem}.bin"
                if not lidar_path.exists():
                    continue

                mean_distance = self._mean_lidar_distance(lidar_path)
                if mean_distance is None:
                    # A NaN target would turn the whole MSE loss into NaN.
                    skipped += 1
                    continue
                self.samples.append((img_path, mean_distance))

            if skipped:
                print(f"  Warning: Skipped {skipped} frame(s) without valid LiDAR points in {session}")

    def _mean_lidar_distance(self, lidar_path):
        """Return the mean point distance of one scan, or None if it has no finite points."""
        raw = np.fromfile(str(lidar_path), dtype=np.float32)
        if raw.size % self.VALUES_PER_POINT != 0:
            raise ValueError(
                f"{lidar_path}: expected a multiple of {self.VALUES_PER_POINT} "
                f"float32 values, found {raw.size}"
            )

        points = raw.reshape(-1, self.VALUES_PER_POINT)
        x, y, z = points[:, 0], points[:, 1], points[:, 2]
        distances = np.sqrt(x**2 + y**2 + z**2)
        # Drop NaN/inf entries so a few invalid points do not invalidate the frame.
        distances = distances[np.isfinite(distances)]
        if distances.size == 0:
            return None
        return distances.mean()

    def __len__(self):
        """Return the number of (image, target) pairs."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, target)`` with a float32 scalar target."""
        img_path, target = self.samples[idx]
        # The context manager closes the file handle; convert() returns a
        # fully loaded copy that stays valid afterwards.
        with Image.open(img_path) as source:
            image = source.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, torch.tensor(target, dtype=torch.float32)

In [None]:
# Model class
class ResNetRegressor(nn.Module):
    """ResNet18 modified for regression.

    The backbone's classification layer is replaced by a small MLP
    (Linear -> ReLU -> Dropout -> Linear) that outputs one value per image.

    Args:
        pretrained: When True, load the ``IMAGENET1K_V1`` weights for the
            backbone; otherwise start from random initialisation.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        self.resnet = models.resnet18(weights='IMAGENET1K_V1' if pretrained else None)
        num_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Sequential(
            nn.Linear(num_features, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 1)
        )

    def forward(self, x):
        """Return predictions of shape ``(batch,)`` for an image batch ``x``."""
        # Squeeze only the trailing feature dimension. A bare .squeeze() would
        # also remove the batch dimension for a batch of one, giving a 0-d
        # tensor that no longer matches the (1,)-shaped targets.
        return self.resnet(x).squeeze(-1)

In [None]:
# Training and evaluation functions
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Network to train; switched to training mode.
        dataloader: Iterable yielding ``(images, targets)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The mean of the per-batch loss values.
    """
    model.train()
    batch_losses = []
    progress = tqdm(dataloader, desc="Training", leave=False)
    for batch_images, batch_targets in progress:
        batch_images = batch_images.to(device)
        batch_targets = batch_targets.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_images), batch_targets)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(dataloader)

def evaluate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Args:
        model: Network to evaluate; switched to eval mode.
        dataloader: Sized iterable yielding ``(images, targets)`` batches.
        criterion: Loss callable taking ``(outputs, targets)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        dict with keys ``loss`` (mean per-batch loss), ``mae``, ``rmse``,
        ``r2`` (0.0 when the targets have no variance), and the 1-D numpy
        arrays ``predictions`` and ``targets``.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError("evaluate() received an empty dataloader; check that the dataset contains samples")

    model.eval()
    total_loss = 0.0
    pred_chunks, target_chunks = [], []
    with torch.no_grad():
        for images, targets in dataloader:
            images, targets = images.to(device), targets.to(device)
            # Flatten to 1-D: a model that ends in .squeeze() returns a 0-d
            # tensor for a batch of one, which mismatches the targets and
            # cannot be iterated when collecting predictions.
            outputs = model(images).reshape(-1)
            targets = targets.reshape(-1)
            total_loss += criterion(outputs, targets).item()
            pred_chunks.append(outputs.cpu().numpy())
            target_chunks.append(targets.cpu().numpy())

    all_preds = np.concatenate(pred_chunks)
    all_targets = np.concatenate(target_chunks)
    errors = all_preds - all_targets
    mae = np.abs(errors).mean()
    rmse = np.sqrt((errors ** 2).mean())
    ss_res = (errors ** 2).sum()
    ss_tot = ((all_targets - all_targets.mean()) ** 2).sum()
    # R^2 is undefined for constant targets; report 0.0 in that case.
    r2 = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0.0

    return {'loss': total_loss / num_batches, 'mae': mae, 'rmse': rmse, 'r2': r2,
            'predictions': all_preds, 'targets': all_targets}

In [None]:
# Load datasets
print("Loading datasets...")

# Training pipeline: resize, light augmentation (flip + colour jitter), then
# normalise with the ImageNet channel statistics.
train_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Evaluation pipeline: same resize and normalisation, no augmentation.
test_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = CameraLiDARDataset(TRAIN_SESSIONS, DATA_DIR, transform=train_transform)
test_dataset = CameraLiDARDataset(TEST_SESSIONS, DATA_DIR, transform=test_transform)

print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")

# The dataset class only warns about missing session folders, so stop here
# with a clear message instead of failing later inside the sampler or metrics.
if len(train_dataset) == 0 or len(test_dataset) == 0:
    raise RuntimeError(
        f"Empty dataset (train={len(train_dataset)}, test={len(test_dataset)}); "
        f"check that the session folders exist under {DATA_DIR}."
    )

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

In [None]:
# Show sample images
print("\n📷 Sample Camera Images with LiDAR Distances:")
fig, axes = plt.subplots(2, 4, figsize=(16, 8))
for i, ax in enumerate(axes.flat):
    sample_idx = i * 100  # Sample every 100th
    ax.axis('off')
    # Guard on the index actually used; small datasets simply leave the
    # remaining panels blank instead of raising IndexError.
    if sample_idx >= len(train_dataset):
        continue
    img_path, dist = train_dataset.samples[sample_idx]
    img = Image.open(img_path)
    ax.imshow(img)
    ax.set_title(f"Distance: {dist:.2f}m", fontsize=10)
plt.suptitle("Sample Training Images with Ground Truth LiDAR Distances", fontsize=14)
plt.tight_layout()
plt.savefig('sample_images.png', dpi=150)
plt.show()

In [None]:
# Create model
print("\n🧠 Creating ResNet18 Model...")
model = ResNetRegressor(pretrained=True).to(device)
print(f"Total parameters: {sum(p.numel() for p in model.parameters()):,}")

# Mean squared error on the predicted distance; Adam updates every parameter
# (backbone and regression head).
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
# Training loop
print(f"\n🚀 Training for {NUM_EPOCHS} epochs...")
print("="*60)

# Per-epoch histories consumed by the plotting cells below.
train_losses, val_losses, val_maes = [], [], []
start_time = time.time()

for epoch in range(NUM_EPOCHS):
    train_loss = train_epoch(model, train_loader, criterion, optimizer, device)
    # NOTE: the "Val" metrics are computed on test_loader (the held-out
    # session); there is no separate validation split in this notebook.
    val_results = evaluate(model, test_loader, criterion, device)

    train_losses.append(train_loss)
    val_losses.append(val_results['loss'])
    val_maes.append(val_results['mae'])

    print(f"Epoch {epoch+1:2d}/{NUM_EPOCHS} | Train Loss: {train_loss:.4f} | Val Loss: {val_results['loss']:.4f} | Val MAE: {val_results['mae']:.4f}m")

total_time = time.time() - start_time
print(f"\n✅ Training completed in {total_time:.1f} seconds")

In [None]:
# Final evaluation
print("\n📊 Final Evaluation on Test Set...")
# Metrics of the model after the last epoch, on the held-out session(s).
final_results = evaluate(model, test_loader, criterion, device)

print(f"\n{'='*40}")
print("TEST RESULTS")
print(f"{'='*40}")
print(f"  MAE:  {final_results['mae']:.4f} meters")
print(f"  RMSE: {final_results['rmse']:.4f} meters")
print(f"  R²:   {final_results['r2']:.4f}")
print(f"{'='*40}")

In [None]:
# Plot 1: Training History
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Loss curves
ax1 = axes[0]
# Build the x-axis from the recorded history rather than NUM_EPOCHS so the plot
# still works if training was interrupted or the constant changed afterwards.
epochs = range(1, len(train_losses) + 1)
ax1.plot(epochs, train_losses, 'b-', label='Training Loss', linewidth=2)
ax1.plot(epochs, val_losses, 'r-', label='Validation Loss', linewidth=2)
ax1.set_xlabel('Epoch', fontsize=12)
ax1.set_ylabel('Loss (MSE)', fontsize=12)
ax1.set_title('Training & Validation Loss', fontsize=14)
ax1.legend()
ax1.grid(True, alpha=0.3)

# MAE curve
ax2 = axes[1]
ax2.plot(epochs, val_maes, 'g-', linewidth=2, marker='o')
ax2.set_xlabel('Epoch', fontsize=12)
ax2.set_ylabel('MAE (meters)', fontsize=12)
ax2.set_title('Validation MAE Over Training', fontsize=14)
ax2.grid(True, alpha=0.3)

plt.suptitle('ResNet18 Training Progress - Camera to LiDAR Distance Prediction', fontsize=14, y=1.02)
plt.tight_layout()
plt.savefig('training_history.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Plot 2: Predictions Analysis
# Four diagnostic panels built from the final test-set evaluation.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

targets = final_results['targets']
predictions = final_results['predictions']

# Predicted vs Actual
ax1 = axes[0, 0]
ax1.scatter(targets, predictions, alpha=0.5, s=20, c='blue')
# Shared range so the y = x reference line spans both axes.
min_val, max_val = min(targets.min(), predictions.min()), max(targets.max(), predictions.max())
ax1.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2, label='Perfect prediction')
ax1.set_xlabel('Actual Distance (m)', fontsize=12)
ax1.set_ylabel('Predicted Distance (m)', fontsize=12)
ax1.set_title('Predicted vs Actual (Test Set)', fontsize=14)
ax1.legend()
ax1.grid(True, alpha=0.3)

# Residual plot
ax2 = axes[0, 1]
residuals = predictions - targets  # positive = over-estimate
ax2.scatter(predictions, residuals, alpha=0.5, s=20, c='green')
ax2.axhline(y=0, color='r', linestyle='--', lw=2)
ax2.set_xlabel('Predicted Distance (m)', fontsize=12)
ax2.set_ylabel('Residual (Pred - Actual)', fontsize=12)
ax2.set_title('Residual Plot', fontsize=14)
ax2.grid(True, alpha=0.3)

# Error distribution
ax3 = axes[1, 0]
ax3.hist(residuals, bins=30, color='purple', alpha=0.7, edgecolor='black')
ax3.axvline(x=0, color='r', linestyle='--', lw=2)
ax3.axvline(x=residuals.mean(), color='orange', linestyle='-', lw=2, label=f'Mean: {residuals.mean():.3f}m')
ax3.set_xlabel('Prediction Error (m)', fontsize=12)
ax3.set_ylabel('Frequency', fontsize=12)
ax3.set_title('Error Distribution', fontsize=14)
ax3.legend()
ax3.grid(True, alpha=0.3)

# Prediction timeline
ax4 = axes[1, 1]
# The test loader is not shuffled, so the index follows dataset order.
frames = np.arange(len(targets))
ax4.plot(frames, targets, 'b-', alpha=0.7, label='Actual', linewidth=1)
ax4.plot(frames, predictions, 'r-', alpha=0.7, label='Predicted', linewidth=1)
ax4.set_xlabel('Frame Index', fontsize=12)
ax4.set_ylabel('Distance (m)', fontsize=12)
ax4.set_title('Prediction Timeline (Test Session)', fontsize=14)
ax4.legend()
ax4.grid(True, alpha=0.3)

plt.suptitle(f'ResNet18 Results - MAE: {final_results["mae"]:.4f}m, R²: {final_results["r2"]:.4f}', fontsize=14, y=1.02)
plt.tight_layout()
plt.savefig('prediction_results.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Summary
# Session counts and the resize target are taken from the configuration
# constants so the report cannot drift from the actual run settings.
print("\n" + "="*70)
print("📋 FINAL SUMMARY")
print("="*70)
print(f"""
DATASET:
  - Source: Unitree Go1 Robot + RoboSense Helios-16 LiDAR
  - Data: 100% REAL sensor measurements (NO synthetic data)
  - Train: {len(train_dataset)} frames from {len(TRAIN_SESSIONS)} sessions
  - Test: {len(test_dataset)} frames from {len(TEST_SESSIONS)} held-out session(s) ({', '.join(TEST_SESSIONS)})

TASK:
  - Cross-modal learning: Predict LiDAR distance from camera image
  - Input: RGB camera image (1856x800 → {IMAGE_SIZE}x{IMAGE_SIZE})
  - Output: Mean LiDAR distance (meters)

MODEL:
  - Architecture: ResNet18 (pretrained on ImageNet)
  - Training: {NUM_EPOCHS} epochs, batch size {BATCH_SIZE}
  - Training time: {total_time:.1f} seconds

RESULTS:
  - Test MAE:  {final_results['mae']:.4f} meters
  - Test RMSE: {final_results['rmse']:.4f} meters
  - Test R²:   {final_results['r2']:.4f}

SAVED FIGURES:
  - sample_images.png
  - training_history.png
  - prediction_results.png
""")
print("="*70)

In [None]:
# Download generated figures
from google.colab import files
# Trigger a browser download for each figure saved by the plotting cells.
for figure_name in ('sample_images.png', 'training_history.png', 'prediction_results.png'):
    files.download(figure_name)