# Vision as LoRA - Colab Test (A100)

A quick test of the system on an A100 before the full training run on an H100.

## 1. Initial Setup

In [None]:
import sys
from google.colab import drive

drive.mount('/content/drive')

!nvidia-smi

!git clone https://github.com/seu-usuario/vision-as-lora.git
%cd vision-as-lora

In [None]:
# Install the Python packages listed below; -q keeps pip's output quiet.
!pip install -q torch torchvision transformers peft datasets accelerate wandb pillow tqdm

## 2. Test Configuration

In [None]:
from configs.config import (
    Config,
    DataConfig,
    ExperimentConfig,
    LoRAConfig,
    ModelConfig,
    TrainingConfig,
    VisionConfig,
)

# Each section of the configuration is built on its own and then assembled
# into the top-level Config, so individual sections are easy to tweak.

# Base language model settings.
model_cfg = ModelConfig(
    model_name="HuggingFaceTB/SmolLM2-135M",
    torch_dtype="bfloat16",
    use_flash_attention=True,
)

# Image input settings.
vision_cfg = VisionConfig(image_size=224, patch_size=16)

# LoRA adapter settings.
lora_cfg = LoRAConfig(rank=64, alpha=128, vision_layers=8)

# Only a 1% slice of each split is requested to keep the test short.
data_cfg = DataConfig(
    dataset_name="nielsr/coco-captions",
    train_split="train[:1%]",
    val_split="validation[:1%]",
    num_workers=2,
)

# Short training run; checkpoints go to Google Drive.
training_cfg = TrainingConfig(
    output_dir="/content/drive/MyDrive/vision-lora-test",
    run_name="colab-test-a100",
    batch_size=16,
    gradient_accumulation_steps=2,
    max_length=128,
    target_samples=1000,
    learning_rate=5e-4,
    warmup_steps=10,
    logging_steps=5,
    eval_steps=50,
    save_total_limit=1,
    use_torch_compile=False,
)

# Experiment metadata and feature switches.
experiment_cfg = ExperimentConfig(
    name="colab_test",
    use_vision=True,
    use_bidirectional_mask=True,
    description="Quick test on A100"
)

test_config = Config(
    model=model_cfg,
    vision=vision_cfg,
    lora=lora_cfg,
    data=data_cfg,
    training=training_cfg,
    experiment=experiment_cfg,
    wandb_project="vision-lora-test",
)

# Report the step count and effective batch size exposed by the training config.
training_section = test_config.training
print(f"Max steps: {training_section.max_steps}")
print(f"Effective batch: {training_section.effective_batch_size}")

## 3. Test Model Initialization

In [None]:
import torch

from src.model.vision_lora_model import VisionLoRAModel

# Build the model from the test configuration and report its trainable parameters.
print("Loading model...")
model = VisionLoRAModel(test_config)

model.print_trainable_parameters()

# The device of the first parameter is reported as the model's device.
first_param = next(model.parameters())
cuda_ok = torch.cuda.is_available()

print(f"\nModel device: {first_param.device}")
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
    vram_gb = torch.cuda.memory_allocated() / 1e9
    print(f"GPU: {gpu_name}")
    print(f"VRAM allocated: {vram_gb:.2f} GB")

## 4. Test Dataset Loading

In [None]:
from torch.utils.data import DataLoader

from src.data.dataset import VisionLanguageDataset

# Instantiate the train and validation datasets from the test configuration.
print("Loading dataset...")
train_dataset = VisionLanguageDataset(test_config, split="train")
val_dataset = VisionLanguageDataset(test_config, split="val")

for split_label, split_dataset in (("Train", train_dataset), ("Val", val_dataset)):
    print(f"{split_label} samples: {len(split_dataset)}")

# Inspect the first training sample: its keys and the shape of each field.
sample = train_dataset[0]
print(f"\nSample keys: {sample.keys()}")
for field_label, field_key in (
    ("Pixel values", "pixel_values"),
    ("Input IDs", "input_ids"),
    ("Labels", "labels"),
):
    print(f"{field_label} shape: {sample[field_key].shape}")

## 5. Test Forward Pass

In [None]:
# Small, unshuffled loader used for the smoke test (and reused for benchmarking).
test_loader = DataLoader(
    train_dataset,
    batch_size=2,
    shuffle=False,
)

# Take the first batch and move every entry to the GPU.
batch = next(iter(test_loader))
batch = {k: v.cuda() for k, v in batch.items()}

print("Testing forward pass...")

import torch

# torch.cuda.amp.autocast is the deprecated entry point and defaults to
# float16; use torch.autocast with bfloat16 so the smoke test runs in the
# same precision the test configuration requests (torch_dtype="bfloat16").
with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
    outputs = model(**batch)

print(f"Loss: {outputs.loss.item():.4f}")
print(f"Logits shape: {outputs.logits.shape}")
print(f"\nMemory allocated: {torch.cuda.memory_allocated() / 1e9:.2f} GB")
print(f"Memory reserved: {torch.cuda.memory_reserved() / 1e9:.2f} GB")

## 6. Test Training Loop (100 steps)

In [None]:
import wandb

from src.training.trainer import create_trainer

# Authenticate with Weights & Biases before the trainer is created.
wandb.login()

# NOTE(review): create_trainer receives only the config, not `model` --
# confirm whether it trains the instance that later cells evaluate.
print("Creating trainer...")
trainer = create_trainer(test_config)

print("\nStarting test training...")
trainer.train()

## 7. Test Generation

In [None]:
from src.training.evaluation import Evaluator

# Wrap the model in the project's evaluator and feed it one validation
# sample at a time, in dataset order.
evaluator = Evaluator(model, test_config)

val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

print("Generating captions...")
captions = evaluator.generate_captions(val_loader, num_samples=3, max_new_tokens=30)

# Each result is unpacked as a (generated, reference) pair and printed
# followed by a separator line.
separator = "-" * 60
for sample_idx, caption_pair in enumerate(captions, start=1):
    generated, reference = caption_pair
    print(
        f"\nSample {sample_idx}:",
        f"Generated: {generated}",
        f"Reference: {reference}",
        separator,
        sep="\n",
    )

## 8. Performance Metrics

In [None]:
import time
import torch

model.eval()

# test_loader is not shuffled, so a fresh iterator always yields the same
# first batch; fetch and transfer it once instead of on every iteration.
batch = next(iter(test_loader))
batch = {k: v.cuda() for k, v in batch.items()}

warmup_iters = 3   # untimed passes so one-off startup costs do not skew the average
timed_iters = 10

times = []
with torch.no_grad():
    for _ in range(warmup_iters):
        model(**batch)

    for _ in range(timed_iters):
        # Synchronize on both sides so the timer covers the GPU work itself,
        # not just the asynchronous kernel launches.
        torch.cuda.synchronize()
        start = time.perf_counter()  # monotonic, high-resolution clock

        outputs = model(**batch)

        torch.cuda.synchronize()
        times.append(time.perf_counter() - start)

avg_time = sum(times) / len(times)
# Throughput uses the loader's real batch size rather than a hard-coded 2.
samples_per_sec = test_loader.batch_size / avg_time

print(f"\nPerformance Metrics (A100):")
print(f"Avg inference time: {avg_time*1000:.2f} ms")
print(f"Samples/sec: {samples_per_sec:.2f}")
# max_memory_allocated() is a running peak; it is not reset in this cell, so
# it also reflects work done by earlier cells.
print(f"Peak memory: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")

## 9. Estimate Full Training Time

In [None]:
# Planned full-run hyperparameters.
batch_size = 32
gradient_accum = 4
effective_batch = batch_size * gradient_accum

target_samples_h100 = 400_000
# Ceil division: a final partial batch still costs one optimizer step.
steps_needed = -(-target_samples_h100 // effective_batch)

# avg_time is the forward-only latency of one test_loader batch, which is much
# smaller than the planned micro-batch. Convert it to a per-sample cost and
# scale it up, instead of treating it as the time of a full 32-sample
# micro-batch (which made the estimate far too optimistic).
time_per_sample_fwd = avg_time / test_loader.batch_size

# Rough heuristic: a training step (forward + backward) costs about 3x a
# forward-only pass. NOTE(review): replace with a measured training step time
# once one is available.
train_to_inference_ratio = 3.0

time_per_microbatch = time_per_sample_fwd * batch_size * train_to_inference_ratio
time_per_step_a100 = time_per_microbatch * gradient_accum
total_time_a100_hours = (steps_needed * time_per_step_a100) / 3600

# Assumed H100-over-A100 speedup used for the projection.
h100_speedup = 1.5
total_time_h100_hours = total_time_a100_hours / h100_speedup

print(f"\nFull Training Estimates:")
print(f"Target samples: {target_samples_h100:,}")
print(f"Steps needed: {steps_needed:,}")
print(f"Estimated time on A100: {total_time_a100_hours:.1f} hours")
print(f"Estimated time on H100: {total_time_h100_hours:.1f} hours")
print(f"\nRecommendation: {'H100' if total_time_h100_hours < 12 else 'Reduce samples'}")

## 10. Save Test Results

In [None]:
import json
from datetime import datetime

output_file = "/content/drive/MyDrive/test_results.json"

# Collect the figures produced by the earlier cells into one record.
results = dict(
    timestamp=datetime.now().isoformat(),
    gpu=torch.cuda.get_device_name(0),
    model=test_config.model.model_name,
    samples_tested=len(train_dataset),
    avg_inference_time_ms=avg_time * 1000,
    samples_per_sec=samples_per_sec,
    peak_memory_gb=torch.cuda.max_memory_allocated() / 1e9,
    estimated_h100_time_hours=total_time_h100_hours,
    test_loss=outputs.loss.item(),
)

# Serialize once; the same text is written to Drive and echoed below.
results_json = json.dumps(results, indent=2)
with open(output_file, 'w') as f:
    f.write(results_json)

print(f"\nResults saved to: {output_file}")
print(results_json)