# VishwamAI Training on Google Colab

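This notebook provides a simplified interface for training VishwamAI on Google Colab. It installs the dependencies, detects the attached GPU, and selects the matching optimized configuration (A100, V100, or T4) before training.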

In [None]:
# Verify GPU
!nvidia-smi

# Install core dependencies
!pip install torch==2.4.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install transformers==4.34.0 datasets accelerate

# Clone repository
!git clone https://github.com/kasinadhsarma/VishwamAI.git
%cd VishwamAI
!pip install -e .

In [None]:
import torch
from vishwamai.examples.train_model import setup_training
from transformers import Trainer

# Auto-detect GPU type
# Fail fast with an actionable message when the runtime has no CUDA device;
# otherwise get_device_name(0) raises a low-level torch error that does not
# tell a Colab user what to change.
if not torch.cuda.is_available():
    raise RuntimeError(
        "No CUDA GPU detected. In Colab, open Runtime > Change runtime type, "
        "select a GPU accelerator, then re-run the notebook from the top."
    )

# Lower-cased so the substring checks below are case-insensitive.
gpu_name = torch.cuda.get_device_name(0).lower()

# Ordered (substring, profile) pairs; the first substring found in the device
# name wins. Any other GPU falls back to the T4 profile.
GPU_PROFILES = (
    ("a100", "A100_optimized"),
    ("v100", "V100_optimized"),
)
DEFAULT_GPU_PROFILE = "T4_optimized"

gpu_type = next(
    (profile for marker, profile in GPU_PROFILES if marker in gpu_name),
    DEFAULT_GPU_PROFILE,
)

print(f"Detected GPU: {gpu_name}")
print(f"Using configuration: {gpu_type}")

In [None]:
# Training Configuration
class Args:
    """Attribute container holding the options handed to ``setup_training``.

    Called with no arguments it produces the notebook's default configuration.
    Any option can be overridden by keyword, e.g. ``Args(epochs=1)``, instead
    of editing the class body.

    Options and defaults:
        config_path:     "configs/config_optimized.json" (relative path)
        train_dataset:   "gsm8k"
        eval_dataset:    "cais/mmlu"
        output_dir:      "./output"
        epochs:          3
        gpu_type:        the notebook-level ``gpu_type`` chosen by the
                         GPU-detection cell, unless overridden
        pretrained_path: None
        disable_cache:   False
        use_wandb:       False
        push_to_hub:     False
        run_name:        "vishwamai_training"

    Raises:
        TypeError: if an override names an option that is not listed above
            (catches typos such as ``epoch=``).
    """

    def __init__(self, **overrides):
        # Insertion order matches the original attribute order so that
        # vars(args) is unchanged for callers that iterate over it.
        options = {
            "config_path": "configs/config_optimized.json",
            "train_dataset": "gsm8k",
            "eval_dataset": "cais/mmlu",
            "output_dir": "./output",
            "epochs": 3,
            "gpu_type": None,  # resolved below
            "pretrained_path": None,
            "disable_cache": False,
            "use_wandb": False,
            "push_to_hub": False,
            "run_name": "vishwamai_training",
        }

        unknown = sorted(set(overrides) - set(options))
        if unknown:
            raise TypeError(f"Unknown training option(s): {', '.join(unknown)}")

        # Only read the notebook-level gpu_type when the caller did not supply
        # one, so the class is usable without running the detection cell.
        if "gpu_type" not in overrides:
            options["gpu_type"] = gpu_type

        options.update(overrides)
        for name, value in options.items():
            setattr(self, name, value)

# Build the training options from the Args class defined in this cell.
args = Args()

# Setup training environment
# setup_training returns three values, unpacked here in order into the model,
# the dataset mapping and the training arguments used by the Trainer cell.
training_setup = setup_training(args)
model, datasets, training_args = training_setup

In [None]:
# Initialize trainer
# Collect the constructor arguments first, then build the Trainer in one call.
# The dataset mapping is indexed by its "train" and "validation" keys.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=datasets["train"],
    eval_dataset=datasets["validation"],
)
trainer = Trainer(**trainer_kwargs)

In [None]:
# Start training
# Runs training on the Trainer built in the previous cell. Because this call is
# the last expression in the cell, its return value is shown as the cell output.
trainer.train()

In [None]:
# Save trained model
# "final_model" is a relative path, so it resolves against the current working
# directory (the repository root once the setup cell's %cd has run); it is a
# different location from the "./output" directory set in Args.
# NOTE(review): no tokenizer is passed to Trainer above, so presumably only the
# model is written here; confirm before relying on this directory for reloading.
trainer.save_model("final_model")