# VishwamAI Model Training on Google Colab

This notebook provides a simplified way to train the VishwamAI model using Google Colab's GPU resources.

In [None]:
# Verify GPU access
# Shells out to `nvidia-smi` so the attached GPU is visible before any work
# starts; later cells request device="cuda" and fp16=True, so they need one.
!nvidia-smi

In [None]:
# Install dependencies
!pip install torch transformers datasets huggingface_hub accelerate

In [None]:
# Clone repository
!git clone https://github.com/kasinadhsarma/VishwamAI.git
%cd VishwamAI
!pip install -r requirements.txt

In [None]:
# HuggingFace login
# Calls huggingface_hub's notebook login helper. The final cell of this
# notebook uploads to the Hub through HfApi without passing a token, so it
# presumably relies on the credentials stored here (confirm).
from huggingface_hub import notebook_login
notebook_login()

In [None]:
import os
import torch
from transformers import TrainingArguments, Trainer
from datasets import load_dataset
from vishwamai.model_utils import load_model

# Load datasets
# Both Hub repositories define several configurations, so a config name has to
# be passed explicitly ("main" for GSM8K, "all" for MMLU); omitting it raises.
# "cais/mmlu" has no "train" split. Its "validation" split is loaded instead,
# because this entry is only consumed as the evaluation set further down.
datasets = {
    "gsm8k": load_dataset("openai/gsm8k", "main", split="train"),
    "mmlu": load_dataset("cais/mmlu", "all", split="validation"),
}

In [None]:
# Load model with optimized settings for Colab
# Architecture settings forwarded to load_model as keyword arguments.
# NOTE(review): presumably these override values from the JSON config file;
# confirm against vishwamai.model_utils.load_model.
# 2048 hidden units split over 16 attention heads is 128 units per head.
model_kwargs = {
    "hidden_size": 2048,
    "num_hidden_layers": 12,
    "num_attention_heads": 16,
}

model = load_model(
    "vishwamai/configs/config_optimized.json",
    device="cuda",
    **model_kwargs,
)

In [None]:
# Configure training
training_args = TrainingArguments(
    # Optimisation schedule
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=3,
    # 8 samples per device x 4 accumulation steps = 32 samples per optimizer
    # step on each device.
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    # Precision and activation checkpointing
    fp16=True,
    gradient_checkpointing=True,
    # Checkpoints and logs
    output_dir="training_output",
    save_strategy="epoch",
    logging_dir="logs",
    logging_steps=10,
)

# NOTE(review): the datasets are handed to the Trainer exactly as loaded; no
# tokenization step or data collator is configured anywhere in this notebook.
# Confirm the model can consume these columns directly.
# NOTE(review): an eval_dataset is supplied but no evaluation strategy is set
# above; confirm whether evaluation is meant to run during training.
train_split = datasets["gsm8k"]
eval_split = datasets["mmlu"]

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    eval_dataset=eval_split,
)

In [None]:
# Start training
# Runs the Trainer built in the previous cell. The call is the cell's last
# expression, so whatever train() returns is displayed when the run finishes.
trainer.train()

In [None]:
# Save and upload model
from huggingface_hub import HfApi

# Local directory the trained model is written to and then uploaded from.
FINAL_MODEL_DIR = "final_model"
# Target repository on the Hugging Face Hub. Change this to a repository under
# your own account or organisation: the upload only succeeds for a namespace
# the logged-in user can write to.
HF_REPO_ID = "kasinadhsarma/vishwamai-model"

trainer.save_model(FINAL_MODEL_DIR)

api = HfApi()

# upload_folder needs the target repository to exist already. exist_ok=True
# makes this call a no-op when it does, so the cell can be re-run.
api.create_repo(repo_id=HF_REPO_ID, repo_type="model", exist_ok=True)

# Upload to your HuggingFace repository
api.upload_folder(
    folder_path=FINAL_MODEL_DIR,
    repo_id=HF_REPO_ID,
    repo_type="model",
)