In [None]:
   # Create necessary directories
   #!mkdir -p /content/cifar10_images
   #!mkdir -p /content/output
   #!mkdir -p /content/checkpoints

   # Upload your files to Google Drive (and make sure Drive is mounted at /content/drive).
   # The cells below expect your files in these locations:
   # - /content/drive/MyDrive/Assignment_23/cifar10_images/ (containing all your CIFAR10 images)
   # - /content/drive/MyDrive/Assignment_23/cifar10_qa_colab.json (your JSON file with Q&A pairs)

   # First, let's properly install bitsandbytes
!pip uninstall -y bitsandbytes
!pip install bitsandbytes==0.41.1
!pip install accelerate==0.27.2

In [None]:
   # Check if files are in the correct locations
   import os
   import json

   # Locations that the later cells read from.
   images_dir = "/content/drive/MyDrive/Assignment_23/cifar10_images"
   qa_json_file = "/content/drive/MyDrive/Assignment_23/cifar10_qa_colab.json"

   # Fail early with an actionable message instead of a bare listdir error.
   if not os.path.isdir(images_dir):
       raise FileNotFoundError(
           f"{images_dir} not found - is Google Drive mounted at /content/drive?"
       )

   # Check images: show a count and a short, deterministic preview instead of
   # dumping every file name into the cell output.
   image_names = sorted(os.listdir(images_dir))
   print("Images in cifar10_images folder:")
   print(f"{len(image_names)} files, first few: {image_names[:5]}")

   # Check JSON file
   with open(qa_json_file, 'r', encoding='utf-8') as f:
       data = json.load(f)

   print("\nNumber of images in JSON:", len(data))
   if data:
       print("\nSample of first image data:")
       print(json.dumps(data[0], indent=2))
   else:
       # data[0] would raise IndexError on an empty file; report it instead.
       print("\nJSON file contains no entries")

In [None]:
# Cell 1: Updated imports
!pip install torch torchvision transformers accelerate peft datasets pillow sentencepiece
!pip install git+https://github.com/huggingface/transformers.git

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import (
    AutoProcessor,
    AutoModel,
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training
)
from PIL import Image
import json
import os
from typing import Dict, List, Tuple
import numpy as np
from datasets import Dataset as HFDataset
import gc

# Clear GPU memory: collect unreachable Python objects first so the tensors
# they own are freed, then return the now-unused cached blocks to the driver.
gc.collect()
torch.cuda.empty_cache()

In [4]:
# Cell 2: Configuration
class Config:
    """Model ids, hyperparameters and Colab paths shared by the notebook cells."""

    # Model paths
    # SigLIP "so400m" checkpoint, patch size 14, 384px input (as encoded in the model id).
    SIGLIP_MODEL = "google/siglip-so400m-patch14-384"
    PHI3_MODEL = "microsoft/Phi-3-mini-128k-instruct"

    # Training parameters
    BATCH_SIZE = 4
    LEARNING_RATE = 2e-4
    NUM_EPOCHS = 3
    WARMUP_STEPS = 100

    # qLoRA parameters
    LORA_R = 16
    LORA_ALPHA = 32
    LORA_DROPOUT = 0.05

    # Root folder on the mounted Google Drive; every path below is derived from
    # it so relocating the assignment only requires changing this one value.
    BASE_DIR = "/content/drive/MyDrive/Assignment_23"

    # Data paths (Colab-specific)
    IMAGE_DIR = f"{BASE_DIR}/cifar10_images"
    QA_JSON_PATH = f"{BASE_DIR}/cifar10_qa_colab.json"

    # Output paths
    OUTPUT_DIR = f"{BASE_DIR}/output"
    CHECKPOINT_DIR = f"{BASE_DIR}/checkpoints"

In [5]:
# Cell 3: Updated dataset class for SigLIP
class CIFAR10QADataset(Dataset):
    """Dataset pairing each image with the first question listed for it.

    The JSON file is expected to hold a list of entries, each with an
    ``image_path`` key and a ``questions`` list whose items carry a
    ``question`` string. Only ``questions[0]`` of every entry is used.
    """

    def __init__(self, image_dir: str, qa_json_path: str, processor):
        """
        Args:
            image_dir: Directory that ``image_path`` entries are joined onto.
            qa_json_path: Path of the Q&A JSON file.
            processor: Callable accepting ``images``, ``text``, ``return_tensors``,
                ``padding`` and ``truncation`` and returning a mapping with at
                least ``pixel_values`` and ``input_ids`` (batched tensors).
        """
        self.image_dir = image_dir
        self.processor = processor
        self.data = self._load_data(qa_json_path)

    def _load_data(self, qa_json_path: str) -> List[Dict]:
        """Load and return the parsed Q&A JSON file."""
        with open(qa_json_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def __len__(self) -> int:
        """Number of entries in the Q&A file."""
        return len(self.data)

    def __getitem__(self, idx: int) -> Dict:
        """Return ``pixel_values``, ``input_ids`` and ``attention_mask`` for entry ``idx``."""
        item = self.data[idx]
        image_path = os.path.join(self.image_dir, item['image_path'])

        # Use a context manager so the underlying file handle is closed as soon
        # as the pixels are decoded; convert() returns an independent image.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')

        # Process image and text. Every sample must come out with the same
        # sequence length so a batch can be stacked; padding=True on a single
        # text pads nothing, hence the fixed-length padding (which is also how
        # SigLIP text inputs are meant to be prepared) plus truncation.
        inputs = self.processor(
            images=image,
            text=item['questions'][0]['question'],
            return_tensors="pt",
            padding="max_length",
            truncation=True
        )

        input_ids = inputs['input_ids'].squeeze(0)
        if 'attention_mask' in inputs:
            attention_mask = inputs['attention_mask'].squeeze(0)
        else:
            # Processor returned no mask: attend to every position.
            attention_mask = torch.ones_like(input_ids)

        # Drop the batch dimension added by return_tensors="pt".
        return {
            'pixel_values': inputs['pixel_values'].squeeze(0),
            'input_ids': input_ids,
            'attention_mask': attention_mask
        }

In [6]:
# Cell 4: Simplified model setup (without quantization)
def setup_models(config: Config) -> Tuple[nn.Module, nn.Module, object, object]:
    """Load SigLIP and a frozen Phi-3 together with their pre-processors.

    Args:
        config: Supplies the ``SIGLIP_MODEL`` and ``PHI3_MODEL`` model ids.

    Returns:
        A 4-tuple ``(siglip_model, phi3_model, siglip_processor, phi3_tokenizer)``.
    """

    # Setup SigLIP
    siglip_processor = AutoProcessor.from_pretrained(config.SIGLIP_MODEL)
    # SigLIP is the model handed to the Trainer, which runs with fp16 mixed
    # precision. Mixed precision needs fp32 master weights: with fp16 weights the
    # gradient scaler aborts with "Attempting to unscale FP16 gradients".
    # NOTE(review): fp32 weights double SigLIP's weight/optimizer memory.
    siglip_model = AutoModel.from_pretrained(
        config.SIGLIP_MODEL,
        torch_dtype=torch.float32,
        device_map="auto"
    )

    # Setup Phi3 (frozen) - half precision is fine because it is never optimised.
    phi3_model = AutoModelForCausalLM.from_pretrained(
        config.PHI3_MODEL,
        device_map="auto",
        torch_dtype=torch.float16,
        trust_remote_code=True
    )

    # Freeze Phi3 parameters
    phi3_model.requires_grad_(False)

    # Setup Phi3 tokenizer
    phi3_tokenizer = AutoTokenizer.from_pretrained(config.PHI3_MODEL)

    return siglip_model, phi3_model, siglip_processor, phi3_tokenizer

In [7]:
# Cell 5: Updated training setup with compatible arguments
def setup_training(config: Config, model: nn.Module, train_dataset: Dataset):
    """Build a Trainer that fine-tunes ``model`` with a symmetric contrastive loss.

    Args:
        config: Supplies ``OUTPUT_DIR``, ``BATCH_SIZE``, ``LEARNING_RATE``,
            ``NUM_EPOCHS`` and ``WARMUP_STEPS``.
        model: Model whose forward pass accepts ``pixel_values``, ``input_ids``
            and ``attention_mask`` and whose output exposes ``logits_per_image``
            and ``logits_per_text``.
        train_dataset: Dataset yielding dicts with those three input keys.

    Returns:
        The configured trainer, ready for ``.train()``.
    """

    # Custom trainer class for SigLIP
    class SigLIPTrainer(Trainer):
        """Trainer whose loss is the mean of image->text and text->image cross-entropy."""

        def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
            # Forward pass
            outputs = model(
                pixel_values=inputs['pixel_values'],
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask']
            )

            # Upcast so the softmax/log inside cross_entropy is evaluated in
            # fp32 even when the model emits half-precision logits.
            logits_per_image = outputs.logits_per_image.float()
            logits_per_text = outputs.logits_per_text.float()

            # The i-th image matches the i-th text: targets are the diagonal.
            batch_size = logits_per_image.shape[0]
            labels = torch.arange(batch_size, device=logits_per_image.device)

            # Average both retrieval directions.
            loss_img = torch.nn.functional.cross_entropy(logits_per_image, labels)
            loss_txt = torch.nn.functional.cross_entropy(logits_per_text, labels)
            loss = (loss_img + loss_txt) / 2

            return (loss, outputs) if return_outputs else loss

    # fp16 mixed precision requires CUDA and fp32 master weights; if the
    # trainable weights are already fp16 the gradient scaler raises
    # "Attempting to unscale FP16 gradients", so AMP is switched off then.
    trainable_is_half = any(
        p.dtype == torch.float16 for p in model.parameters() if p.requires_grad
    )
    use_fp16_amp = torch.cuda.is_available() and not trainable_is_half
    if trainable_is_half:
        print("Trainable weights are float16: fp16 mixed precision disabled "
              "(load the model in float32 to enable it).")

    training_args = TrainingArguments(
        output_dir=config.OUTPUT_DIR,
        per_device_train_batch_size=config.BATCH_SIZE,
        gradient_accumulation_steps=4,
        learning_rate=config.LEARNING_RATE,
        num_train_epochs=config.NUM_EPOCHS,
        warmup_steps=config.WARMUP_STEPS,
        logging_dir='logs',
        logging_steps=1,  # Log every step
        save_strategy="steps",  # Save more frequently
        save_steps=10,
        # Checkpoints are written every 10 steps; keep only the most recent
        # ones so the output location does not fill up.
        save_total_limit=2,
        fp16=use_fp16_amp,
        remove_unused_columns=False,  # keep every key the dataset returns
        optim="adamw_torch",
        gradient_checkpointing=True,
        report_to="none"  # Disable wandb
    )

    trainer = SigLIPTrainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset
    )

    return trainer

In [None]:
# Cell 6: Updated main function with verification
def main():
    """Run the pipeline: load models, build the dataset, train, save and verify.

    Creates the output directories, fine-tunes SigLIP on the CIFAR10 Q&A
    dataset, saves the result under ``<OUTPUT_DIR>/final_model`` and then tries
    to reload it as a sanity check. Reload problems are reported, not raised.
    """
    # Initialize configuration
    config = Config()

    # Create output directories
    os.makedirs(config.OUTPUT_DIR, exist_ok=True)
    os.makedirs(config.CHECKPOINT_DIR, exist_ok=True)

    # Setup models
    print("Setting up models...")
    siglip_model, phi3_model, siglip_processor, phi3_tokenizer = setup_models(config)

    # Phi-3 is not used anywhere below; drop it now so its weights do not
    # occupy accelerator memory for the whole training run.
    del phi3_model, phi3_tokenizer
    gc.collect()
    torch.cuda.empty_cache()

    # Setup dataset
    print("Setting up dataset...")
    train_dataset = CIFAR10QADataset(
        image_dir=config.IMAGE_DIR,
        qa_json_path=config.QA_JSON_PATH,
        processor=siglip_processor
    )

    # Setup training
    print("Setting up training...")
    trainer = setup_training(config, siglip_model, train_dataset)

    # Train the model
    print("Starting training...")
    trainer.train()

    # Save the final model
    final_model_path = os.path.join(config.OUTPUT_DIR, "final_model")
    trainer.save_model(final_model_path)
    print("Training completed and model saved!")

    # Verify the saved model
    print("\nVerifying saved model...")
    if not os.path.exists(final_model_path):
        print("Model was not saved properly")
        return

    print(f"Model saved successfully at: {final_model_path}")
    print("Contents of saved directory:")
    print(os.listdir(final_model_path))

    # Try loading the saved model. This is a best-effort check: any failure is
    # reported to the user rather than aborting after a completed training run.
    try:
        saved_model = AutoModel.from_pretrained(final_model_path)
        print("\nSuccessfully loaded the saved model!")
        print(f"Model type: {type(saved_model)}")
    except Exception as e:
        print(f"\nError loading saved model: {e}")

if __name__ == "__main__":
    main()