<a href="https://colab.research.google.com/github/ajaysuseel/MiniProject_AD/blob/main/finetuning_Lightning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install lightning

Collecting lightning
  Downloading lightning-2.5.1-py3-none-any.whl.metadata (39 kB)
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)
  Downloading lightning_utilities-0.14.2-py3-none-any.whl.metadata (5.6 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.7.0-py3-none-any.whl.metadata (21 kB)
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.5.1-py3-none-any.whl.metadata (20 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<4.0,>=2.1.0->lightning)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Co

In [1]:
import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset
from transformers import BlipProcessor, BlipForConditionalGeneration
from pytorch_lightning.callbacks import ModelCheckpoint
import json
import requests
from PIL import Image
from torchvision import transforms

In [2]:
# Dataset location: everything is fetched over HTTP from the raw GitHub content
# of the project repository. Change these three values to point at another dataset.
# Base URL (must end with "/") under which the caption file and image folder live.
GITHUB_REPO = "https://raw.githubusercontent.com/ajaysuseel/MiniProject_AD/main/augmented/"
# Sub-folder of GITHUB_REPO that holds the image files (must end with "/").
IMAGES_FOLDER = "images/"
# File under GITHUB_REPO that lists the image/caption pairs.
JSON_FILE = "captions.json"

In [3]:
# Dataset Class
class CaptionDataset(Dataset):
    """Image-caption dataset streamed over HTTP from ``GITHUB_REPO``.

    The caption index (``GITHUB_REPO + JSON_FILE``) is downloaded once at
    construction time; each entry must provide a ``"filename"`` and a
    ``"description"`` key. Images are downloaded lazily in ``__getitem__``.

    Args:
        processor: A BLIP processor used to preprocess the image and tokenize
            the caption.
        max_length: Fixed token length every caption is padded/truncated to,
            so that samples can be stacked into a batch.
    """

    # Seconds to wait on any single HTTP request, so a stalled connection
    # cannot hang data loading forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, processor, max_length=128):
        json_url = GITHUB_REPO + JSON_FILE
        try:
            response = requests.get(json_url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            self.data = response.json()
            print(f"Loaded {len(self.data)} image-caption pairs.")
        except requests.exceptions.RequestException as e:
            # Best effort: report the failure and fall back to an empty dataset.
            print(f"Error loading dataset: {e}")
            self.data = []

        self.processor = processor
        self.max_length = max_length
        # Retained only so existing code that reads ``dataset.transform`` keeps
        # working; ``__getitem__`` relies on the processor's image pipeline.
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

    def __len__(self):
        """Return the number of image-caption pairs that were loaded."""
        return len(self.data)

    @staticmethod
    def _mask_padding(input_ids, attention_mask):
        """Return a copy of ``input_ids`` with padded positions set to -100.

        -100 is the default ``ignore_index`` of ``torch.nn.CrossEntropyLoss``,
        so positions where ``attention_mask`` is 0 do not contribute to a loss
        computed with that default.
        """
        return input_ids.masked_fill(attention_mask == 0, -100)

    def __getitem__(self, idx):
        """Download and encode sample ``idx``.

        Returns:
            A dict with ``pixel_values``, ``input_ids``, ``attention_mask`` and
            ``labels`` tensors (batch dimension removed), or ``None`` when the
            image could not be downloaded or decoded.
        """
        item = self.data[idx]
        image_url = GITHUB_REPO + IMAGES_FOLDER + item["filename"]

        try:
            # The context manager releases the streamed connection once the
            # image has been fully decoded by ``convert``.
            with requests.get(image_url, stream=True, timeout=self.REQUEST_TIMEOUT) as response:
                response.raise_for_status()
                # Make the raw stream undo any HTTP content-encoding before PIL reads it.
                response.raw.decode_content = True
                image = Image.open(response.raw).convert("RGB")
        except Exception as e:
            print(f"Error loading image {item['filename']}: {e}")
            return None

        # Let the processor handle the image as well as the text so the image is
        # prepared by the processor's own pipeline rather than a hand-rolled one.
        encoding = self.processor(
            images=image,
            text=item["description"],
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )
        input_ids = encoding.input_ids.squeeze(0)
        attention_mask = encoding.attention_mask.squeeze(0)

        return {
            "pixel_values": encoding.pixel_values.squeeze(0),
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            # Padding must not be treated as a prediction target.
            "labels": self._mask_padding(input_ids, attention_mask)
        }


In [11]:
# BLIP Lightning Module
class BlipLightning(pl.LightningModule):
    """Lightning wrapper that fine-tunes a BLIP captioning model.

    Args:
        model_name: Hugging Face identifier of the pretrained BLIP checkpoint.
        learning_rate: Learning rate handed to the AdamW optimizer.
    """

    def __init__(self, model_name="Salesforce/blip-image-captioning-base", learning_rate=5e-5):
        super().__init__()
        self.model = BlipForConditionalGeneration.from_pretrained(model_name)
        self.processor = BlipProcessor.from_pretrained(model_name, use_fast=True)
        self.learning_rate = learning_rate

    def forward(self, pixel_values, input_ids, attention_mask, labels):
        """Run the wrapped model and return its output object unchanged."""
        return self.model(
            pixel_values=pixel_values,
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

    def training_step(self, batch, batch_idx):
        """Compute and log the captioning loss for one batch.

        Returns:
            The loss tensor, or ``None`` for an empty batch so that the
            optimisation step is skipped.
        """
        # A collate function that drops failed samples may hand over ``None``
        # when every sample of the batch failed; there is nothing to train on.
        if batch is None:
            return None

        outputs = self(
            pixel_values=batch["pixel_values"],
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["labels"]
        )
        loss = outputs.loss
        # Log the per-step value for the progress bar and additionally the epoch
        # aggregate, so a checkpoint callback monitoring "train_loss" at epoch end
        # compares whole epochs instead of whichever batch happened to be last.
        self.log(
            "train_loss",
            loss,
            prog_bar=True,
            on_step=True,
            on_epoch=True,
            batch_size=batch["pixel_values"].size(0),
        )
        return loss

    def configure_optimizers(self):
        """Optimise every parameter of the module with AdamW."""
        return torch.optim.AdamW(self.parameters(), lr=self.learning_rate)

In [13]:
# DataLoader Wrapper
def create_dataloader(batch_size=4):
    """Build a shuffling DataLoader over ``CaptionDataset``.

    Args:
        batch_size: Number of samples requested per batch.

    Returns:
        A ``DataLoader`` whose batches are dicts of stacked tensors, or ``None``
        when every sample of a batch was ``None``.
    """
    caption_processor = BlipProcessor.from_pretrained(
        "Salesforce/blip-image-captioning-base",
        use_fast=True,
    )
    caption_dataset = CaptionDataset(caption_processor)

    def collate_fn(batch):
        # Samples that failed to load arrive as None; leave them out of the batch.
        usable = [sample for sample in batch if sample is not None]
        if not usable:
            return None
        stacked = {}
        for key in usable[0].keys():
            stacked[key] = torch.stack([sample[key] for sample in usable])
        return stacked

    return DataLoader(
        caption_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn,
    )


In [12]:
# Training Function
def train_model(num_epochs=3):
    """Fine-tune ``BlipLightning`` on the caption dataset.

    Args:
        num_epochs: Maximum number of epochs passed to the trainer.

    A single checkpoint, selected by the lowest monitored ``train_loss``, is
    written to ``checkpoints/`` with the file name stem ``blip_best``.
    """
    train_loader = create_dataloader()

    # Keep only the one checkpoint with the smallest monitored training loss.
    best_checkpoint = ModelCheckpoint(
        dirpath="checkpoints/",
        filename="blip_best",
        monitor="train_loss",
        mode="min",
        save_top_k=1,
    )

    trainer = pl.Trainer(
        accelerator="auto",
        max_epochs=num_epochs,
        callbacks=[best_checkpoint],
    )

    blip_module = BlipLightning()
    # Switch to train mode explicitly before fitting.
    # NOTE(review): presumably needed because the pretrained model is not left in
    # train mode after loading — confirm before removing.
    blip_module.train()
    trainer.fit(blip_module, train_loader)

    print("✅ Fine-tuning complete. Best model saved!")

In [14]:
# Run Training
# Starts fine-tuning for 7 epochs (train_model's own default is 3).
# The __main__ guard keeps this from running if the code is imported as a module.
if __name__ == "__main__":
    train_model(num_epochs=7)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Loaded 1270 image-caption pairs.


INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name  | Type                         | Params | Mode 
---------------------------------------------------------------
0 | model | BlipForConditionalGeneration | 247 M  | train
---------------------------------------------------------------
247 M     Trainable params
0         Non-trainable params
247 M     Total params
989.656   Total estimated model params size (MB)
491       Modules in train mode
0         Modules in eval mode


Training: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=7` reached.


✅ Fine-tuning complete. Best model saved!


In [18]:
from transformers import BlipProcessor

# Load the processor of the base BLIP checkpoint and store a copy under
# checkpoints/blip_processor (relative to the current working directory).
# NOTE(review): unlike the processors created for training, this call does not
# pass use_fast=True — confirm that the difference is intended.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
processor.save_pretrained("checkpoints/blip_processor")

[]

# Google Drive

In [19]:
import os
import shutil
from google.colab import drive

# 🚀 Mount Google Drive
drive.mount('/content/drive')

# 🔄 Define source & destination paths
MODELS_SRC = "/content/checkpoints"
GDRIVE_DEST = "/content/drive/MyDrive/lightning_models/models"

# ✅ Copy models folder to Google Drive
# copytree creates missing parent folders, merges into an existing destination
# instead of nesting a second copy inside it on re-runs, and raises on failure,
# so the success message is only printed when the copy really happened.
if os.path.isdir(MODELS_SRC):
    shutil.copytree(MODELS_SRC, GDRIVE_DEST, dirs_exist_ok=True)
    print(f"✅ Checkpont folder successfully copied to Google Drive: {GDRIVE_DEST}")
else:
    print("⚠️ No 'checkpoint' folder found in /content/. Please check your path.")

Mounted at /content/drive
✅ Checkpont folder successfully copied to Google Drive: /content/drive/MyDrive/lightning_models/models


In [20]:
# Imported here as well so this cell does not depend on an earlier cell's imports.
import os
import shutil

# 🔄 Define source & destination paths
MODELS_SRC = "/content/lightning_logs"
GDRIVE_DEST = "/content/drive/MyDrive/lightning_models/logs"

# ✅ Copy logs folder to Google Drive
# copytree creates missing parent folders, merges into an existing destination
# instead of nesting a second copy inside it on re-runs, and raises on failure,
# so the success message is only printed when the copy really happened.
if os.path.isdir(MODELS_SRC):
    shutil.copytree(MODELS_SRC, GDRIVE_DEST, dirs_exist_ok=True)
    print(f"✅ Logs folder successfully copied to Google Drive: {GDRIVE_DEST}")
else:
    print("⚠️ No 'logs' folder found in /content/. Please check your path.")

✅ Logs folder successfully copied to Google Drive: /content/drive/MyDrive/lightning_models/logs


In [21]:
# Done copying: flush and unmount the Drive mounted earlier in this notebook.
# NOTE(review): presumably this ensures pending writes reach Drive before the
# runtime is closed — confirm against the google.colab documentation.
drive.flush_and_unmount()

# Evaluation