In [None]:
import torch
import json
import requests
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch.nn.functional as F
from tqdm import tqdm


# CONFIGURABLE VARIABLES

In [None]:
# Configurable variables
# Base URL (raw GitHub content) under which the training data lives; the
# names below are concatenated onto it, so it must keep its trailing slash.
GITHUB_REPO = "https://raw.githubusercontent.com/ajaysuseel/MiniProject_AD/main/data/"
# Captions file name; load_dataset() fetches GITHUB_REPO + JSON_FILE.
JSON_FILE = "captions.json"
# Image sub-folder; create_dataloader() passes GITHUB_REPO + IMAGES_FOLDER to
# the dataset, which appends each record's "filename" to it.
IMAGES_FOLDER = "images/"

# Function: Load BLIP Model

In [None]:
def load_blip_model():
    """Load the pretrained BLIP captioning checkpoint together with its processor.

    Returns:
        tuple: ``(model, processor)`` — a ``BlipForConditionalGeneration`` and
        the ``BlipProcessor`` loaded from the same checkpoint name.
    """
    checkpoint = "Salesforce/blip-image-captioning-base"
    blip_processor = BlipProcessor.from_pretrained(checkpoint)
    blip_model = BlipForConditionalGeneration.from_pretrained(checkpoint)
    return blip_model, blip_processor

# Function: Load Dataset from GitHub

In [None]:
def load_dataset():
    """Download the captions JSON from the configured GitHub location.

    The URL is ``GITHUB_REPO + JSON_FILE``.

    Returns:
        The decoded JSON payload on success (the rest of the notebook indexes
        each record by ``"filename"`` and ``"description"``), or an empty list
        when the download fails or the body is not valid JSON.
    """
    json_url = GITHUB_REPO + JSON_FILE
    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(json_url, timeout=30)
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests releases whose
        # json() error is not a RequestException subclass.
        print(f"Error loading dataset: {e}")
        return []

    print(f"Loaded {len(data)} image-caption pairs.")
    return data

# Custom Dataset Class

In [None]:
class CaptionDataset(Dataset):
    """Image-caption dataset whose images are downloaded on demand.

    Args:
        data: Sequence of records; each record is read via the keys
            ``"filename"`` and ``"description"``.
        processor: Callable invoked with ``text=``, ``images=``,
            ``return_tensors="pt"``, ``padding="max_length"`` and
            ``truncation=True``; its result must expose ``.items()`` and
            contain ``"input_ids"`` and ``"attention_mask"``.
        images_dir: URL prefix to which each record's filename is appended.
    """

    def __init__(self, data, processor, images_dir):
        self.data = data
        self.processor = processor
        self.images_dir = images_dir

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    @staticmethod
    def _mask_padding(input_ids, attention_mask):
        """Return a copy of ``input_ids`` with padded positions set to -100.

        -100 is the default ``ignore_index`` of ``torch.nn.CrossEntropyLoss``,
        so padded positions stop contributing to the loss. The input tensor is
        cloned, not modified.
        """
        labels = input_ids.clone()
        labels[attention_mask == 0] = -100
        return labels

    def __getitem__(self, idx):
        """Fetch, preprocess and return one sample, or ``None`` if the image fails to load.

        Returns:
            dict | None: the processor outputs with the leading batch
            dimension squeezed away plus a ``"labels"`` entry, or ``None``
            when the image could not be downloaded or decoded.
        """
        item = self.data[idx]
        image_url = self.images_dir + item["filename"]

        try:
            # The context manager releases the connection; the timeout keeps a
            # stalled download from hanging a DataLoader iteration.
            with requests.get(image_url, stream=True, timeout=30) as response:
                # Fail on 4xx/5xx instead of handing an error page to PIL.
                response.raise_for_status()
                # Let urllib3 undo gzip/deflate transfer encoding on the raw stream.
                response.raw.decode_content = True
                image = Image.open(response.raw).convert("RGB")
        except Exception as e:
            # Best effort: the collate function drops None samples.
            print(f"Error loading image {item['filename']}: {e}")
            return None

        encoding = self.processor(
            text=item["description"],
            images=image,
            return_tensors="pt",
            padding="max_length",
            truncation=True
        )
        # The processor returns batch-of-one tensors; drop that dimension so
        # the DataLoader can stack samples itself.
        encoding = {key: val.squeeze(0) for key, val in encoding.items()}
        # Labels are a masked copy (not an alias) of input_ids so that the
        # padding added by padding="max_length" is excluded from the loss.
        encoding["labels"] = self._mask_padding(
            encoding["input_ids"], encoding["attention_mask"]
        )
        return encoding

# Create DataLoader

In [None]:
def create_dataloader(data, processor, batch_size=4):
    """Wrap the caption records in a shuffling ``DataLoader``.

    Args:
        data: Records handed to ``CaptionDataset``.
        processor: Processor handed to ``CaptionDataset``.
        batch_size: Number of samples requested per batch.

    Returns:
        DataLoader: yields dicts of stacked tensors, or ``None`` for a batch
        in which every sample failed to load.
    """

    def collate_fn(batch):
        # Samples whose image could not be loaded arrive as None; discard them.
        usable = list(filter(lambda sample: sample is not None, batch))
        if not usable:
            return None
        return {
            name: torch.stack([sample[name] for sample in usable])
            for name in usable[0].keys()
        }

    caption_dataset = CaptionDataset(data, processor, GITHUB_REPO + IMAGES_FOLDER)
    return DataLoader(
        caption_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn,
    )

# Train Model

In [None]:
def train_blip(model, dataloader, num_epochs=3, learning_rate=5e-5,
               processor=None, output_dir="./models/finetuned_blip1"):
    """Fine-tune ``model`` on ``dataloader`` and save the result.

    Args:
        model: Module called with ``pixel_values``, ``input_ids``,
            ``attention_mask`` and ``labels``; its output must expose ``.loss``
            and the model must provide ``save_pretrained``.
        dataloader: Iterable of batches (dicts holding the four keys above).
            Batches that are ``None`` are skipped.
        num_epochs: Number of passes over ``dataloader``.
        learning_rate: Learning rate for AdamW.
        processor: Processor to save next to the model. When omitted, a
            notebook-level variable named ``processor`` is used if one exists
            (this is what the function implicitly relied on before).
        output_dir: Directory passed to ``save_pretrained``.
    """
    if processor is None:
        # Backward compatibility with callers that never passed a processor
        # and depended on the global created by the main cell.
        processor = globals().get("processor")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.train()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

    print(f"🚀 Starting fine-tuning on {device} for {num_epochs} epochs...")

    for epoch in range(num_epochs):
        epoch_loss = 0.0
        batches_seen = 0
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")

        for batch in progress_bar:
            if batch is None:
                # The collate function returns None when every image in the
                # batch failed to load; nothing to train on.
                continue

            optimizer.zero_grad()
            pixel_values = batch["pixel_values"].to(device)
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(
                pixel_values=pixel_values,
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            loss = outputs.loss
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            epoch_loss += batch_loss
            batches_seen += 1
            progress_bar.set_postfix(loss=batch_loss)

        # Report the mean over processed batches, not the running sum.
        average_loss = epoch_loss / batches_seen if batches_seen else float("nan")
        print(f"Epoch {epoch+1} completed | Average Loss: {average_loss:.4f}")

    model.save_pretrained(output_dir)
    if processor is not None:
        processor.save_pretrained(output_dir)
    else:
        print("Warning: no processor available; only the model was saved.")
    print("Fine-tuning complete and model saved!")

In [None]:
import os
# Sets WANDB_DISABLED for this process. Nothing in this notebook reads it
# directly; presumably it keeps the Weights & Biases integration off — confirm.
os.environ["WANDB_DISABLED"] = "true"


In [None]:
# Training entry point: load model + data, build the loader, fine-tune for 20 epochs.
if __name__ == "__main__":
    model, processor = load_blip_model()
    data = load_dataset()

    if data:
        dataloader = create_dataloader(data, processor)
        if dataloader is not None:
            train_blip(model, dataloader, num_epochs=20)
        else:
            print("Error: No valid data samples found. Exiting.")
    else:
        # load_dataset() returns an empty list when the download fails.
        print("No data found. Exiting.")


# Gdrive

In [None]:
import os
import shutil
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Define source & destination paths
MODELS_SRC = "/content/models"
GDRIVE_DEST = "/content/drive/MyDrive/models"

# Copy models folder to Google Drive.
# copytree(dirs_exist_ok=True) always writes into GDRIVE_DEST itself, whereas
# `cp -r` nests the copy as GDRIVE_DEST/models once GDRIVE_DEST already exists.
if os.path.exists(MODELS_SRC):
    try:
        shutil.copytree(MODELS_SRC, GDRIVE_DEST, dirs_exist_ok=True)
    except OSError as e:
        # Only claim success when the copy actually succeeded.
        print(f"Failed to copy models folder to Google Drive: {e}")
    else:
        print(f"Models folder successfully copied to Google Drive: {GDRIVE_DEST}")
else:
    print("No 'models' folder found in /content/. Please check your path.")


In [None]:
from google.colab import drive
# Calls Colab's flush_and_unmount(); going by its name, this flushes pending
# writes and unmounts the Drive mount.
drive.flush_and_unmount()

In [None]:
from google.colab import drive
import shutil
import os

# Mount Google Drive
drive.mount('/content/drive')

# Source (Google Drive folder)
# NOTE(review): the upload cell writes to ".../MyDrive/models" (plural) while
# this reads ".../MyDrive/model" — confirm the folder name is intentional.
src_folder = "/content/drive/MyDrive/model"

# Destination (Colab models folder)
dest_folder = "/content/models"

# Copy the folder from Drive.
# dirs_exist_ok=True merges into an existing destination, so the cell can be
# re-run (or run after training created /content/models) without first
# deleting the folder and without raising FileExistsError.
if os.path.isdir(src_folder):
    shutil.copytree(src_folder, dest_folder, dirs_exist_ok=True)
    print("Models folder copied successfully from Drive!")
else:
    print(f"Source folder not found on Drive: {src_folder}")


In [None]:
# from google.colab import drive
# drive.flush_and_unmount()

# Evaluation

In [None]:
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu
import matplotlib.pyplot as plt

In [None]:
import os
os.chdir('/content')  # Change to a known existing directory

!git clone https://github.com/ajaysuseel/MiniProject_AD.git

In [None]:
# Evaluation inputs; passed to evaluate_model() by the final cell.
IMAGE_FOLDER = "/content/MiniProject_AD/raw_data/pranav/images"  # Folder scanned for .png/.jpg/.jpeg files
CAPTIONS_FILE = "/content/MiniProject_AD/raw_data/pranav/captions.json"  # JSON list of records with "filename" and "description"
MODEL_PATH = "./models/finetuned_blip1"  # Fine-tuned BLIP model path (same relative path train_blip saves to)

In [None]:
def load_model_and_processor(model_path):
    """Load a saved BLIP model and processor and prepare the model for inference.

    Args:
        model_path: Location handed to both ``from_pretrained`` calls.

    Returns:
        tuple: ``(model, processor, device)`` with the model moved to CUDA when
        available (CPU otherwise) and switched to eval mode, or
        ``(None, None, None)`` if anything goes wrong while loading.
    """
    try:
        blip_processor = BlipProcessor.from_pretrained(model_path)
        blip_model = BlipForConditionalGeneration.from_pretrained(model_path)
        target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        blip_model.to(target)
        blip_model.eval()
        print(f"Model loaded on {target}")
    except Exception as err:
        print(f"Error loading model: {err}")
        return None, None, None
    else:
        return blip_model, blip_processor, target
def load_ground_truth(local_json_path):
    """Read the ground-truth captions file into a filename -> description map.

    Args:
        local_json_path: Path to a JSON file holding a list of records, each
            with ``"filename"`` and ``"description"`` keys.

    Returns:
        dict: ``{filename: description}``; an empty dict if the file cannot be
        read or parsed, or a record lacks one of the keys.
    """
    try:
        # Decode explicitly as UTF-8 so captions with non-ASCII characters
        # load the same way regardless of the platform's default locale.
        with open(local_json_path, "r", encoding="utf-8") as f:
            records = json.load(f)
        gt_data = {item["filename"]: item["description"] for item in records}
        print(f"Loaded {len(gt_data)} ground truth captions.")
        return gt_data
    except Exception as e:
        print(f"Error loading ground truth: {e}")
        return {}
def load_image(image_path):
    """Open an image file and return it converted to RGB.

    Args:
        image_path: Path of the image to open.

    Returns:
        The RGB image, or ``None`` if the file cannot be opened or decoded.
    """
    try:
        # The context manager closes the underlying file deterministically;
        # convert() returns a separate in-memory image, so it stays usable.
        with Image.open(image_path) as source:
            return source.convert("RGB")
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")
        return None


In [None]:
def generate_caption(model, processor, device, image):
    """Generate a caption for ``image`` with gradient tracking disabled.

    Args:
        model: Object exposing ``generate(**inputs)``.
        processor: Callable producing the model inputs from ``images=``; also
            used to decode the first generated sequence.
        device: Device every processor output is moved to.
        image: Image handed to the processor.

    Returns:
        str: The decoded caption, or ``""`` if any step raises.
    """
    try:
        encoded = processor(images=image, return_tensors="pt")
        model_inputs = {}
        for name, tensor in encoded.items():
            model_inputs[name] = tensor.to(device)
        with torch.no_grad():
            token_ids = model.generate(**model_inputs)
        caption = processor.decode(token_ids[0], skip_special_tokens=True)
    except Exception as err:
        print(f"Error generating caption: {err}")
        caption = ""
    return caption

In [None]:
#for metrics - run only if they are not available
!pip install rouge-score
!pip install torchmetrics

In [None]:
#for cider
#!git clone https://github.com/tylin/coco-caption
#!pip install -e coco-caption


In [None]:
# Download the WordNet corpus; the evaluation cell imports NLTK's meteor_score.
# NOTE(review): some NLTK releases also require the 'omw-1.4' resource when
# WordNet is loaded — confirm against the installed version.
import nltk
nltk.download('wordnet')


In [None]:
def display_image_with_captions(image_path, gt_caption, generated_caption, bleu_score, meteor_score, rouge_score):
    """Show one image titled with both captions and its three scores.

    Does nothing when the image cannot be loaded.

    Args:
        image_path: Path of the image to display.
        gt_caption: Ground-truth caption text.
        generated_caption: Caption produced by the model.
        bleu_score: BLEU value, formatted to four decimals.
        meteor_score: METEOR value, formatted to four decimals.
        rouge_score: ROUGE value, formatted to four decimals.
    """
    picture = load_image(image_path)
    if picture is not None:
        plt.figure(figsize=(8, 6))
        plt.imshow(picture)
        plt.axis("off")
        heading = f"GT: {gt_caption}\nGen: {generated_caption}\nBLEU: {bleu_score:.4f} | METEOR: {meteor_score:.4f} | ROUGE: {rouge_score:.4f}"
        plt.title(heading, fontsize=10)
        plt.show()


In [None]:
import os
from tqdm import tqdm
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu
from nltk.translate.meteor_score import meteor_score
from rouge_score import rouge_scorer
# from pycocoevalcap.cider.cider import Cider  # Commented out due to cider installation issues
from collections import defaultdict

def evaluate_model(image_folder, gt_json_path, model_path):
    """Caption every annotated image in a folder and report BLEU, METEOR and ROUGE-L.

    For each ``.png``/``.jpg``/``.jpeg`` file in ``image_folder`` that has a
    ground-truth caption, a caption is generated, scored against the
    whitespace-tokenised reference, and displayed with its scores. A summary
    (corpus BLEU, mean METEOR, mean ROUGE-L F-measure) is printed at the end.

    Args:
        image_folder: Directory containing the evaluation images.
        gt_json_path: Path to the ground-truth captions JSON.
        model_path: Location of the fine-tuned model and processor.

    Returns:
        None. The function returns early, after printing a message, when the
        ground truth is empty, the image folder is missing, the model cannot
        be loaded, or no image could be evaluated.
    """
    gt_captions = load_ground_truth(gt_json_path)
    if not gt_captions:
        print("No ground truth data available. Exiting evaluation.")
        return

    # Check the folder before the (expensive) model load; os.listdir would
    # otherwise raise on a missing directory.
    if not os.path.isdir(image_folder):
        print(f"Image folder not found: {image_folder}. Exiting evaluation.")
        return

    model, processor, device = load_model_and_processor(model_path)
    if model is None:
        print("Model loading failed. Exiting evaluation.")
        return

    references = []
    hypotheses = []
    meteor_scores = []
    rouge_scores = []

    # os.listdir order is arbitrary; sorting makes the run reproducible.
    image_files = sorted(
        f for f in os.listdir(image_folder)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    print(f"🔹 Found {len(image_files)} images in {image_folder}.")

    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    for filename in tqdm(image_files, desc="Evaluating Images"):
        # Images without a ground-truth caption cannot be scored.
        if filename not in gt_captions:
            continue

        image_path = os.path.join(image_folder, filename)
        image = load_image(image_path)
        if image is None:
            continue

        gt_caption = gt_captions[filename]
        gen_caption = generate_caption(model, processor, device, image)

        hypothesis = gen_caption.split()
        reference = [gt_caption.split()]

        # BLEU Score
        bleu_score = sentence_bleu(reference, hypothesis)

        # METEOR Score (same tokenisation as BLEU)
        meteor = meteor_score(reference, hypothesis)
        meteor_scores.append(meteor)

        # ROUGE Score (the scorer tokenises the raw strings itself)
        rouge = scorer.score(gt_caption, gen_caption)["rougeL"].fmeasure
        rouge_scores.append(rouge)

        # Store for Corpus BLEU calculation
        references.append(reference)
        hypotheses.append(hypothesis)

        # Display Image with Captions & Scores
        display_image_with_captions(image_path, gt_caption, gen_caption, bleu_score, meteor, rouge)

    # corpus_bleu cannot handle empty input, so stop here when nothing was scored.
    if not hypotheses:
        print("No images could be evaluated. Exiting evaluation.")
        return

    # Compute Corpus Scores
    corpus_bleu_score = corpus_bleu(references, hypotheses)
    avg_meteor_score = sum(meteor_scores) / len(meteor_scores)
    avg_rouge_score = sum(rouge_scores) / len(rouge_scores)

    print("\n--- Evaluation Summary ---")
    print(f"Corpus BLEU Score: {corpus_bleu_score:.4f}")
    print(f"Average METEOR Score: {avg_meteor_score:.4f}")
    print(f"Average ROUGE-L Score: {avg_rouge_score:.4f}")

    # CIDEr is not computed: the pycocoevalcap import is disabled in this
    # notebook because of installation issues.



In [None]:
# Run the evaluation with the folder, captions file and model path configured above.
evaluate_model(IMAGE_FOLDER, CAPTIONS_FILE, MODEL_PATH)