In [None]:
!pip install torch torchvision transformers aiofiles



In [None]:
import os
import json
import torch
import asyncio
import aiofiles
import nest_asyncio
from tqdm import tqdm
from PIL import Image
from google.colab import drive
from transformers import AutoProcessor, AutoModelForVision2Seq

# Patch asyncio so coroutines can be driven from inside the notebook's
# already-running event loop.
nest_asyncio.apply()

# Both the input dataset and the output folder live on Google Drive.
drive.mount('/content/drive', force_remount=True)

dataset_path = '/content/drive/MyDrive/mmml_project/mini_gqa.json'
output_folder = '/content/drive/MyDrive/mmml_project/outputs'
os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, 'mini_gqa_with_smolvlm.json')

# Read with an explicit encoding so decoding does not depend on the
# runtime's locale default.
with open(dataset_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Single source of truth for the checkpoint so the processor and the model
# can never drift apart.
MODEL_ID = "HuggingFaceTB/SmolVLM-Instruct"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForVision2Seq.from_pretrained(MODEL_ID).to(device)

# Upper bound on how many records may be inside the inference section of
# process_record at the same time.
SEMAPHORE = asyncio.Semaphore(4)

async def process_record(record):
    """Answer one record's question about its image with the loaded model.

    Args:
        record: Dict with a "question" string and an "image_file" path.
            It is mutated in place: the outcome is stored under the
            "smolvlm_response" key.

    Returns:
        The same ``record`` object. "smolvlm_response" holds the decoded
        model output, or an "Error: ..." string when the image file is
        missing, cannot be loaded, or inference fails.

    Raises:
        KeyError: If ``record`` lacks "question" or "image_file".
    """
    question = record["question"]
    image_file = record["image_file"]

    if not os.path.exists(image_file):
        record["smolvlm_response"] = "Error: Image not found"
        return record

    formatted_question = f"<image> {question}"

    # NOTE(review): nothing inside this section awaits, so the blocking
    # generate() call holds the event loop for its whole duration — confirm
    # whether offloading to a worker thread was intended.
    async with SEMAPHORE:
        try:
            # Load inside the try so an unreadable or corrupt file becomes a
            # per-record error instead of an exception that aborts the whole
            # gather(). The context manager closes the file handle; the
            # converted image is an independent in-memory copy.
            with Image.open(image_file) as raw_image:
                image = raw_image.convert("RGB")

            inputs = processor(images=[image], text=[formatted_question], return_tensors="pt").to(device)
            with torch.no_grad():
                generated_ids = model.generate(**inputs)
            # NOTE(review): the full generated sequence is decoded, which
            # presumably still contains the echoed question — confirm
            # whether the prompt should be stripped from the response.
            generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
            record["smolvlm_response"] = generated_text.replace("<image>", "").strip()
        except Exception as e:
            record["smolvlm_response"] = f"Error: {str(e)}"

    return record

async def process_dataset():
    """Run every record in ``data`` through ``process_record`` and save the results.

    The gathered results are serialized as indented JSON and written to
    ``output_path``; the destination path is printed once the write is done.
    """
    results = await asyncio.gather(*(process_record(entry) for entry in data))

    async with aiofiles.open(output_path, 'w') as out_file:
        payload = json.dumps(results, indent=2)
        await out_file.write(payload)

    print(f"Updated dataset saved to: {output_path}")

# Top-level `await` is only valid in an interactive IPython/Jupyter cell;
# a plain Python script would need asyncio.run(process_dataset()) instead.
await process_dataset()

Mounted at /content/drive


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Some kwargs in processor config are unused and will not have any effect: image_seq_len. 


Updated dataset saved to: /content/drive/MyDrive/mmml_project/outputs/mini_gqa_with_smolvlm.json
