In [None]:
!pip install -q fastapi uvicorn pyngrok transformers torch torchvision

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok, conf

# 🔐 Never hardcode the token in the notebook: read it from the NGROK_AUTHTOKEN
# environment variable, and fall back to an interactive (non-echoing) prompt.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
conf.get_default().auth_token = ngrok_token

# Create public tunnel to the local port the API server listens on
public_url = ngrok.connect(8000)
print("🔗 Public URL:", public_url)

🔗 Public URL: NgrokTunnel: "https://e839-34-125-252-110.ngrok-free.app" -> "http://localhost:8000"


In [None]:
import torch
# Ask PyTorch to release its cached CUDA memory; this cell runs before the
# model-loading cells further down the notebook.
torch.cuda.empty_cache()

In [None]:
import io
from PIL import Image
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import JSONResponse
from transformers import BlipProcessor, BlipForConditionalGeneration
from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering
import torch
import uvicorn
import nest_asyncio
import threading
import time

# Captioning checkpoint used by the /caption endpoint.
# Alternatives that were tried and left disabled:
#   - "Salesforce/blip-image-captioning-base" (BlipProcessor / BlipForConditionalGeneration)
#   - "Salesforce/blip2-opt-6.7b" (AutoProcessor / AutoModelForVisualQuestionAnswering)
CAPTION_CHECKPOINT = "Salesforce/blip-image-captioning-large"

processor = BlipProcessor.from_pretrained(CAPTION_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(CAPTION_CHECKPOINT)

# Use the GPU when PyTorch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)

app = FastAPI()

@app.post("/caption")
async def caption_image(file: UploadFile = File(...)):
    """Generate a caption for an uploaded image.

    Args:
        file: Image sent as multipart form data under the field name ``file``.

    Returns:
        JSONResponse with ``{"caption": <text>}`` on success. On failure the body
        is ``{"error": <message>}`` with status 400 when the upload is empty or
        cannot be decoded as an image, and status 500 for any other error.
    """
    try:
        payload = await file.read()
        if not payload:
            return JSONResponse(status_code=400, content={"error": "Uploaded file is empty."})

        # An undecodable upload is a client error, not a server fault. PIL reports
        # unknown or truncated image data with OSError (or a subclass of it).
        try:
            image = Image.open(io.BytesIO(payload)).convert('RGB')
        except OSError as e:
            return JSONResponse(status_code=400, content={"error": f"Invalid image: {e}"})

        # NOTE: generate() is a synchronous call, so this coroutine does not
        # yield to the event loop while inference is running.
        inputs = processor(image, return_tensors="pt").to(device)
        output = model.generate(**inputs)
        caption = processor.decode(output[0], skip_special_tokens=True)
        return JSONResponse(content={"caption": caption})
    except Exception as e:
        # Catch-all boundary so clients always receive a JSON error body.
        return JSONResponse(status_code=500, content={"error": str(e)})


# Required to run FastAPI inside Colab
nest_asyncio.apply()

def run_app():
    """Serve ``app`` with uvicorn on 0.0.0.0:8000; blocks until the server stops."""
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Run the server in a background daemon thread: the cell returns immediately and
# the thread cannot keep the interpreter alive at shutdown. The handle is kept so
# later cells can inspect it (e.g. server_thread.is_alive()).
server_thread = threading.Thread(target=run_app, name="uvicorn-server", daemon=True)
server_thread.start()

In [None]:
# QA (RAG-style) application: caption the uploaded image, then answer a question using that caption as the context

from fastapi import FastAPI, UploadFile, File, Form
from pyngrok import ngrok
from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
from sentence_transformers import SentenceTransformer
from PIL import Image
import torch

# Reuse the FastAPI app created earlier in the notebook when there is one. A
# server that is already running holds a reference to that object, so routes
# registered on a brand-new FastAPI() instance would never be served by it.
# NOTE(review): if no earlier cell started a server, one still has to be started
# for the routes registered here to be reachable.
if not isinstance(globals().get("app"), FastAPI):
    app = FastAPI()

# Load models
device = "cuda" if torch.cuda.is_available() else "cpu"

# Image captioning model and its matching processor
caption_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to(device)
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")

# NOTE(review): embedder is loaded but not referenced anywhere else in this cell — confirm it is still needed.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Question-answering pipeline that is run over the generated caption
qa_pipeline = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")

@app.post("/agent")
async def process_image(file: UploadFile = File(...), question: str = Form(...)):
    """Caption an uploaded image and answer a question using the caption as context.

    Args:
        file: Image sent as multipart form data under the field name ``file``.
        question: Question text sent as the form field ``question``.

    Returns:
        ``{"caption": <text>, "answer": <text>}`` on success. On failure a
        JSONResponse with body ``{"error": <message>}``: status 400 when the upload
        cannot be decoded as an image, status 500 for any other error.
    """
    # Imported locally so this cell does not depend on another cell's imports.
    from fastapi.responses import JSONResponse

    try:
        # An undecodable upload is a client error. PIL reports unknown or
        # truncated image data with OSError (or a subclass of it).
        try:
            image = Image.open(file.file).convert("RGB")
        except OSError as e:
            return JSONResponse(status_code=400, content={"error": f"Invalid image: {e}"})

        # Captioning
        inputs = caption_processor(images=image, return_tensors="pt").to(device)
        caption_ids = caption_model.generate(**inputs)
        caption = caption_processor.decode(caption_ids[0], skip_special_tokens=True)

        # QA based on caption
        answer = qa_pipeline(question=question, context=caption)["answer"]

        return {"caption": caption, "answer": answer}
    except Exception as e:
        # Same {"error": ...} JSON shape the /caption endpoint uses for failures.
        return JSONResponse(status_code=500, content={"error": str(e)})

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/4.60k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.88G [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


preprocessor_config.json:   0%|          | 0.00/445 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/527 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/473 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/261M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Device set to use cuda:0
