In [None]:
from InferX import list_models

# Ask InferX which models it knows about and print one line per entry.
# Each entry is indexed by the keys 'implementation' and 'model_type'.
available_models = list_models()
# The loop variable is deliberately NOT called `model`: later cells bind `model`
# to a loaded model object, and leaking a dict under that name would make an
# out-of-order cell run fail with a confusing AttributeError.
for model_info in available_models:
    print(f"Implementation: {model_info['implementation']}, Model Type: {model_info['model_type']}")

## Load the BLIP2 Model and Run Inference

In [None]:
from InferX import get_model

# Build the base BLIP2 checkpoint through InferX's "transformers" implementation.
model = get_model("Salesforce/blip2-opt-2.7b", implementation="transformers")

# What we ask, and the picture we ask it about (given to preprocess as a URL string).
prompt = "What's in this image? Answer:"
image = "https://img.freepik.com/free-photo/adorable-black-white-kitty-with-monochrome-wall-her_23-2148955182.jpg"

# Three explicit stages: preprocess -> predict -> postprocess.
# predict() is called without extra generation arguments here.
processed_input = model.preprocess(image, prompt)
prediction = model.predict(processed_input)
output = model.postprocess(prediction)

print(output)


In [None]:
# Reuses the `model` bound by the previous cell; only the prompt and the
# generation budget differ from the first run.
prompt = "Describe this image in concise detail. Answer:"
image = "https://img.freepik.com/free-photo/adorable-black-white-kitty-with-monochrome-wall-her_23-2148955182.jpg"

# Same preprocess -> predict -> postprocess pipeline, this time passing
# max_new_tokens=200 through to predict().
processed_input = model.preprocess(image, prompt)
prediction = model.predict(processed_input, max_new_tokens=200)
output = model.postprocess(prediction)

print(output)

## Load the VLRM Fine-Tuned BLIP2 Model and Run Inference

In [None]:
from InferX import get_model

# Rebind `model` to the VLRM checkpoint, again via InferX's "transformers" implementation.
model = get_model("sashakunitsyn/vlrm-blip2-opt-2.7b", implementation="transformers")

# Same picture and question as the first BLIP2 run.
prompt = "What's in this image? Answer:"
image = "https://img.freepik.com/free-photo/adorable-black-white-kitty-with-monochrome-wall-her_23-2148955182.jpg"

# preprocess -> predict -> postprocess, with max_new_tokens=200 passed to predict().
processed_input = model.preprocess(image, prompt)
prediction = model.predict(processed_input, max_new_tokens=200)
output = model.postprocess(prediction)

print(output)


In [None]:
import torch
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

# Baseline: run the stock BLIP2 checkpoint directly through Hugging Face Transformers.

# Choose device and precision once so the model and its inputs always agree.
# On a CUDA machine this is float16 on "cuda"; without a GPU it falls back to
# float32 on CPU instead of crashing on `.to("cuda")`.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.float16 if device == "cuda" else torch.float32

processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
# NOTE: this rebinds `model`, replacing whatever an earlier cell stored under that name.
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=dtype).to(device)

img_url = "https://img.freepik.com/free-photo/adorable-black-white-kitty-with-monochrome-wall-her_23-2148955182.jpg"
# - timeout: requests waits indefinitely by default, which would hang the cell.
# - raise_for_status: surface HTTP errors directly rather than as an image-decoding failure.
# - with-block: a streamed response keeps its connection open until it is closed.
with requests.get(img_url, stream=True, timeout=30) as response:
    response.raise_for_status()
    # convert() loads the full image, so it no longer depends on the stream afterwards.
    raw_image = Image.open(response.raw).convert('RGB')

# Only the image is given to the processor (no text prompt).
inputs = processor(raw_image, return_tensors="pt").to(device, dtype)

out = model.generate(**inputs, max_new_tokens=20)
# Last expression of the cell, so the notebook displays the decoded text.
processor.decode(out[0], skip_special_tokens=True).strip()


In [None]:
from huggingface_hub import hf_hub_download
# Download the fine-tuned checkpoint file from the Hugging Face Hub.
checkpoint_path = hf_hub_download(repo_id="sashakunitsyn/vlrm-blip2-opt-2.7b", filename="vlrm-blip2-opt-2.7b.pt")

# NOTE(security): torch.load unpickles the file; only load checkpoints from a
# source you trust. map_location="cpu" stages the tensors in host memory;
# load_state_dict then copies them into the model's existing parameters.
finetuned_weights_state_dict = torch.load(checkpoint_path, map_location="cpu")

# strict=False tolerates key mismatches between the checkpoint and the model.
# Keep the result so a mismatch is visible instead of silently ignored: any
# checkpoint key the model does not recognise is reported.
load_result = model.load_state_dict(finetuned_weights_state_dict, strict=False)
if load_result.unexpected_keys:
    print(f"Warning: {len(load_result.unexpected_keys)} checkpoint keys were not used: {load_result.unexpected_keys[:5]}")

# Re-run generation on the same `inputs` to compare against the previous cell.
out = model.generate(**inputs, max_new_tokens=20)
processor.decode(out[0], skip_special_tokens=True).strip()
