In [None]:
# Fetch the LLaVA-Rad sources from GitHub, then make the cloned directory the
# working directory for the cells that follow.
!git clone https://github.com/microsoft/llava-rad.git
%cd llava-rad

In [None]:
# Upgrade pip, then install the package found in the current directory in
# editable mode. `%pip` is used instead of `!pip` so that pip runs for the
# environment of the running kernel rather than whatever `pip` the shell finds.
%pip install --upgrade pip
%pip install -e .

In [None]:
import requests
import torch
from PIL import Image
from io import BytesIO
# import pandas as pd
import os
# Make sure the working directory is the cloned repository before the `llava`
# imports below. Guarded so it is a no-op when an earlier cell has already
# changed into the repo: a blind `%cd llava-rad` from inside the repo would
# either fail or descend into a nested directory of the same name.
if os.path.basename(os.getcwd()) != "llava-rad" and os.path.isdir("llava-rad"):
    os.chdir("llava-rad")
from llava.constants import IMAGE_TOKEN_INDEX
from llava.conversation import conv_templates
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from llava.mm_utils import tokenizer_image_token, KeywordsStoppingCriteria

In [None]:
def load_image(image_file, timeout=30):
    """Open an image from a local path or an HTTP(S) URL and convert it to RGB.

    Args:
        image_file: Filesystem path, or a URL starting with ``http://`` or
            ``https://``. Anything else is treated as a local path, so a file
            that merely happens to be named ``http_scan.png`` is read from disk.
        timeout: Seconds passed to ``requests.get`` for remote images so a
            stalled server cannot hang the notebook. Ignored for local paths.

    Returns:
        The object returned by ``Image.open(...).convert('RGB')``.

    Raises:
        Whatever ``requests.get`` / ``Response.raise_for_status`` raise for a
        failed download, and whatever ``Image.open`` raises for an unreadable
        source.
    """
    if image_file.startswith(('http://', 'https://')):
        response = requests.get(image_file, timeout=timeout)
        # Fail on 4xx/5xx here instead of handing an error page to the decoder.
        response.raise_for_status()
        source = BytesIO(response.content)
    else:
        source = image_file
    return Image.open(source).convert('RGB')

In [None]:
# Arguments handed to load_pretrained_model, plus the key used later to pick
# the conversation template from conv_templates.
conv_mode = "v1"
model_name = "llavarad"
model_base = "lmsys/vicuna-7b-v1.5"
model_path = "microsoft/llava-rad"

disable_torch_init()

# Unpack the four values returned by the loader into notebook-level names.
(
    tokenizer,
    model,
    image_processor,
    context_len,
) = load_pretrained_model(model_path, model_base, model_name)

In [None]:
import pandas as pd

# Question table first, then the folder the image file names are joined to
# when each row is processed.
df = pd.read_csv("/kaggle/input/vqa-rad/VQA-RAD-Chest/VQA_RAD_Chest_Data.csv")
vqa_rad_dir = "/kaggle/input/vqa-rad/VQA-RAD-Chest/VQA_RAD_Chest_Image_Folder"

# Preview the first rows of the table.
df.head()

In [None]:
# Keep only the rows whose A_TYPE is "OPEN". The explicit .copy() makes the
# result an independent frame, so adding a column to it later does not raise
# pandas' SettingWithCopyWarning.
df = df[df.A_TYPE == "OPEN"].copy()
df

In [None]:
# Run the model once per row of `df` and collect the decoded answers in row
# order, so that `responses` lines up with the frame afterwards.
responses = []
total_rows = len(df)

for n_done, (_, row) in enumerate(df.iterrows(), start=1):
    # Only the part of IMAGEID after the last '/' is used as the file name
    # inside the local image folder.
    image_id = row['IMAGEID'].split('/')[-1]
    image_file = os.path.join(vqa_rad_dir, image_id)

    # NOTE(review): the image token is followed directly by the question with
    # no separator; confirm whether the model expects "<image>\n" here.
    query = f"<image>{row['QUESTION']}"
    conv = conv_templates[conv_mode].copy()
    conv.append_message(conv.roles[0], query)
    # Second role is left empty so the prompt ends where the answer should start.
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()

    print(prompt)
    image = load_image(image_file)
    image_tensor = image_processor.preprocess(image, return_tensors='pt')['pixel_values'][0].half().unsqueeze(0).cuda()

    input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt').unsqueeze(0).cuda()

    stopping_criteria = KeywordsStoppingCriteria(["</s>"], tokenizer, input_ids)

    with torch.inference_mode():
        output_ids = model.generate(
            input_ids,
            images=image_tensor,
            do_sample=False,
            temperature=0.0,
            max_new_tokens=1024,
            # Previously built but never handed to generate().
            stopping_criteria=[stopping_criteria],
            use_cache=True)

    # Drop the prompt tokens and decode only what was generated after them.
    outputs = tokenizer.batch_decode(output_ids[:, input_ids.shape[1]:], skip_special_tokens=True)[0]
    outputs = outputs.strip()
    responses.append(outputs)

    # Progress as "rows done / rows total"; the frame's index labels are not
    # contiguous after filtering, so they are not used as a counter.
    print(f"{n_done}/{total_rows}")

In [None]:
# Attach the generated answers (one per row, in iteration order) and write the
# table to the Kaggle working directory. `assign` builds a new frame instead of
# writing a column into `df`, which was produced by filtering the loaded CSV;
# an in-place column write on such a selection can raise SettingWithCopyWarning.
df = df.assign(**{'llava-rad response': responses})
df.to_csv('/kaggle/working/Vanilla_VQA_RAD_LLaVA-RAD.csv', index=False)
df

In [None]:
# Display the raw list of answers collected by the inference loop.
responses