In [1]:
from transformers import AutoProcessor, BitsAndBytesConfig, LlavaNextForConditionalGeneration
import torch
from datasets import load_dataset
from pprint import pprint

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
MODEL_ID = "llava-hf/llava-v1.6-mistral-7b-hf"
processor = AutoProcessor.from_pretrained(MODEL_ID)

# Define quantization config
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.float16
)

# Load the base model with adapters on top
model = LlavaNextForConditionalGeneration.from_pretrained(
    "/home/vaibhav/LLMs-TimeSeries/software/Checkpoint_img_model/",
    # MODEL_ID,
    torch_dtype=torch.float16,
    quantization_config=quantization_config,
)

Some kwargs in processor config are unused and will not have any effect: num_additional_image_tokens. 
`low_cpu_mem_usage` was None, now default to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.02s/it]


In [15]:
dataset = load_dataset("VaibhavMal/AirQualty_imageConv", split="test")
print(dataset)

Dataset({
    features: ['id', 'image_path', 'query', 'answers', 'image'],
    num_rows: 31
})


In [16]:
image = dataset[0]["image"]
en_query = dataset[0]["query"]
print(en_query)

prompt = f"[INST] <image>\n{en_query} [\INST]"
max_output_token = 4096
inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
output = model.generate(**inputs, max_new_tokens=max_output_token)
response = processor.decode(output[0], skip_special_tokens=True)
response


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Analyze the provided Scalogram of Nitrogen Oxide day time data. Estimate the expected average C0 value for the subsequent day.


'[INST]  \nAnalyze the provided Scalogram of Nitrogen Oxide day time data. Estimate the expected average C0 value for the subsequent day. [\\INST] 122222222222222222222222222222222222222222222222222222222222222\n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n '

In [17]:
image = dataset[0]["image"]
en_query = "Analyze the image and predict what kind of time series transformation it could have been?"
print(en_query)

prompt = f"[INST] <image>\n{en_query} [\INST]"
max_output_token = 4096
inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
output = model.generate(**inputs, max_new_tokens=max_output_token)
response = processor.decode(output[0], skip_special_tokens=True)
response

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Analyze the image and predict what kind of time series transformation it could have been?


'[INST]  \nAnalyze the image and predict what kind of time series transformation it could have been? [\\INST] 11111111111111111111111111111111111111111111111111111 \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n\n- \n '