In [1]:
from openai import OpenAI
import os
# Only fall back to the inline placeholder when no key is configured yet.
# Assigning unconditionally would overwrite a key already exported in the
# shell with an empty string.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = ""  # Set your OpenAI API key here

In [2]:
import json
import base64
from PIL import Image
import io
# Prompt text that accompanies each image in the few-shot examples.
question = "The person is holding something in his hand. What will the person do next? Answer with a pair of actions and an object."

# Load the annotated few-shot samples from disk.
with open("test/few_shot_samples.json", "r") as samples_file:
    few_shot_samples = json.load(samples_file)

# Pair every annotated answer with its image; the n-th sample (0-based)
# is matched with ./test/<n>.png.
qa_examples = [
    {"input_path": f"./test/{index}.png", "output": sample["answer"]}
    for index, sample in enumerate(few_shot_samples)
]

# Build the few-shot conversation: for every worked example, one user turn
# (question + image) followed by one assistant turn holding the expected answer.
few_shot_messages = []
for qa_example in qa_examples:
    # Open the image exactly once and let the context manager close the file;
    # the previous code opened each image twice and leaked the first handle.
    with Image.open(qa_example['input_path']) as source_image:
        # JPEG cannot store alpha or palette data, so PNG modes such as
        # RGBA / P are normalised to RGB before re-encoding.
        if source_image.mode in ("RGB", "L"):
            jpeg_ready = source_image
        else:
            jpeg_ready = source_image.convert("RGB")
        jpeg_buffer = io.BytesIO()
        jpeg_ready.save(jpeg_buffer, format="JPEG", quality=90)
    encoded_string = base64.b64encode(jpeg_buffer.getvalue()).decode('utf-8')

    few_shot_messages.append({
        "role": "user",
        "content": [
            {"type": "text", "text": question},
            {
                "type": "image_url",
                "image_url": {
                    # The image is sent inline as a base64 data URL.
                    "url": f"data:image/jpeg;base64,{encoded_string}",
                    "detail": "low",
                },
            },
        ],
    })
    few_shot_messages.append({
        "role": "assistant",
        "content": [{"type": "text", "text": qa_example['output']}],
    })

In [3]:
# Load the evaluation samples whose answers the model is scored against.
with open("test/ground_truth.json", "r") as f:
    ground_truth = json.load(f)

# A single client is reused for every request instead of being rebuilt
# on each loop iteration.
client = OpenAI()

results = []

# Evaluation images are numbered right after the few-shot ones:
# the k-th ground-truth entry uses test/<len(few_shot_samples) + k>.png.
for i, _ in enumerate(ground_truth, start=len(few_shot_samples)):
    image_path = "test/" + str(i) + ".png"

    # Encode the image that belongs to THIS sample. The previous code read
    # qa_example['input_path'] here, i.e. the last few-shot image, so every
    # request carried the same wrong picture. It also leaked a file handle.
    with Image.open(image_path) as source_image:
        # JPEG cannot store alpha or palette data; normalise such modes to RGB.
        if source_image.mode in ("RGB", "L"):
            jpeg_ready = source_image
        else:
            jpeg_ready = source_image.convert("RGB")
        jpeg_buffer = io.BytesIO()
        jpeg_ready.save(jpeg_buffer, format="JPEG", quality=90)
    encoded_string = base64.b64encode(jpeg_buffer.getvalue()).decode('utf-8')

    response = client.chat.completions.create(
        model="gpt-4o",
        # Few-shot turns first, then the actual query for this image.
        messages=few_shot_messages + [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question},
                    {"type": "image_url", "image_url": {
                        "url": f"data:image/jpeg;base64,{encoded_string}",
                        "detail": "low"
                    }},
                ],
            },
        ],
        max_tokens=50,
    )

    # message.content may be None (e.g. a refusal); store an empty string so
    # later text processing of `results` does not fail on a non-string entry.
    results.append(response.choices[0].message.content or "")

In [4]:
# Collect the "answer" field of every ground-truth entry, in file order.
answers = [sample["answer"] for sample in ground_truth]

In [None]:
import nltk
# nltk.download('punkt')
from nltk.translate import bleu
from nltk import word_tokenize
from rouge_score.rouge_scorer import RougeScorer
from nltk.translate import meteor

# Average sentence-level BLEU-1, ROUGE-1/2/L (F-measure) and METEOR over all
# (model output, ground-truth answer) pairs.

# zip() would silently drop unmatched items while the averages still divide by
# len(results), so a length mismatch is rejected up front.
if len(results) != len(answers):
    raise ValueError(
        f"results ({len(results)}) and answers ({len(answers)}) differ in length"
    )
if not results:
    raise ValueError("no results to score")

sum_bleu = 0
sum_rouge1 = 0
sum_rouge2 = 0
sum_rougeL = 0
sum_meteor = 0

# The scorer holds no per-sample state, so it is built once outside the loop.
scorer = RougeScorer(["rouge1", "rouge2", "rougeL"])

for result, answer in zip(results, answers):
    # Tokenise once and use the same tokens for BLEU and METEOR.
    result_token = word_tokenize(result)
    answer_token = word_tokenize(answer)

    # Calculate BLEU score (unigram only).
    # Signature is bleu(references, hypothesis, weights): the ground-truth
    # answer is the reference and the model output is the hypothesis.
    bleu_score = bleu(
        [answer_token],
        result_token,
        (1,),
    )

    # Calculate ROUGE score; signature is score(target, prediction).
    rouge_scores = scorer.score(answer, result)

    # Calculate METEOR score; signature is meteor(references, hypothesis).
    # Left unrounded so it is averaged with the same precision as the others.
    meteor_score = meteor(
        [answer_token],
        result_token,
    )

    # Accumulate the per-sample scores
    sum_bleu += bleu_score
    sum_rouge1 += rouge_scores["rouge1"].fmeasure
    sum_rouge2 += rouge_scores["rouge2"].fmeasure
    sum_rougeL += rouge_scores["rougeL"].fmeasure
    sum_meteor += meteor_score

# Print the mean of each metric over all samples
n_samples = len(results)
print(f"BLEU: {sum_bleu/n_samples}")
print(f"ROUGE-1: {sum_rouge1/n_samples}")
print(f"ROUGE-2: {sum_rouge2/n_samples}")
print(f"ROUGE-L: {sum_rougeL/n_samples}")
print(f"METEOR: {sum_meteor/n_samples}")
