In [None]:
import os
import base64
import requests
import pandas as pd
from dotenv import load_dotenv
from tqdm.notebook import tqdm
import time

In [None]:
# Load variables from a .env file (if any) into the environment, then read the API key.
# The lookup yields None when EMNLP_OPENAI_API is not set.
load_dotenv()
OPENAI_API = os.environ.get("EMNLP_OPENAI_API")

In [None]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 text string.

    The file is read in binary mode and the base64 bytes are decoded as UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

In [None]:
def prompt_with_cap(captions, question):
    """Build the caption-grounded Bengali question-answering prompt.

    ``captions`` and ``question`` are substituted into the CAPTION# and
    QUESTION# slots of the template; the filled-in text is returned.
    """
    template = """
    You are an expert Bengali Question answering assistant. Given a caption, when asked a question with the context of the caption
    you try to provide a single worded answer by following the guidelines given below:
    \n 
    1:  Try to generate answer of one or two words. And the answer must never contain more than three words.
    2:  Always answer the question in Bengali language.
    \n
    CAPTION# {captions},
    QUESTION# {question}
    When generating the bengali answer of the question mentioned, generate in the following format:
        
    ANSWER# "(generated answer)"

    """
    return template.format(captions=captions, question=question)

In [None]:
def prompt_nocap(question):
    """Build the image-only (no caption) Bengali visual question-answering prompt.

    ``question`` is substituted into the QUESTION# slot of the template; the
    filled-in text is returned.
    """
    template = """
    You are an expert Bengali Visual Question answering assistant. Given an image, when asked a question with the context of image
    you try to provide a single worded answer by following the guidelines given below:
    \n 
    1:  The answer should always be image aligned and informative.
    2:  Try to generate answer of one or two words. And the answer must never contain more than three words.
    3:  Always answer the question in Bengali language.
    \n
    QUESTION# {question}
    When generating the bengali answer of the question mentioned, generate in the following format:
        
    ANSWER# "(generated answer)"

    """
    return template.format(question=question)

In [None]:
def generate_Answer(captions, question, model="gpt-3.5-turbo", timeout=60):
    """Ask an OpenAI chat model to answer ``question`` using ``captions`` as context.

    The prompt text is built with ``prompt_with_cap`` and sent as a single
    user message to the chat-completions endpoint, authenticated with the
    module-level ``OPENAI_API`` key.

    Args:
        captions: Caption text inserted into the prompt.
        question: Question text inserted into the prompt.
        model: Name of the chat model to request. Defaults to "gpt-3.5-turbo".
        timeout: Seconds to wait on the HTTP request before ``requests``
            raises a timeout error, so a stalled connection cannot hang the
            whole run.

    Returns:
        The decoded JSON body of the HTTP response. No status check is made,
        so the body is returned as-is even when it describes an API error;
        callers must check for the keys they need.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API}"
    }

    # The previous code called an undefined `prompt(...)`; the caption-aware
    # prompt builder in this notebook is `prompt_with_cap`.
    prompt_text = prompt_with_cap(captions, question)

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                # Text-only request. An image part could be appended to this
                # list for vision-capable models.
                "content": [
                    {"type": "text", "text": prompt_text},
                ]
            }
        ],
        "max_tokens": 30
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )

    return response.json()


In [None]:
# Destination CSV for the generated answers; written by the to_csv cell after the generation loop.
filename = "dataset/LLM_generated/valid_gpt_35_cap.csv"

In [None]:

# Load the source CSV (presumably the validation split, going by its file name — confirm).
# The columns image_name, Question, Captions and Answer_fixed are selected from it below.
test_df = pd.read_csv("dataset/final_csvs/updated_valid.csv")

# test_df.drop(columns='GPT4o_cap',inplace=True)
test_df.head(2)

In [None]:
# Working frame for the caption-based run: an independent copy holding only the
# columns used here, so adding the answer column does not touch test_df.
cap_df = test_df[
    ['image_name', 'Question', 'Captions', 'Answer_fixed']
].copy()

cap_df.head(2)
# cap_df.isna().sum()

In [None]:
# Answer column starts out empty; the generation loop fills it in.
cap_df.loc[:, 'valid_GPT35_cap'] = None

# Image directory; in the visible cells it is only referenced by the
# commented-out image-encoding line inside the generation loop.
image_path = "dataset/archive/Bangla_VQA/images"
# nocap_df = nocap_df.sample(frac=1)

# Running token counters for the usage/cost report.
total_input_tokens = total_output_tokens = total_tokens = 0

In [None]:
# Rows to process in this run. `iloc[0:]` selects every row of cap_df;
# presumably the start offset is raised to resume after an interrupted run — TODO confirm.
slicedf = cap_df.iloc[0:]
slicedf.head(3)

In [None]:
# Query the model once per row and store its answer in cap_df['valid_GPT35_cap'].
for idx, row in tqdm(slicedf.iterrows(), total=slicedf.shape[0]):

    # base64_image = encode_image(f"{image_path}/{row['image_name']}")
    captions = row['Captions']
    question = row['Question']

    res = generate_Answer(captions, question)

    if 'usage' in res and res.get('choices'):
        usage = res['usage']
        total_input_tokens = total_input_tokens + usage['prompt_tokens']
        total_output_tokens = total_output_tokens + usage['completion_tokens']
        total_tokens = total_tokens + usage['prompt_tokens'] + usage['completion_tokens']

        # Write by row label, not by image_name: slicedf is a slice of cap_df and
        # shares its index, so this targets exactly the row that was asked about.
        # Matching on image_name would overwrite every row that shares the image.
        cap_df.loc[idx, 'valid_GPT35_cap'] = str(res['choices'][0]['message']['content'])

        print("done: idx: ",idx," filename: ", str(row['image_name']), "input_token: ", usage['prompt_tokens'], "output_token: ", usage['completion_tokens'])
    else:
        # The body lacks 'usage'/'choices' (e.g. an API error payload). Report it
        # and leave this row's answer as None so the run continues and the row
        # can be retried later.
        print("failed: idx: ", idx, " filename: ", str(row['image_name']), "response: ", res)

    # Pause between requests to pace calls to the API.
    time.sleep(1)
    # break

In [None]:
# Persist the answers. Create the output directory first so the write cannot
# fail on a missing folder after the API calls have already been made.
output_dir = os.path.dirname(filename)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
cap_df.to_csv(filename, index=False)

In [None]:
# Preview the first rows of cap_df, including the valid_GPT35_cap answer column.
cap_df.head()

In [None]:
# Per-token USD rates used for the cost estimate.
# NOTE(review): these equal $5 / $15 per million tokens, while generate_Answer
# requests "gpt-3.5-turbo" — confirm these are the right rates for that model.
INPUT_TOKEN_USD = 0.000005
OUTPUT_TOKEN_USD = 0.000015

estimated_cost = total_input_tokens * INPUT_TOKEN_USD + total_output_tokens * OUTPUT_TOKEN_USD

print("total input tokens: ", total_input_tokens)
print("total output tokens: ", total_output_tokens)
print(f"total cost: {estimated_cost}")