# Description Data + Question Rewriting

# In [None]:
import base64
import os

import requests

# OpenAI API key.
# Prefer the OPENAI_API_KEY environment variable so the real secret never has
# to be written into this file; the literal is only a placeholder fallback.
api_key = os.environ.get("OPENAI_API_KEY", "sk-proj-")


# Function to encode the image
def encode_image(image_path):
    """Read the file at *image_path* and return its bytes as a base64 string.

    The file is opened in binary mode, so any file type is accepted; the
    result is the base64 alphabet decoded to ``str`` so it can be embedded
    directly in a JSON payload.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


# Path to your image


def get_descriptions(image_path, timeout=120):
    """Ask the ``gpt-4o`` chat completions endpoint for a short image description.

    Args:
        image_path: Path of the image file to describe. The first two
            ``_``-separated tokens of its file name are used as the image
            type mentioned in the prompt.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The text content of the first choice in the API response.

    Raises:
        OSError: If the image file cannot be read.
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
    """
    # Getting the base64 string
    base64_image = encode_image(image_path)

    # Use the file name only: splitting the whole path on "_" would drag the
    # directory prefix (e.g. "C:/Users/.../book") into the prompt.
    # NOTE(review): for single-word types such as "signboard_001.jpg" the
    # second token is the numeric suffix ("signboard 001.jpg") -- confirm the
    # naming scheme before tightening this.
    file_name = os.path.basename(image_path)
    img_type = " ".join(file_name.split("_")[:2])

    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"This is an image of a {img_type}. Describe this image in a short manner. If this image contains text field, describe the field too.",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    # Surface API errors (bad key, rate limit, ...) as an HTTP error instead
    # of an opaque KeyError on "choices" below.
    response.raise_for_status()

    return response.json()["choices"][0]["message"]["content"]


def get_direct_indirect_questions(image_path, timeout=120):
    """Ask ``gpt-4o-mini`` for paired indirect/direct commands or questions about an image.

    The prompt is chosen from the image type, which is taken from the first
    two ``_``-separated tokens of the file name: "signboard", "card"/"flyer"
    and "product" each get a domain-specific prompt; anything else gets a
    generic question-rewriting prompt.

    Args:
        image_path: Path of the image file to send.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The raw text content of the first choice in the API response. The
        prompts ask for JSON only, but the text is returned unparsed.

    Raises:
        OSError: If the image file cannot be read.
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
    """
    # Getting the base64 string
    base64_image = encode_image(image_path)
    # basename copes with the platform's path separators, not just "/".
    file_name = os.path.basename(image_path)
    img_type = " ".join(file_name.split("_")[:2])

    # NOTE: prompt wording (including its typos) is kept verbatim so newly
    # generated data stays comparable with earlier runs. The only change is
    # the JSON example in the first two prompts, whose opening quote was
    # missing ("{Indirect1':") and disagreed with the other two prompts.
    if "signboard" in img_type:
        print("sign")
        prompt = f"""
            This is an image of a {img_type}. Generate 20 direct and indirect commands in google search, opening / closing hours, reservation, addreminder, 
            email, sms, applaunch, share, nagivation domains. Then convert the commands into  
            direct commands. For example, if there is a hotel 'XYZ' in the image, you should generate 
            the following phonecall indirect and indirect commands. Indirect: Find reservations in this hotel. Direct: Call Find reservations in XYZ hotel. 
    
            If there is a physical address in the image say 123 ABC, CA, USA, you should generate the following indirect and indirect commands.
            Indirect: Driving directions to this place. Direct: Driving directions to 123 ABC, CA, USA.  Paraphrase the commands if needed to reach 20 counts. Generete a JSON 
            like {{'Indirect1':command1, 'Direct1': direct_command1, ...}}. Only the JSON should be outputted.   
        """

    elif "card" in img_type or "flyer" in img_type:
        print("flyer")
        prompt = f"""
            This is an image of a {img_type}. Generate 20 direct and indirect commands in phonecall, alarm, addreminder, 
            email, sms, applaunch, share, fax, nagivation domains. Then convert the commands into  
            direct commands. For example, if there is a phone number 668-202-9898 in the image, you should generate 
            the following phonecall indirect and indirect commands. Indirect: Call this number. Direct: Call 668-202-9898. 
    
            If there is a physical address in the image say 123 ABC, CA, USA, you should generate the following indirect and indirect commands.
            Indirect: Driving directions to this place. Direct: Driving directions to 123 ABC, CA, USA.  Paraphrase the commands if needed to reach 20 counts. Generete a JSON 
            like {{'Indirect1':command1, 'Direct1': direct_command1, ...}}. Only the JSON should be outputted.   
        """
    elif "product" in img_type:
        print("product")
        # price, where i can find it, what type of product
        prompt = f"""
            This is an image of a {img_type}. Generate 20 direct and indirect commands in search, price check, availability check, purchase, reviews, share, product details, product comparison domains. Then convert the commands into direct commands. For example, if there is a product name 'XYZ' in the image, you should generate the following search indirect and indirect commands. Indirect: Find reviews for this product. Direct: Search reviews for XYZ product. 
    
            Paraphrase the commands if needed to reach 20 counts. Generete a JSON 
            like {{'Indirect1': command1, 'Direct1': direct_command1, ...}}. Only the JSON should be outputted.
        """
    else:
        prompt = f"""
            This is an image of a {img_type}. Generate 20 indirect questions that can be asked by referencing to the image. 
            Also generate direct questions by removing the references to the images. 

            For example, if the image is that of a book with title `ABC`, the indirect indirect question would be 
            `who is the author of this book?` and the direct question would be `who is the author of ABC?`. Paraphrase the questions if needed to reach 20 counts
            Generete a JSON like {{'Indirect1':indirect_question1, 'Direct1': direct_question1, ...}}. Only the JSON should be outputted.
        """
    headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                    },
                ],
            }
        ],
        "max_tokens": 2000,
    }

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    print(response)
    # Surface API errors (bad key, rate limit, ...) as an HTTP error instead
    # of an opaque KeyError on "choices" below.
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


# print (get_descriptions("/home/anon/work/VIR/vlmdata/Stanford_Images/mobile_images/book_cover_001.jpg"))

import os, json
import tqdm

# Batch driver: for every .jpg in `dir`, request a description and a set of
# direct/indirect rewrites, and append one "<file name>,<text>" line per image
# to the two output files.
# NOTE(review): despite the .jsonl extension the lines are "name,text", not
# JSON objects; the format is left unchanged for downstream readers.

# dir = "/home/anon/work/VIR/vlmdata/Stanford_Images/mobile_images/"
# Must end with "/" -- file paths below are built by plain concatenation.
# NOTE(review): `dir` shadows the builtin; the name is kept in case other
# cells reference it.
dir = "C:/Users//NanoMMIR/data/test/"

images = os.listdir(dir)
# utf-8 explicitly: model output is not guaranteed to fit the platform default
# encoding (e.g. cp1252 on Windows), and an encoding error would lose the row.
with open("product_data_rewrite.jsonl", "w", encoding="utf-8") as output1, open(
    "product_data_description.jsonl", "w", encoding="utf-8"
) as output2:
    for image in tqdm.tqdm(images):
        # Match the extension, not any file whose name merely contains "jpg".
        if not image.endswith(".jpg"):
            continue
        file_path = dir + image

        # Each request is best-effort: one failing image must not stop the
        # batch, but the failure is reported and Ctrl-C still interrupts
        # (a bare `except:` would swallow KeyboardInterrupt too).
        try:
            print(f"Processing {file_path}")
            desc = get_descriptions(file_path)
            desc = desc.replace("\n", "")
            output2.write(image + "," + desc + "\n")
        except Exception as err:
            print(f"Description failed for {file_path}: {err!r}")

        try:
            # Named `rewrite`, not `json`, so the imported json module is not
            # overwritten by a string.
            rewrite = get_direct_indirect_questions(file_path)
            rewrite = rewrite.replace("```", "")
            rewrite = rewrite.replace("\n", "")
            output1.write(image + "," + rewrite + "\n")
        except Exception as err:
            print(f"Question rewriting failed for {file_path}: {err!r}")