This notebook uses an LLM to create text descriptions of visualizations from their AltGosling output, their specs, and their images.

In [None]:
from openai import OpenAI
from PIL import Image
import base64
import os
import json
from dotenv import load_dotenv

# Load variables from a local .env file (if present) so they can be read from os.environ below.
load_dotenv()

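You need an API key for this. It is read from the `API_KEY` environment variable, which can be set in a `.env` file that `load_dotenv()` picks up.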

In [None]:
# Fail fast with a KeyError if API_KEY is not defined in the environment.
API_KEY = os.environ['API_KEY']

In [None]:
# Root of the unified dataset and the sub-folders the rest of the notebook reads from.
data_repo = "../data/unified"
specs_dir = os.path.join(data_repo, "specs")
imgs_dir = os.path.join(data_repo, "imgs")
alt_dir = os.path.join(data_repo, "alt_0_2_4")

# The rules CSV is kept as one raw string; it is sent to the model verbatim
# as the "attribute classification" part of the request.
lhs_path = "./cfg_rules_lhs.csv"
with open(lhs_path, "r") as lhs_file:
    lhs = lhs_file.read()

In [None]:
def get_files(name):
    """Load every stored artifact for one visualization example.

    Paths are built from the module-level ``specs_dir``, ``imgs_dir`` and
    ``alt_dir`` variables.

    Args:
        name: Base file name of the example, without extension.

    Returns:
        A dict with the keys:
            "name": the ``name`` argument,
            "spec": text of ``<specs_dir>/<name>.json``,
            "img": base64 string of the bytes of ``<imgs_dir>/<name>.png``,
            "alt": text of ``<alt_dir>/altgosling/<name>.txt``,
            "processed_spec": text of ``<alt_dir>/processedspec/<name>.json``.

    Raises:
        OSError: if one of the files is missing or unreadable.
        UnicodeDecodeError: if a text file is not valid UTF-8.
    """
    def _read_text(path):
        # Decode explicitly as UTF-8: the default encoding of open() depends
        # on the platform locale, which breaks on non-ASCII content.
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    spec = _read_text(os.path.join(specs_dir, f"{name}.json"))

    with open(os.path.join(imgs_dir, f"{name}.png"), "rb") as f:
        img = f.read()
    # The image is kept as base64 text so it can be embedded in a data URL.
    img_base64 = base64.b64encode(img).decode("utf-8")

    alt = _read_text(os.path.join(alt_dir, "altgosling", f"{name}.txt"))
    processed_spec = _read_text(os.path.join(alt_dir, "processedspec", f"{name}.json"))

    return {"name": name, "spec": spec, "img": img_base64, "alt": alt, "processed_spec": processed_spec}

In [None]:
# Load the few-shot examples. The JSON file is used as a mapping from an
# example name to the description that is the desired output for it.
with open("few_shot_learning_examples.json", "r") as examples_file:
    fewshot = json.load(examples_file)

# Combine each example's files with its description in a single dict.
fewshots = [
    {**get_files(example_name), "description": description}
    for example_name, description in fewshot.items()
]

In [None]:
# Model used for every request in this notebook.
model = "gpt-4o"

# Zero-shot instruction sent as the first text part of each request.
# Spelled out line by line so the exact whitespace of the prompt is visible.
prompt = (
    "\n"
    "I want to generate a text description of a visualization. \n"
    "The visualization is a json specification. \n"
    "I already have created an automatic alt text.\n"
    "I also have classified the attributes of the specification.\n"
    "I will use the text description in a multimodal search engine. \n"
    "The description should be as informative as possible.\n"
)

In [None]:
## Including the few-shot examples results in too many tokens, so this variant of the prompt is disabled.

# prompt = f"""
# I want to generate a text description of a visualization. 
# The visualization is a json specification. 
# I already have created an automatic alt text.
# I also have classified the attributes of the specification.
# I will use the text description in a multimodal search engine. 
# The description should be as informative as possible.

# Here are some examples of visualizations, their specs, images, alt texts, processed specs, and the description, which is the desired output.
# {fewshots}
# """
# model = "gpt-4o"

In [None]:
def send_prompt(prompt, model, files, max_tokens=300, client=None):
    """Ask a chat model to describe one visualization.

    A single user message is sent. It contains, in this order: the prompt,
    the spec, the processed spec, the AltGosling alt text, the module-level
    ``lhs`` attribute classification, and the PNG image as a base64 data URL.

    Args:
        prompt: Instruction text sent as the first part of the message.
        model: Name of the chat model to use.
        files: Dict with the keys "spec", "processed_spec", "alt" and "img"
            ("img" being base64-encoded PNG data), as built by ``get_files``.
        max_tokens: Upper bound on generated tokens. Defaults to 300, the
            value that was previously hard-coded.
        client: Optional client exposing ``chat.completions.create``. When
            omitted, a new ``OpenAI`` client is created from ``API_KEY``.
            Pass one in to reuse it across many calls.

    Returns:
        The ``message.content`` of the first returned choice.
        NOTE(review): this may be None if the model returns no text
        content — confirm against the SDK and handle it in callers.
    """
    if client is None:
        client = OpenAI(api_key=API_KEY)

    content_parts = [
        {"type": "text", "text": prompt},
        {"type": "text", "text": f"spec: {files['spec']}"},
        {"type": "text", "text": f"processed spec: {files['processed_spec']}"},
        {"type": "text", "text": f"altgosling alt: {files['alt']}"},
        {"type": "text", "text": f"attribute classification: {lhs}"},
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/png;base64,{files['img']}",
            },
        },
    ]

    response = client.chat.completions.create(
        messages=[{"role": "user", "content": content_parts}],
        model=model,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content


In [None]:
# specs = os.listdir(specs_dir)

# for example in specs[0:1]:
#     name_i = example.split(".json")[0]
#     files = get_files(name_i)

#     response = send_prompt(prompt, model, files)
#     print(response)

In [None]:
# os.listdir returns entries in arbitrary order and includes every entry in the
# folder. Keep only the .json specs and sort them so that the slice below
# selects the same examples on every run.
specs = sorted(entry for entry in os.listdir(specs_dir) if entry.endswith(".json"))

# Make sure the output folder exists before writing into it.
llm_alt_dir = os.path.join(alt_dir, "altgosling-llm")
os.makedirs(llm_alt_dir, exist_ok=True)

for example in specs[0:10]:
    name_i = os.path.splitext(example)[0]
    files = get_files(name_i)

    response = send_prompt(prompt, model, files)
    if response is None:
        # No text came back; skip instead of failing on f.write(None) and
        # leaving an empty output file behind.
        print(f"No description returned for {name_i}; skipping.")
        continue

    # Write as UTF-8 explicitly: generated text may contain characters that
    # the platform's default encoding cannot represent.
    with open(os.path.join(llm_alt_dir, f"{name_i}.txt"), "w", encoding="utf-8") as f:
        f.write(response)

In [None]:
# Display the entries covered by the slice used in the generation loop.
specs[:10]