In [40]:
import os 
import json
from dotenv import load_dotenv
from pydantic import BaseModel
from collections import defaultdict
from openai import OpenAI

In [41]:
# Pull settings from a local .env file into the process environment.
load_dotenv()

# The key is looked up in the environment so it is never written into the notebook.
api_key = os.environ.get('API_KEY')
client = OpenAI(api_key=api_key)

In [42]:
# Structured-output schema passed to the OpenAI client as `response_format`:
# the reply must be a JSON object with a single string field, `caption`.
# Documented with `#` comments on purpose: pydantic includes a class docstring
# in the generated JSON schema, which would change what is sent to the API.
class captionGenerateResponse(BaseModel):
    caption: str  # the generated description for one food class

In [43]:
# System prompt for the caption generator: role, hard constraints, then ten
# few-shot examples. The few-shot responses must themselves obey the banned-word
# rule below, otherwise the model imitates the examples instead of the rule.
system_prompt = """
    ### Role
    You are a chatbot that generates descriptions of food items based on user input.
    The captions you create will later be used to classify images of food.
    ### Importance
    Focus on describing the food's color, ingredients, and appearance.
    Your task is to generate detailed captions centered around these characteristics for the given food input.
    Do not generate ways to enhance the taste of the given food. Focus on describing its appearance.
    Do not use adjectives or adverbs like "perfectly", "glistening", "fresh", "glossy", "colorful", "variant" and "artfully" in the caption.
    Most critical point, the response must be fewer than 50 tokens.
    ### Few-shots
    1. class name : Spaghetti with clams
    response : Spaghetti tossed with plump, juicy clams in garlic-infused olive oil. Parsley and a sprinkle of red pepper are added.
    2. class name : fried chicken
    response : Golden-brown fried chicken, crispy, with a crunchy coating. Juicy meat peeks through, garnished with parsley.
    3. class name : kimchi
    response : Vibrant red and orange kimchi, made from crisp napa cabbage and radish, is laced with red mist and garlic. It showcases green scallions peeking through.
    4. class name : ricotta cheese
    response : Creamy white ricotta cheese, displays soft peaks with a hint of grain.
    5. class name : garlic bread
    response : Golden-brown garlic bread, crisp on the outside, is slathered in a mixture of melted butter and minced garlic. Parsley adds a pop of green.
    6. class name : pumpkin soup
    response : Vibrant orange pumpkin soup, creamy and smooth, garnished with a sprinkle of nutmeg and a swirl of rich cream.
    7. class name : salmon sashimi
    response : Lustrous, vibrant orange salmon sashimi, sliced to showcase the rich marbling. Each piece is arranged on a white plate.
    8. class name : oi sobagi
    response : Oi sobagi, a Korean cucumber dish, features green cucumbers filled with a spicy mixture of chili pepper, garlic, and scallions. The dish showcases red bits of pepper and flecks of green herbs.
    9. class name : Yu Xiang Eggplant
    response : Yu Xiang Eggplant, featuring deep purple eggplant slices sautéed in a sauce with garlic, ginger, and chili peppers. Green onions and red chili flakes add contrast.
    10. class name : Nagasaki Champon
    response : Nagasaki Champon features yellow ramen noodles, vegetables, and seafood such as shrimp and squid. The broth is cloudy white, garnished with slices of pork and topped with scallions.
"""

In [44]:
def caption_generate(client, prompt, className, model='gpt-4o-mini'):
    """Generate a short appearance-focused caption for one food class.

    Args:
        client: OpenAI client exposing ``beta.chat.completions.parse``.
        prompt: System prompt carrying the captioning rules and few-shot examples.
        className: Name of the food class to describe.
        model: Chat model to query; defaults to the model this notebook used.

    Returns:
        The caption string from the structured response.

    Raises:
        ValueError: If the model refused the request or returned no content.
    """
    response = client.beta.chat.completions.parse(
        model=model,
        messages=[
            {"role": "system", "content": prompt},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": f"Please generate caption about the {className}"}
                ]
            }
        ],
        response_format=captionGenerateResponse
    )

    message = response.choices[0].message

    # `.parse()` already validated the reply against the schema; use that
    # object instead of decoding the raw JSON text a second time.
    parsed = getattr(message, "parsed", None)
    if parsed is not None:
        return parsed.caption

    # On a refusal there is no content, so json.loads(None) would fail with an
    # unhelpful TypeError; surface the model's refusal text instead.
    refusal = getattr(message, "refusal", None)
    if refusal:
        raise ValueError(f"Model refused to caption {className!r}: {refusal}")

    raw = message.content
    if not raw:
        raise ValueError(f"Model returned no content for {className!r}")

    # Fallback for clients/messages that do not expose `.parsed`.
    return json.loads(raw)['caption']

In [30]:
# Smoke test: caption a single class and display the returned text.
result = caption_generate(client, prompt=system_prompt, className="Nagasaki Champon")
result

'Nagasaki Champon features a bowl of thick, yellow ramen noodles, vibrant vegetables, and seafood such as shrimp and squid. The broth is a rich, cloudy white, garnished with slices of pork and topped with scallions.'

In [45]:
data_dir = "/Users/anjonghyeon/Desktop/KU/3-2/DeepLearning/20242R0136COSE47402/FinalProject/data/test"

# Each sub-directory of data_dir is one class; underscores in the folder name
# become spaces. Hidden entries (e.g. macOS ".DS_Store") and plain files are
# skipped so they are never sent to the API as food classes, and the list is
# sorted because directory listing order is arbitrary.
class_candidate = sorted(
    entry.name.replace('_', ' ')
    for entry in os.scandir(data_dir)
    if entry.is_dir() and not entry.name.startswith('.')
)

In [47]:
# Map each class name to its generated caption (one API call per class).
# A plain dict is used because the values are strings: with defaultdict(list),
# looking up a missing class would silently insert an empty list that would
# later be written to the JSON file.
class_captions = {}
for food in class_candidate:
    class_captions[food] = caption_generate(client, system_prompt, food)

In [48]:
# Number of distinct classes that received a caption.
# NOTE(review): the saved output under this cell is a dict repr rather than an
# integer, so it looks stale — re-run the cell to refresh it.
len(class_captions)

defaultdict(list,
            {'foie gras': 'Foie gras presents a rich, creamy texture, with a pale beige hue. It is arranged elegantly, showcasing its smooth surface. Accompanying garnishes may include fruit compote and toasted bread.',
             'club sandwich': 'A club sandwich stacked high, featuring layers of golden-brown toasted bread, crispy bacon, turkey slices, and fresh lettuce. Slices of tomato add color, and a toothpick holds it together.',
             'cheese plate': 'A cheese plate displaying an assortment of cheeses, including creamy brie, sharp cheddar, and blue cheese. It features slices, wedges, and cubes of cheese accompanied by crackers, fruits, and nuts.',
             'cup cakes': 'Cupcakes with soft, fluffy bases, topped with creamy frosting in various colors. Sprinkles and decorative toppings add texture, all presented in colorful liners.',
             'garlic bread': 'Garlic bread with a golden-brown crust, crisp on the outside. It is coated with a rich mi

In [None]:
# Persist the class -> caption mapping as pretty-printed JSON.
output_path = '/Users/anjonghyeon/Desktop/KU/3-2/DeepLearning/20242R0136COSE47402/FinalProject/src/class_captions.json'
with open(file=output_path, mode='w') as file:
    json.dump(obj=class_captions, fp=file, indent=4)