In [1]:
import os
os.environ["OPENAI_API_KEY"] = ""

In [2]:
from openai import OpenAI

client = OpenAI()

response = client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
    {
      "role": "user",
      "content": [
        {"type": "text", "text": "What’s in this image?"},
        {
          "type": "image_url",
          "image_url": {
            "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
          },
        },
      ],
    }
  ],
  max_tokens=300,
)

print(response.choices[0])

Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The image depicts a serene landscape with a wooden pathway leading through a lush green area. The pathway appears to be surrounded by tall grass and some shrubs or small trees, with a clear blue sky overhead adorned with soft clouds. This scene suggests a natural, peaceful outdoor environment, likely in a meadow or wetland area.', refusal=None, role='assistant', function_call=None, tool_calls=None))


In [6]:
import base64
from openai import OpenAI

client = OpenAI()

# Function to encode the image
def encode_image(image_path):
  with open(image_path, "rb") as image_file:
    return base64.b64encode(image_file.read()).decode('utf-8')

# Path to your image
image_path = "images/mcdonalds.jpeg"

# Getting the base64 string
base64_image = encode_image(image_path)

response = client.chat.completions.create(
  model="gpt-4o-mini",
  messages=[
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": "How much calorie is this food?",
        },
        {
          "type": "image_url",
          "image_url": {
            "url":  f"data:image/jpeg;base64,{base64_image}"
          },
        },
      ],
    }
  ],
)

print(response.choices[0])



Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content="The meal you've shown includes a Big Mac, medium fries, and a medium soft drink. Here's a rough estimate of the calories for each item:\n\n- **Big Mac**: Approximately 550 calories\n- **Medium Fries**: Approximately 340 calories\n- **Medium Soft Drink** (like a Coke): Approximately 210 calories\n\n### Total Estimated Calories: \n**About 1,100 calories**\n\nKeep in mind that actual calorie counts can vary based on portion sizes and specific variations in preparation.", refusal=None, role='assistant', function_call=None, tool_calls=None))


In [7]:
print(response.choices[0].message.content)

The meal you've shown includes a Big Mac, medium fries, and a medium soft drink. Here's a rough estimate of the calories for each item:

- **Big Mac**: Approximately 550 calories
- **Medium Fries**: Approximately 340 calories
- **Medium Soft Drink** (like a Coke): Approximately 210 calories

### Total Estimated Calories: 
**About 1,100 calories**

Keep in mind that actual calorie counts can vary based on portion sizes and specific variations in preparation.


TODO: (openai version)
1. fine tune  https://platform.openai.com/docs/guides/fine-tuning
2. function calling https://platform.openai.com/docs/guides/function-calling
3. evals https://platform.openai.com/docs/guides/evals


In [9]:
#fine-tuning
import json
import openai
import os
import pandas as pd
from pprint import pprint

In [12]:
foods_df = pd.read_csv("data/FoodandCalories.csv")

foods_df.head()


Unnamed: 0,Food,Serving,Calories
0,Artichoke,1 artichoke (128 g),60 cal
1,Arugula,1 leaf (2 g),1 cal
2,Asparagus,1 spear (12 g),2 cal
3,Aubergine,1 aubergine (458 g),115 cal
4,Beetroot,1 beet (82 g),35 cal


In [14]:
system_message = "You are a helpful calorie assistant. You are to extract the generic foods and their calories."


def create_user_message(row):
    return f"Food: {row['Food']}\n\serving: {row['Serving']}\n\nCalories: "


def prepare_example_conversation(row):
    return {
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": create_user_message(row)},
            {"role": "assistant", "content": row["Calories"]},
        ]
    }


pprint(prepare_example_conversation(foods_df.iloc[0]))

{'messages': [{'content': 'You are a helpful calorie assistant. You are to '
                          'extract the generic foods and their calories.',
               'role': 'system'},
              {'content': 'Food: Artichoke\n'
                          '\\serving: 1 artichoke (128 g)\n'
                          '\n'
                          'Calories: ',
               'role': 'user'},
              {'content': '60 cal', 'role': 'assistant'}]}


In [15]:
training_df = foods_df.loc[0:100]

# apply the prepare_example_conversation function to each row of the training_df
training_data = training_df.apply(prepare_example_conversation, axis=1).tolist()

for example in training_data[:5]:
    print(example)

{'messages': [{'role': 'system', 'content': 'You are a helpful calorie assistant. You are to extract the generic foods and their calories.'}, {'role': 'user', 'content': 'Food: Artichoke\n\\serving: 1 artichoke (128 g)\n\nCalories: '}, {'role': 'assistant', 'content': '60 cal'}]}
{'messages': [{'role': 'system', 'content': 'You are a helpful calorie assistant. You are to extract the generic foods and their calories.'}, {'role': 'user', 'content': 'Food: Arugula\n\\serving: 1 leaf (2 g)\n\nCalories: '}, {'role': 'assistant', 'content': '1 cal'}]}
{'messages': [{'role': 'system', 'content': 'You are a helpful calorie assistant. You are to extract the generic foods and their calories.'}, {'role': 'user', 'content': 'Food: Asparagus\n\\serving: 1 spear (12 g)\n\nCalories: '}, {'role': 'assistant', 'content': '2 cal'}]}
{'messages': [{'role': 'system', 'content': 'You are a helpful calorie assistant. You are to extract the generic foods and their calories.'}, {'role': 'user', 'content': 'Fo

In [16]:
validation_df = foods_df.loc[101:200]
validation_data = validation_df.apply(
    prepare_example_conversation, axis=1).tolist()

In [17]:
def write_jsonl(data_list: list, filename: str) -> None:
    with open(filename, "w") as out:
        for ddict in data_list:
            jout = json.dumps(ddict) + "\n"
            out.write(jout)

In [18]:
training_file_name = "foods_finetune_training.jsonl"
write_jsonl(training_data, training_file_name)

validation_file_name = "foods_finetune_validation.jsonl"
write_jsonl(validation_data, validation_file_name)

In [19]:
!head -n 5 tmp_recipe_finetune_training.jsonl

head: tmp_recipe_finetune_training.jsonl: No such file or directory


In [20]:
def upload_file(file_name: str, purpose: str) -> str:
    with open(file_name, "rb") as file_fd:
        response = client.files.create(file=file_fd, purpose=purpose)
    return response.id


training_file_id = upload_file(training_file_name, "fine-tune")
validation_file_id = upload_file(validation_file_name, "fine-tune")

print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

Training file ID: file-IRQQqS11B7G8xSWn6QBaq8iY
Validation file ID: file-DMNM1KnhvDfXbT9UiTQle7Da


In [21]:
MODEL = "gpt-4o-mini-2024-07-18"

response = client.fine_tuning.jobs.create(
    training_file=training_file_id,
    validation_file=validation_file_id,
    model=MODEL,
    suffix="foods-project",
)

job_id = response.id

print("Job ID:", response.id)
print("Status:", response.status)

Job ID: ftjob-gpKNdntrTXfzQWLKGQ5lxnSq
Status: validating_files


In [22]:
response = client.fine_tuning.jobs.retrieve(job_id)

print("Job ID:", response.id)
print("Status:", response.status)
print("Trained Tokens:", response.trained_tokens)


Job ID: ftjob-gpKNdntrTXfzQWLKGQ5lxnSq
Status: validating_files
Trained Tokens: None


In [26]:
response = client.fine_tuning.jobs.list_events(job_id)

events = response.data
events.reverse()

for event in events:
    print(event.message)

APIConnectionError: Connection error.

In [25]:
response = client.fine_tuning.jobs.retrieve(job_id)
fine_tuned_model_id = response.fine_tuned_model

if fine_tuned_model_id is None:
    raise RuntimeError(
        "Fine-tuned model ID not found. Your job has likely not been completed yet."
    )

print("Fine-tuned model ID:", fine_tuned_model_id)

APIConnectionError: Connection error.