<a href="https://colab.research.google.com/github/crunchdomo/llm_conversation/blob/main/test_base_openai_baselne.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
# from google.colab import userdata
# userdata.get('OPENAI_API_KEY')

In [1]:
import openai
import json
from datetime import datetime
import re
import random
import torch
from transformers import AutoTokenizer, pipeline

# device = 0 if torch.cuda.is_available() else -1

class ModelHandler:
    def __init__(self, model_name):
        self.model_name = model_name
        self.device = 0 if torch.cuda.is_available() else -1

        if "llama" in model_name.lower():
            self._init_llama()
        else:
            self._init_openai()

    def _init_openai(self):
        self.client = openai.OpenAI(
            api_key="",  # Replace with your key
            base_url="https://api.deepinfra.com/v1/openai" if "grok" in self.model_name else None
        )

    def _init_llama(self):
        self.tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct")
        self.pipe = pipeline(
            task="text-generation",
            model=self.model_name,
            tokenizer=self.tokenizer,
            device=self.device,
            torch_dtype=torch.bfloat16
        )
        self.pipe.tokenizer.pad_token = self.pipe.tokenizer.eos_token

    def format_llama_prompt(self, messages):
        B_INST, E_INST = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>", "<|eot_id|>"
        return "".join(
            f"{B_INST}{msg['content']}{E_INST}" if msg['role'] == 'user'
            else f"<|start_header_id|>assistant<|end_header_id|>{msg['content']}<|eot_id|>"
            for msg in messages
        )

    def generate(self, messages, max_tokens=512):
        if hasattr(self, 'client'):
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=messages,
                max_tokens=max_tokens,
                temperature=0.7
            )
            return response.choices[0].message.content
        else:
            prompt = self.format_llama_prompt(messages)
            outputs = self.pipe(prompt, max_new_tokens=max_tokens)
            return outputs[0]["generated_text"][len(prompt):].strip()

def parse_steps(instructions):
    """Parse instructions into individual steps"""
    steps = re.split(r'\n\s*\d+\.\s+', instructions.strip())
    return [f"{i+1}. {s.strip()}" for i, s in enumerate(steps) if s.strip()]

def cook_recipe(recipe_data, automated_inputs, model_name="gpt-4-turbo", conversation_type="combined"):
    handler = ModelHandler(model_name)

    # Define conversation templates
    conversation_templates = {
        "combined": {
            1: [
                "Can I use sea salt instead of kosher salt?",
                "Why do we let the chicken sit at room temperature?"
            ],
            3: [
                "Can I substitute rosemary with thyme?",
                "What's the purpose of tossing the squash with herbs?"
            ]
        },
        "substitution_questions": {
            1: ["Can I use sea salt instead of kosher salt?"],
            3: ["Can I substitute rosemary with thyme?"]
        },
        "technique_clarification": {
            1: ["Why do we let the chicken sit at room temperature?"],
            3: ["What's the purpose of tossing the squash with herbs?"]
        }
    }

    # Initialize system prompt
    chat = [{
        "role": "system",
        "content": f"""You are a master chef guiding through: {recipe_data['title']}
        - Begin each step with "STEP: [NUMBER]"
        - Use metric measurements
        - End each step with "AWAITING CONFIRMATION"
        - After final step, say "CONVO-COMPLETE"
        - Answer questions about specific steps using their numbers"""
    }]

    # Process steps
    steps = parse_steps(recipe_data['instructions'])
    step_index = 0
    input_index = 0

    while step_index < len(steps) and input_index < len(automated_inputs):
        # Present current step
        chat.append({
            "role": "user",
            "content": f"Present step {step_index+1} clearly: {steps[step_index]}"
        })

        # Get model response
        chef_response = handler.generate(chat)
        print(f"\nChef: {chef_response}")
        chat.append({"role": "assistant", "content": chef_response})

        # Handle user input
        user_input = automated_inputs[input_index]
        print(f"\nTrainee: {user_input}")
        chat.append({"role": "user", "content": user_input})
        input_index += 1

        # Inject template questions
        current_step = step_index + 1
        if conversation_type in conversation_templates:
            template = conversation_templates[conversation_type]
            if current_step in template:
                for question in template[current_step]:
                    print(f"\n[Template Question] Trainee: {question}")
                    chat.append({"role": "user", "content": question})

                    # Get answer
                    answer = handler.generate(chat, max_tokens=256)
                    print(f"\nChef: {answer}")
                    chat.append({"role": "assistant", "content": answer})

        step_index += 1

    save_conversation(chat, f"cooking_session_{conversation_type}_{model_name.replace('/', '-')}.json")
    return chat

def save_conversation(chat_history, filename=None):
    if not filename:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"cooking_session_{timestamp}.json"

    with open(filename, 'w') as f:
        json.dump(chat_history, f, indent=2)
    print(f"Saved to {filename}")

# Recipe data structure
recipe = {
    "title": "Miso-Butter Roast Chicken With Acorn Squash Panzanella",
    "ingredients": [
        '1 (3½–4-lb.) whole chicken', '2¾ tsp. kosher salt, divided, plus more',
        '2 small acorn squash (about 3 lb. total)', '2 Tbsp. finely chopped sage',
        '1 Tbsp. finely chopped rosemary', '6 Tbsp. unsalted butter, melted, plus 3 Tbsp. room temperature',
        '¼ tsp. ground allspice', 'Pinch of crushed red pepper flakes', 'Freshly ground black pepper',
        '⅓ loaf good-quality sturdy white bread, torn into 1" pieces (about 2½ cups)',
        '2 medium apples (such as Gala or Pink Lady; about 14 oz. total), cored, cut into 1" pieces',
        '2 Tbsp. extra-virgin olive oil', '½ small red onion, thinly sliced',
        '3 Tbsp. apple cider vinegar', '1 Tbsp. white miso', '¼ cup all-purpose flour',
        '2 Tbsp. unsalted butter, room temperature', '¼ cup dry white wine',
        '2 cups unsalted chicken broth', '2 tsp. white miso', 'Kosher salt, freshly ground pepper'
    ],
    "instructions": """
        1. Pat chicken dry with paper towels, season all over with 2 tsp. salt, and tie legs together with kitchen twine. Let sit at room temperature 1 hour.

        2. Meanwhile, halve squash and scoop out seeds. Run a vegetable peeler along ridges of squash halves to remove skin. Cut each half into ½"-thick wedges; arrange on a rimmed baking sheet.

        3. Combine sage, rosemary, and 6 Tbsp. melted butter in a large bowl; pour half of mixture over squash on baking sheet. Sprinkle squash with allspice, red pepper flakes, and ½ tsp. salt and season with black pepper; toss to coat.

        4. Add bread, apples, oil, and ¼ tsp. salt to remaining herb butter in bowl; season with black pepper and toss to combine. Set aside.
        Place onion and vinegar in a small bowl; season with salt and toss to coat. Let sit, tossing occasionally, until ready to serve.

        5. Place a rack in middle and lower third of oven; preheat to 425°F. Mix miso and 3 Tbsp. room-temperature butter in a small bowl until smooth. Pat chicken dry with paper towels, then rub or brush all over with miso butter. Place chicken in a large cast-iron skillet and roast on middle rack until an instant-read thermometer inserted into the thickest part of breast registers 155°F, 50–60 minutes. (Temperature will climb to 165°F while chicken rests.) Let chicken rest in skillet at least 5 minutes, then transfer to a plate; reserve skillet.

        6. Meanwhile, roast squash on lower rack until mostly tender, about 25 minutes. Remove from oven and scatter reserved bread mixture over, spreading into as even a layer as you can manage. Return to oven and roast until bread is golden brown and crisp and apples are tender, about 15 minutes. Remove from oven, drain pickled onions, and toss to combine. Transfer to a serving dish.

        7. Using your fingers, mash flour and butter in a small bowl to combine.

        8. Set reserved skillet with chicken drippings over medium heat. You should have about ¼ cup, but a little over or under is all good. (If you have significantly more, drain off and set excess aside.) Add wine and cook, stirring often and scraping up any browned bits with a wooden spoon, until bits are loosened and wine is reduced by about half (you should be able to smell the wine), about 2 minutes. Add butter mixture; cook, stirring often, until a smooth paste forms, about 2 minutes. Add broth and any reserved drippings and cook, stirring constantly, until combined and thickened, 6–8 minutes. Remove from heat and stir in miso. Taste and season with salt and black pepper.

        9. Serve chicken with gravy and squash panzanella alongside.
    """
}

# Automated inputs
automated_inputs = [
    "Got it! Ready for step 1.",
    "Done Continue.",
    "Done Continue.",
    "Done Continue.",
    "Done Continue.",
    "Done Continue.",
    "Done Continue.",
    "Done Continue.",
    "Done Continue.",
    "Done Continue.",
    "Done Continue.",
    "exit"
]

# Example comparison
models_to_compare = [
    # "gpt-4-turbo",
    # "gpt-3.5-turbo",
    "meta-llama/Meta-Llama-3-8B-Instruct"
]

for model in models_to_compare:
    cook_recipe(
        recipe_data=recipe,
        automated_inputs=automated_inputs,
        model_name=model,
        conversation_type="combined"
    )


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistant

STEP: 1

Pat the chicken dry with paper towels, making sure to remove any excess moisture. Then, season the chicken all over with 2 teaspoons of salt. This will help bring out the natural flavors of the chicken.

Next, tie the legs together with kitchen twine. This will help the chicken cook more evenly and make it easier to carve later.

Finally, let the chicken sit at room temperature for 1 hour. This will allow the seasonings to penetrate the meat and the chicken to come to room temperature, which is important for even cooking.

AWAITING CONFIRMATION

Trainee: Got it! Ready for step 1.

[Template Question] Trainee: Can I use sea salt instead of kosher salt?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistant

Yes, you can use sea salt instead of kosher salt for this recipe. However, keep in mind that sea salt can be quite coarse and may not dissolve as easily as kosher salt. This might affect the texture of the chicken's skin.

If you do decide to use sea salt, I recommend using a finer-grained sea salt, such as Maldon or Fleur de Sel, to minimize the texture difference. You may also want to grind the sea salt in a mortar and pestle or a spice grinder to make it more evenly distributed.

Keep in mind that sea salt can also have a stronger flavor than kosher salt, so you may want to start with a smaller amount (1 1/2 teaspoons, for example) and adjust to taste.

Remember, the most important thing is to use a salt that you like and that will complement the flavors of the dish. If you have any other questions or concerns, feel free to ask!

[Template Question] Trainee: Why do we let the chicken sit at room temperature?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistant

Letting the chicken sit at room temperature for 1 hour before cooking is an important step in this recipe. There are a few reasons why we do this:

1. **Even cooking**: When the chicken is cold, it can take longer to cook evenly. By letting it sit at room temperature, the chicken's internal temperature will be closer to the cooking temperature, which helps it cook more evenly and reduces the risk of undercooked or overcooked areas.
2. **Better browning**: When the chicken is cold, it can be difficult to get a good sear on the skin. By letting it sit at room temperature, the skin will be more relaxed and easier to brown, which adds flavor and texture to the dish.
3. **Reduced cooking time**: Because the chicken is at room temperature, it will cook faster than if it were cold. This can help reduce the overall cooking time and ensure that the chicken is cooked to a safe internal temperature.

By letting the chicken sit at room temperature, we can ensure that it cooks mor

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistant

STEP: 2

While the chicken is sitting at room temperature, it's time to prepare the acorn squash. Start by halving the squash lengthwise and scooping out the seeds and pulp from the center. This will help the squash cook more evenly and prevent any bitterness from the seeds.

Next, use a vegetable peeler to remove the skin from the ridges of the squash halves. This will help the squash brown more evenly and add a nice texture to the dish.

Once the skin is removed, cut each squash half into ½-inch thick wedges. You should end up with 4-6 wedges per squash half, depending on their size.

Finally, arrange the squash wedges on a rimmed baking sheet in a single layer. This will help them cook evenly and prevent them from steaming instead of roasting.

AWAITING CONFIRMATION

Trainee: Done Continue.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistant

STEP: 3

In a large bowl, combine the fragrant herbs of sage and rosemary with 6 tablespoons of melted butter. This mixture will be the base of the flavorful compound butter that will elevate the dish.

Pour half of the herb butter mixture over the squash wedges on the baking sheet. Make sure to spread it evenly, so each wedge gets a good coating.

Next, sprinkle the squash with a pinch of allspice, a pinch of red pepper flakes, and ½ teaspoon of salt. Add a few grinds of black pepper to taste.

Finally, toss the squash wedges to coat them evenly with the herb butter and spice mixture. This will ensure that every bite of the squash is infused with the delicious flavors.

AWAITING CONFIRMATION

Trainee: Done Continue.

[Template Question] Trainee: Can I substitute rosemary with thyme?


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistantassistant

While rosemary and thyme are both herbs from the same family, they have distinct flavor profiles. Rosemary has a piney, slightly bitter taste, while thyme is more subtle and earthy.

If you substitute rosemary with thyme, the flavor of the dish will change slightly. Thyme will add a more delicate, savory flavor to the squash, which might be a nice complement to the sweetness of the squash. However, keep in mind that thyme is a more delicate herb than rosemary, so you might want to use a bit more of it to get the same level of flavor.

If you do decide to substitute rosemary with thyme, I recommend using a bit more thyme than the recipe calls for (maybe an extra teaspoon or so) to make up for the difference in flavor intensity. Also, be sure to chop the thyme finely before adding it to the butter mixture, so it distributes evenly throughout the dish.

Remember, substituting herbs can be a fun way to experiment with new flavors, but it's always a good idea to t

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistantassistant

Tossing the squash with the herb butter mixture serves several purposes:

1. **Flavor distribution**: By tossing the squash with the herb butter, the flavors of the herbs, butter, and spices are evenly distributed throughout the squash. This ensures that every bite has a good balance of flavors.
2. **Moisture and tenderness**: The herb butter helps to keep the squash moist and tender during the roasting process. The butter and herbs add a richness and depth to the squash, making it more appealing to the palate.
3. **Texture and crunch**: The herb butter also helps to enhance the texture of the squash. The butter and herbs can caramelize and crisp up during the roasting process, adding a nice crunch to the squash.
4. **Aroma and presentation**: Finally, the herb butter adds a beautiful aroma to the squash, making it more appealing to the senses. The tossed squash also looks more appetizing, with the herb butter coating the squash in a fragrant, golden-brown gl

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistant

STEP: 4

In the remaining herb butter mixture in the bowl, add the following ingredients:

* 1 cup of bread, cut into 1-inch cubes
* 1/2 cup of diced apples
* 2 tablespoons of oil
* 1/4 teaspoon of salt

Season the mixture with black pepper to taste, then toss everything together until the bread, apples, and herbs are well combined. Set the mixture aside for now.

In a separate small bowl, combine the following ingredients:

* 1/2 cup of diced onion
* 2 tablespoons of vinegar

Season the mixture with salt to taste, then toss everything together until the onion and vinegar are well coated. Let the mixture sit at room temperature, tossing it occasionally, until you're ready to serve.

AWAITING CONFIRMATION

Trainee: Done Continue.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistant

STEP: 5

Preheat your oven to 425°F (220°C) and place a rack in the middle and lower third of the oven. This will allow for even cooking and browning of the chicken.

In a small bowl, mix together the miso paste and 3 tablespoons of room-temperature butter until smooth and creamy. This miso butter will add a rich, savory flavor to the chicken.

Pat the chicken dry with paper towels to remove any excess moisture. Then, rub or brush the miso butter all over the chicken, making sure to cover it evenly.

Place the chicken in a large cast-iron skillet and put it on the middle rack of the oven. Roast the chicken for 50-60 minutes, or until an instant-read thermometer inserted into the thickest part of the breast registers 155°F (68°C).

Note that the temperature will continue to climb to 165°F (74°C) while the chicken rests. This is normal and ensures that the chicken is cooked to a safe internal temperature.

Once the chicken is done, remove it from the oven and let it res

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistant

STEP: 6

While the chicken is resting, it's time to finish roasting the squash and bread mixture. Place the squash on the lower rack of the oven and roast for about 25 minutes, or until it's mostly tender.

Once the squash is done, remove it from the oven and scatter the reserved bread mixture over the top of the squash. Try to spread the bread mixture into an even layer as much as possible.

Return the squash to the oven and roast for an additional 15 minutes, or until the bread is golden brown and crisp and the apples are tender.

Remove the squash from the oven and let it cool slightly. Drain the pickled onions and toss them to combine with the squash and bread mixture.

Transfer the squash mixture to a serving dish and it's ready to be served alongside the roasted chicken!

AWAITING CONFIRMATION

Trainee: Done Continue.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistantassistant

STEP: 7

In a small bowl, use your fingers to mash together the flour and butter until they are well combined. This is a simple but important step in creating a delicious and flaky crust for your chicken.

To mash the flour and butter, start by holding the bowl in one hand and the butter in the other. Use your fingers to break the butter into small pieces and then work it into the flour, using a gentle pressing motion. Continue to mash the mixture until it forms a cohesive ball and the butter is fully incorporated into the flour.

As you mash the mixture, be sure to work it in a gentle and even manner, using a light touch to avoid over-working the dough. This will help to ensure that the crust turns out flaky and tender, rather than tough and dense.

Once you have finished mashing the mixture, you can set it aside and proceed with the rest of the recipe. The mashed flour and butter will be used to coat the chicken and create a crispy, golden-brown crust.

AWA

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Chef: assistant

STEP: 8

It's time to make the pan sauce! Set the reserved skillet with the chicken drippings over medium heat. You should have about ¼ cup of drippings, but a little more or less is okay. If you have significantly more, drain off the excess and set it aside.

Add the wine to the skillet and cook, stirring often with a wooden spoon to scrape up any browned bits from the bottom of the pan. Continue cooking until the browned bits are loosened and the wine is reduced by about half. You should be able to smell the wine at this point.

Next, add the butter mixture to the skillet and cook, stirring often, until a smooth paste forms. This should take about 2 minutes.

Add the broth and any reserved drippings to the skillet, and cook, stirring constantly, until the mixture is combined and thickened. This should take about 6-8 minutes.

Once the sauce has thickened, remove the skillet from the heat and stir in the miso paste. Taste the sauce and adjust the seasoning as needed 

In [30]:
# Add to your existing imports
!pip install sentence_transformers
from typing import Dict, List
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
from sentence_transformers import SentenceTransformer, util

class LLMEvaluator:
    def __init__(self, baseline_model="gpt-4-turbo", api_key=None):
        self.baseline_model = baseline_model
        self.api_key = api_key  # Store the API key
        self.baseline_conversations = self._load_baseline()
        # If api_key is provided, set it as the default for the openai library
        if self.api_key:
            openai.api_key = self.api_key

    def _load_baseline(self):
        """Load pre-generated ChatGPT conversations as baseline"""
        try:
            with open(f"cooking_session_combined_{self.baseline_model}.json") as f:
                return json.load(f)
        except FileNotFoundError:
            raise Exception("Generate baseline conversations first")

    def evaluate_conversation(self, conversation: List[Dict]) -> Dict:
        """Evaluate a single conversation using multiple metrics"""
        # Convert conversation to text format
        text_convo = [f"{msg['role']}: {msg['content']}" for msg in conversation]

        return {
            "num_turns": len(text_convo),
            "question_ratio": self._question_ratio(text_convo),
            "step_completion": self._step_completion(text_convo),
            "semantic_similarity": self._semantic_similarity(text_convo),
            "fluency_score": self._fluency_score(text_convo),
            "instruction_following": self._instruction_following(text_convo)
        }

    def _question_ratio(self, conversation: List[str]) -> float:
        learner_msgs = [m for m in conversation if m.startswith("user:")]
        return sum(1 for msg in learner_msgs if "?" in msg) / len(learner_msgs) if learner_msgs else 0

    def _step_completion(self, conversation: List[str]) -> float:
        return 1 if any("CONVO-COMPLETE" in msg for msg in conversation) else 0

    def _semantic_similarity(self, conversation: List[str]) -> float:
        # Compare with baseline using sentence transformers
        baseline_text = [" ".join([msg['content'] for msg in self.baseline_conversations])]
        eval_text = [" ".join(conversation)]

        model = SentenceTransformer("all-MiniLM-L6-v2")
        embeddings = model.encode(baseline_text + eval_text)
        return util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()

    def _fluency_score(self, conversation: List[str]) -> float:
            # Use GPT-4 as judge
            prompt = f"""Rate the fluency of this cooking conversation (1-5):

            {''.join(conversation)}

            Score: """

            # Pass the API key to the openai.OpenAI constructor if provided
            if self.api_key:
                client = openai.OpenAI(api_key=self.api_key)
                response = client.chat.completions.create(
                    model="gpt-4",
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=1
                )
            else:
                response = openai.chat.completions.create( # Use default API key if not provided
                    model="gpt-4",
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=1
                )
            return float(response.choices[0].message.content.strip())

    def _instruction_following(self, conversation: List[str]) -> float:
        # Use your existing rubric-based evaluation
        rubric = {
            "metric_steps": "Does the conversation cover all recipe steps?",
            "measurements": "Are metric units used consistently?",
            "qna_quality": "Are questions answered accurately?"
        }

        scores = {}
        for criterion, question in rubric.items():
            prompt = f"""Does this conversation {question} (Yes/No)?
            Conversation: {''.join(conversation)}"""

            response = openai.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=1
            )
            scores[criterion] = 1 if "yes" in response.choices[0].message.content.lower() else 0

        return sum(scores.values()) / len(scores)

class EvaluationComparator:
    def __init__(self, evaluator: LLMEvaluator):
        self.evaluator = evaluator
        self.results = []

    def add_model_results(self, model_name: str, conversations: List[List[Dict]]):
        """Evaluate and store results for a model"""
        model_scores = []
        for conv in conversations:
            score = self.evaluator.evaluate_conversation(conv)
            score["model"] = model_name
            model_scores.append(score)

        self.results.extend(model_scores)

    def compare_to_baseline(self) -> pd.DataFrame:
            """Generate comparison report against ChatGPT baseline"""
            df = pd.DataFrame(self.results)

            # Calculate deltas from baseline
            baseline_df = df[df["model"] == self.evaluator.baseline_model]
            # Exclude the 'model' column from the mean calculation and subtraction
            comparison = df.groupby("model").mean().drop(columns=['model'], errors='ignore').subtract(baseline_df.drop(columns=['model'], errors='ignore').mean(), axis=1)

            return comparison[["semantic_similarity", "fluency_score", "instruction_following"]]




In [31]:
evaluator = LLMEvaluator(api_key="")  # Replace with your actual API key
comparator = EvaluationComparator(evaluator)

models_to_compare = ["gpt-4-turbo", "gpt-3.5-turbo"]

for model in models_to_compare:
    # Generate conversations
    conversations = cook_recipe(
        recipe_data=recipe,
        automated_inputs=automated_inputs,
        model_name=model,
        conversation_type="combined"
    )

    # Add to comparator
    comparator.add_model_results(model, [conversations])

report = comparator.compare_to_baseline()
print(report)



Chef: STEP: 1
Pat the chicken dry with paper towels, season it thoroughly with 10 grams of salt, and tie the legs together with kitchen twine. Let the chicken sit at room temperature for 1 hour.
AWAITING CONFIRMATION

Trainee: Got it! Ready for step 1.

[Template Question] Trainee: Can I use sea salt instead of kosher salt?

Chef: Yes, you can use sea salt instead of kosher salt. Just be aware that sea salt can sometimes be finer than kosher salt, so you might want to use a bit less to avoid over-salting the chicken. Adjust according to taste.
AWAITING CONFIRMATION

[Template Question] Trainee: Why do we let the chicken sit at room temperature?

Chef: Allowing the chicken to sit at room temperature for an hour helps in a couple of ways: 

1. It enables the salt to season the chicken more deeply, enhancing its flavor.
2. Bringing the chicken closer to room temperature ensures more even cooking once it's placed in the oven, helping to avoid overcooking the exterior while the interior is