<h1><center>The rising use case of LLM: Structuring untructured data</center></h1>

## Demo Start

In [1]:
import os
import json
import time
from pathlib import Path

import pandas as pd
from langchain.output_parsers import PydanticOutputParser
from langchain_mistralai.chat_models import ChatMistralAI
from dotenv import load_dotenv

from core import run
from prompt import DEFAULT_BASE_PROMPT, create_prompt
from schemas import Recipe

In [2]:
load_dotenv()
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")

In [3]:
path_to_data = Path(os.getcwd()) / "data" / "input"
df = pd.read_json(path_to_data / "recipes_v1.json")
df.head()

Unnamed: 0,title,date,tags,introduction,ingredients,direction,output
0,Creamy Mashed Potatoes,2021-03-12,"[potato, side, cheesefare]",![Creamy Mashed Potatoes](/pix/creamy-mashed-p...,The quantities here are for about four adult p...,1. Peel and cut the potatoes into medium sized...,"{'title': 'Creamy Mashed Potatoes', 'ingredien..."
1,Red Sauce (Ragu all'Italiana),2021-03-16,"[italian, sauce]",My great-grandma's red sauce. All purpose: goo...,- 1/3 lb salt pork - 2 lb chuck roast - 3 Clov...,"1. Mince the garlic, peel the carrot, peel and...","{'title': 'Red Sauce (Ragu all'Italiana)', 'in..."
2,Turmeric Flatbread,2022-08-09,"[bread, turkish]",A great companion to Turkish Red Lentil Soup. ...,- 1 cup Wheat Flour (white or whole) - 1/2 Tbs...,"1. Combine flour, turmeric, salt and baking po...","{'title': 'Turmeric Flatbread', 'ingredients':..."
3,Zurich-Style Meat Saute,2023-01-06,"[beef, cream, swiss, quick]","Originally called ""Züri Gschnätzlets"" (Zurich ...","- 600g\tBeef, finely sliced - 250g\tMushrooms,...",1. Fry the meat in butter at high heat until i...,"{'title': 'Zurich-Style Meat Saute', 'ingredie..."
4,Tuscan Style Pork Roast,2021-03-10,"[italian, pork, roast]",![tuscan-style-pork-roast](/pix/tuscan-style-p...,- 1 pork Roast - 2-3 Tbsp fresh rosemary - 8 c...,1.\tPreheat oven to 275°F (135°C) 2.\tButterfl...,"{'title': 'Tuscan Style Pork Roast', 'ingredie..."


In [18]:
model_name = "open-mixtral-8x7b"
#model_name = "open-mixtral-8x22b"
llm = ChatMistralAI(api_key=MISTRAL_API_KEY, model_name=model_name)
parser = PydanticOutputParser(pydantic_object=Recipe)
prompt = create_prompt(DEFAULT_BASE_PROMPT, parser, df["ingredients"][0], df["direction"][0])
prompt

[HumanMessage(content='\nWhat are the ingredients and their associated quantities as well as the steps to make the recipe described by the following The quantities here are for about four adult portions. If you are planning on eating this as a side dish, it might be more like 6-8 portions. * 1kg potatoes * 200ml milk* * 200ml mayonnaise* * ~100g cheese * Garlic powder * 12-16 strips of bacon * Butter * 3-4 green onions * Black pepper * Salt  *You can play with the proportions depending on how creamy or dry you want the mashed potatoes to be. and 1. Peel and cut the potatoes into medium sized pieces. 2. Put the potatoes in a pot with some water so that it covers the potatoes and   boil them for about 20-30 minutes, or until the potatoes are soft. 3. About ten minutes before removing the potatoes from the boiling water, cut   the bacon into little pieces and fry it. 4. Warm up the milk and mayonnaise. 5. Shred the cheese. 6. When the potatoes are done, remove all water from the pot, add 

In [19]:
raw_outputs = []
outputs = []

In [None]:
example = await run(llm, prompt, parser)
example

## End of Demo

In [None]:
df_sample = df.sample(10)

In [20]:
df_sample = df_sample.reset_index(drop=True)
for i in range(df_sample.shape[0]):
  
    prompt = create_prompt(DEFAULT_BASE_PROMPT, parser, df_sample["ingredients"][i], df_sample["direction"][i])
    recipe = await run(llm, prompt, parser)
    raw_outputs.append(recipe)
    
    output_map = {
        "recipe_id":1,
        "original_title":df_sample["title"][i],
    }

    if isinstance(recipe, Recipe):
        output = recipe.dict() | output_map
        outputs.append(output)
    else:
        output_map["raw_llm_output"] = recipe
        outputs.append(output_map)

    time.sleep(1)
   
        

Error in parsing Invalid json output: {
  "name": "Mediterranean Chicken Bulghur Salad",
  "serving_size": 4,
  "ingredients": [
    {
      "id": 1,
      "name": "Chicken Breasts",
      "quantity": 4
    },
    {
      "id": 2,
      "name": "Bulghur Wheat",
      "quantity": 1,
      "unit": "cup"
    },
    {
      "id": 3,
      "name": "Mixed Olives",
      "quantity": 1,
      "unit": "oz"
    },
    {
      "id": 4,
      "name": "Baby Spinach",
      "quantity": 4,
      "unit": "oz"
    },
    {
      "id": 5,
      "name": "Za'atar Spice Blend",
      "quantity": 2,
      "unit": "tbsp"
    },
    {
      "id": 6,
      "name": "Roma Tomatoes",
      "quantity": 2
    },
    {
      "id": 7,
      "name": "Lemon",
      "quantity": 1
    },
    {
      "id": 8,
      "name": "Feta Cheese",
      "quantity": 1/2,
      "unit": "cup"
    },
    {
      "id": 9,
      "name": "Garlic Cloves",
      "quantity": 2
    },
    {
      "id": 10,
      "name": "White Vinegar",
     

In [21]:
outputs

[{'recipe_id': 1,
  'original_title': "Za'atar Chicken Bulghur Bowls",
  'raw_llm_output': '{\n  "name": "Mediterranean Chicken Bulghur Salad",\n  "serving_size": 4,\n  "ingredients": [\n    {\n      "id": 1,\n      "name": "Chicken Breasts",\n      "quantity": 4\n    },\n    {\n      "id": 2,\n      "name": "Bulghur Wheat",\n      "quantity": 1,\n      "unit": "cup"\n    },\n    {\n      "id": 3,\n      "name": "Mixed Olives",\n      "quantity": 1,\n      "unit": "oz"\n    },\n    {\n      "id": 4,\n      "name": "Baby Spinach",\n      "quantity": 4,\n      "unit": "oz"\n    },\n    {\n      "id": 5,\n      "name": "Za\'atar Spice Blend",\n      "quantity": 2,\n      "unit": "tbsp"\n    },\n    {\n      "id": 6,\n      "name": "Roma Tomatoes",\n      "quantity": 2\n    },\n    {\n      "id": 7,\n      "name": "Lemon",\n      "quantity": 1\n    },\n    {\n      "id": 8,\n      "name": "Feta Cheese",\n      "quantity": 1/2,\n      "unit": "cup"\n    },\n    {\n      "id": 9,\n      "nam

In [12]:
recipes = [doc["recipe"] for doc in outputs[11:]]
recipes

[Recipe(name='Wholemeal Wheat Bread', serving_size=10, ingredients=[Ingredient(id=1, name='Wholemeal Wheat Flour', quantity=500.0, unit='g'), Ingredient(id=2, name='Lukewarm Water', quantity=280.0, unit='ml'), Ingredient(id=3, name='Dry Yeast', quantity=8.0, unit='g'), Ingredient(id=4, name='Olive Oil', quantity=3.5, unit='spoon'), Ingredient(id=5, name='Salt', quantity=1.5, unit='teaspoon')], steps=[Step(number=1, description='Combine all the ingredients in a bowl and knead into a dough.', preparation_time=15, cooking_time=None, waiting_time=None, used_ingredients=[1, 2, 3, 4, 5]), Step(number=2, description='Let the dough rest for ~60 minutes somewhere lukewarm.', preparation_time=None, cooking_time=None, waiting_time=60, used_ingredients=[1, 2, 3, 4, 5])], total_preparation_time=75, total_cooking_time=None, total_waiting_time=None, comments=['Make sure the water is lukewarm, not hot, to avoid killing the yeast.', 'You can adjust the amount of water if the dough is too dry or too sti

In [14]:
recipes_docs = [recipe.dict() for recipe in recipes if isinstance(recipe, Recipe)]
recipes_docs

[{'name': 'Wholemeal Wheat Bread',
  'serving_size': 10,
  'ingredients': [{'id': 1,
    'name': 'Wholemeal Wheat Flour',
    'quantity': 500.0,
    'unit': 'g'},
   {'id': 2, 'name': 'Lukewarm Water', 'quantity': 280.0, 'unit': 'ml'},
   {'id': 3, 'name': 'Dry Yeast', 'quantity': 8.0, 'unit': 'g'},
   {'id': 4, 'name': 'Olive Oil', 'quantity': 3.5, 'unit': 'spoon'},
   {'id': 5, 'name': 'Salt', 'quantity': 1.5, 'unit': 'teaspoon'}],
  'steps': [{'number': 1,
    'description': 'Combine all the ingredients in a bowl and knead into a dough.',
    'preparation_time': 15,
    'cooking_time': None,
    'waiting_time': None,
    'used_ingredients': [1, 2, 3, 4, 5]},
   {'number': 2,
    'description': 'Let the dough rest for ~60 minutes somewhere lukewarm.',
    'preparation_time': None,
    'cooking_time': None,
    'waiting_time': 60,
    'used_ingredients': [1, 2, 3, 4, 5]}],
  'total_preparation_time': 75,
  'total_cooking_time': None,
  'total_waiting_time': None,
  'comments': ['Make 

In [22]:
with open("sample_8x22b.json", "w") as f:
    json.dump(outputs, f, indent=4, ensure_ascii=False)

In [23]:
df_parsed = pd.json_normalize(outputs, sep=".")
df_parsed.columns

Index(['recipe_id', 'original_title', 'raw_llm_output', 'name', 'serving_size',
       'ingredients', 'steps', 'total_preparation_time', 'total_cooking_time',
       'total_waiting_time', 'comments', 'inference_assumptions'],
      dtype='object')

In [24]:
df_parsed

Unnamed: 0,recipe_id,original_title,raw_llm_output,name,serving_size,ingredients,steps,total_preparation_time,total_cooking_time,total_waiting_time,comments,inference_assumptions
0,1,Za'atar Chicken Bulghur Bowls,"{\n ""name"": ""Mediterranean Chicken Bulghur Sa...",,,,,,,,,
1,1,Wholemeal Wheat Flour Pizza Dough,,Wholemeal Wheat Bread,,"[{'id': 1, 'name': 'Wholemeal Wheat Flour', 'q...","[{'number': 1, 'description': 'Combine all the...",,,60.0,[],[]
2,1,Hakka-Style Meatballs,,Spicy Meatballs with Peppers and Rice,3.0,"[{'id': 1, 'name': 'Ground Beef', 'quantity': ...","[{'number': 1, 'description': 'Preheat oven to...",,,,[],[]
3,1,Medieval Beef Soup,,Slow Cooker Beef and Turnip Soup,4.0,"[{'id': 1, 'name': 'beef roast', 'quantity': N...","[{'number': 1, 'description': 'Cube the roast ...",25.0,140.0,25.0,[],[]
4,1,Hearty Breakfast Oatmeal,,Hearty Breakfast Oatmeal,1.0,"[{'id': 1, 'name': 'Large Flake Oats', 'quanti...","[{'number': 1, 'description': 'Chop or mash nu...",,,,"[Adjust the amount of nuts, cranberries, cinna...",[Assumed that the recipe is for a single servi...
5,1,Red Lentil Dahl,,Yellow Curry Lentil Soup,,"[{'id': 1, 'name': 'Olive Oil', 'quantity': 3....","[{'number': 1, 'description': 'Heat oil in a s...",,,,[],[]
6,1,Beef Wellington,,Beef Wellington,4.0,"[{'id': 1, 'name': 'Fillet Beef', 'quantity': ...","[{'number': 1, 'description': 'Season the fill...",,,,"[Fillet beef is preferred for a soft, tender a...",[]
7,1,Cheddar-Crusted Chicken,,Panko and Cheddar Crusted Chicken,2.0,"[{'id': 1, 'name': 'Chicken Breasts', 'quantit...","[{'number': 1, 'description': 'Combine panko, ...",20.0,20.0,,"[This recipe serves 2 people., Ensure the chic...",[]
8,1,Lenten Lentil Curry,,Green Lentils with Vermicelli,,"[{'id': 1, 'name': 'green lentils', 'quantity'...","[{'number': 1, 'description': 'Thoroughly rins...",,,,[Have a blessed Lent.],[]
9,1,Hamburger dressing,,Spicy Sauce,,"[{'id': 1, 'name': 'mayonnaise or aioli', 'qua...","[{'number': 1, 'description': 'Mix ingredients...",5.0,,,[Garlic is optional if using aioli],[Assumed that the recipe is for a single servi...
