In [1]:
import pandas as pd
import json
import openai

In [2]:
# Load the sampled dataset used by every later cell.
data = pd.read_csv("../dataset/llm_sampled_data.csv")

# Read the API key from the first line of a local file. readline() keeps the
# line terminator, so strip surrounding whitespace before handing the key to
# the client; otherwise a trailing "\n" becomes part of the key.
with open("../openai_key.txt") as key_file:
    key = key_file.readline().strip()
openai.api_key = key

# Completion model used by get_response and by the batched extraction loop.
MODEL_TO_USE = "text-davinci-003"

In [3]:
# Parse the prompt-template file, then keep the first entry of its
# "ingredients_extraction" list as the active NER prompt.
with open("../prompts_templates.json") as template_file:
    prompt_templates = json.load(template_file)
ner_prompt = prompt_templates["ingredients_extraction"][0]

In [9]:
# Display the prompt template currently bound to `ner_prompt`.
ner_prompt

'Do Named entity recognition and return json format on below text\nINPUT_TEXT\n'

In [4]:
def get_response(text):
    """Run a single text through the NER prompt and return the raw completion.

    The ``INPUT_TEXT`` placeholder of the module-level ``ner_prompt`` template
    is replaced by ``text`` and the filled prompt is submitted to
    ``openai.Completion.create`` using the module-level ``MODEL_TO_USE``.

    Args:
        text: String substituted for the ``INPUT_TEXT`` placeholder.

    Returns:
        Whatever ``openai.Completion.create`` returns, unmodified.
    """
    filled_prompt = ner_prompt.replace("INPUT_TEXT", text)
    request_kwargs = {
        "model": MODEL_TO_USE,
        "prompt": filled_prompt,
        "temperature": 0,
        "max_tokens": 256,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    return openai.Completion.create(**request_kwargs)

In [11]:
# Display the "ingredients" value stored under index label 0.
data["ingredients"][0]

'. 1   lb    cauliflower, in 2 1/2 x 1 inch flowerets  (about 1/2 medium head). 1 1/2  tablespoons    parsley, minced. 1/2-1   tablespoon    chili pepper, seeded and minced. . 2   tablespoons    cider vinegar. 1/2  teaspoon    sugar. 1   pinch    salt. 1   teaspoon    Dijon mustard, country-style. 2   tablespoons    sesame oil. 1   pinch    pepper'

In [6]:
# Run the NER prompt on the "ingredients_list" value stored under index label 0.
# NOTE(review): the other cells read "ingredients" and "ingredients_raw_str"
# from `data`; confirm that "ingredients_list" is really a column of the CSV.
example_response = get_response(data["ingredients_list"][0])

In [8]:
# Print the generated text of the first choice in the example completion.
print(example_response["choices"][0]["text"])


{
  "entities": [
    {
      "name": "cauliflower",
      "type": "Food"
    },
    {
      "name": "parsley",
      "type": "Food"
    },
    {
      "name": "chili pepper",
      "type": "Food"
    },
    {
      "name": "cider vinegar",
      "type": "Food"
    },
    {
      "name": "sugar",
      "type": "Food"
    },
    {
      "name": "salt",
      "type": "Food"
    },
    {
      "name": "dijon mustard",
      "type": "Food"
    },
    {
      "name": "sesame oil",
      "type": "Food"
    },
    {
      "name": "pepper",
      "type": "Food"
    }
  ]
}


In [63]:
# Call the edit endpoint on the first recipe's raw ingredient string, split on
# "\n" and serialised as a pretty-printed JSON array.
# NOTE(review): `instruction` is not assigned in any cell visible here, so this
# cell depends on leftover kernel state — define it before this call.
response = openai.Edit.create(
    model="text-davinci-edit-001",
    input=json.dumps(data["ingredients_raw_str"][0].split("\n"), indent=2),
    instruction=instruction,
    temperature=0,
    top_p=1.0,
)

In [64]:
# Print the text of the first choice held in `response`.
print(response["choices"][0]["text"])

[
  "water",
  "uncooked old fashion grits",
  "salt",
  "shredded cheddar cheese",
  "garlic, minced ",
  "olive oil"
]



In [40]:
# Display the full object bound to `example_response`.
example_response

<OpenAIObject text_completion id=cmpl-6XXWrY1f8twG4NVvAh949V5CJwPst at 0x7feeed00dd60> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "\n[{\"entity\": \"4 cups\", \"type\": \"Quantity\"}, {\"entity\": \"water\", \"type\": \"Ingredient\"}, {\"entity\": \"1 cup\", \"type\": \"Quantity\"}, {\"entity\": \"uncooked old fashion grits\", \"type\": \"Ingredient\"}, {\"entity\": \"1 teaspoon\", \"type\": \"Quantity\"}, {\"entity\": \"salt\", \"type\": \"Ingredient\"}, {\"entity\": \"4 ounces\", \"type\": \"Quantity\"}, {\"entity\": \"shredded cheddar cheese\", \"type\": \"Ingredient\"}, {\"entity\": \"1-2 clove\", \"type\": \"Quantity\"}, {\"entity\": \"garlic, minced\", \"type\": \"Ingredient\"}, {\"entity\": \"1 tablespoon\", \"type\": \"Quantity\"}, {\"entity\": \"olive oil\", \"type\": \"Ingredient\"}]"
    }
  ],
  "created": 1673451729,
  "id": "cmpl-6XXWrY1f8twG4NVvAh949V5CJwPst",
  "model": "text-davinci-003",
  "objec

In [42]:
# Parse the first choice's completion text as JSON.
# NOTE(review): `test` is not read by any later cell visible here.
test = json.loads(example_response["choices"][0]["text"])

In [22]:
# Display the "ingredients" value stored under index label 0.
data["ingredients"][0]

"['water', 'grits', 'salt', 'cheddar cheese', 'garlic', 'olive oil']"

In [99]:
# Re-read the prompt-template file and rebind `ner_prompt` to the second
# entry of its "ingredients_extraction" list.
with open("../prompts_templates.json") as template_file:
    extraction_templates = json.load(template_file)["ingredients_extraction"]
ner_prompt = extraction_templates[1]

In [129]:
# Fill the prompt template once for each of the first 50 raw ingredient strings.
raw_ingredient_texts = data["ingredients_raw_str"][:50]
inputs = [
    ner_prompt.replace("INPUT_TEXT", raw_text)
    for raw_text in raw_ingredient_texts
]

In [130]:
from tqdm import tqdm

In [131]:
# Parsed JSON output for each prompt in `inputs`, in prompt order.
responses = []
# Number of prompts sent in a single completion request.
number_of_concurrent_responses = 5

# Step through `inputs` by the batch size. One loop handles full batches, a
# final partial batch, and an `inputs` list shorter than one batch, so no
# separate tail request (and no reliance on a leftover loop index) is needed.
for batch_start in tqdm(range(0, len(inputs), number_of_concurrent_responses)):
    response = openai.Completion.create(
        model=MODEL_TO_USE,
        prompt=inputs[batch_start : batch_start + number_of_concurrent_responses],
        temperature=0,
        max_tokens=1024,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
    # Each choice carries an "index" field; order by it so that every parsed
    # output is appended at the position of the prompt that produced it.
    ordered_choices = sorted(
        response["choices"], key=lambda choice: choice["index"]
    )
    # json.loads raises if the model returned text that is not valid JSON.
    responses += [json.loads(choice["text"]) for choice in ordered_choices]

100%|██████████| 10/10 [00:44<00:00,  4.47s/it]


In [132]:
# Print the whole list of parsed responses.
print(responses)

[['water', 'uncooked old fashion grits', 'salt', 'shredded cheddar cheese', 'garlic, minced', 'olive oil'], ['onion', 'red bell pepper', 'garlic cloves', 'extra large shrimp', 'salt', 'hot pepper sauce', 'vegetable oil', 'andouille sausage', 'long grain rice', 'bay leaves', 'diced tomatoes', 'clam juice', 'fresh parsley'], ['canned white beans', 'canned black beans', 'tomatoes', 'onion', 'celery', 'white wine vinegar', 'Italian parsley', 'table salt', 'black pepper', 'olive oil'], ['zucchini', 'yellow squash', 'onion', 'garlic', 'green bell pepper', 'italian seasoning', 'water', 'vegetable broth', 'salt', 'pepper'], ['beef stew meat', 'flour', 'salt', 'allspice', 'cinnamon', 'black pepper', 'vegetable oil', 'onions', 'dried sour cherries', 'sugar', 'water', 'dry red wine', 'beef stock', 'mushroom'], ['slivered almonds', 'cider vinegar', 'sugar', 'sugar', 'salt', 'ground cumin', 'ground coriander', 'cayenne pepper'], ['roasted chopped chicken breasts', 'chopped green onion', 'chopped re

In [137]:
# Put the first len(responses) raw ingredient strings next to the parsed
# responses, matched position by position.
response_count = len(responses)
source_texts = data["ingredients_raw_str"][:response_count].tolist()
results_dataframe = pd.DataFrame(
    {"ingredients_list": source_texts, "gpt-3_extracted": responses}
)

In [138]:
# Write the results table to a CSV file in the working directory, without the index column.
results_dataframe.to_csv("ingredients_extraction_results.csv", index=False)