In [7]:
import pandas as pd
import json
import openai
from ast import literal_eval
from tqdm import tqdm

In [2]:
# Load the recipes dataset and configure the OpenAI client.
data = pd.read_csv("../dataset/recipes_w_search_terms.csv")
with open("../openai_key.txt") as key_file:
    # readline() keeps the trailing newline; strip it so the key is sent
    # exactly as issued (stray whitespace makes the credential invalid).
    key = key_file.readline().strip()
openai.api_key = key
# NOTE(review): legacy completions model — confirm it is still served by the API.
MODEL_TO_USE = "text-davinci-003"

In [3]:
def _ingredients_as_text(raw_ingredients):
    """Parse a Python-literal sequence string and join its items with newlines."""
    return "\n".join(literal_eval(raw_ingredients))


# Overwrites the column in place, so this cell must run exactly once per
# fresh load of `data` (a second run would try to parse already-joined text).
data["ingredients"] = data["ingredients"].apply(_ingredients_as_text)

In [4]:
# Read the prompt templates and keep the first dietary-tags template.
with open("../prompts_templates.json") as templates_file:
    prompt_templates = json.load(templates_file)
prompt = prompt_templates["dietary_tags_classification"][0]

In [6]:
# Build one prompt per recipe by substituting its ingredient text for the
# INPUT_TEXT placeholder; only the first 50 recipes are used.
model_inputs = []
for ingredients_text in data.ingredients[:50]:
    model_inputs.append(prompt.replace("INPUT_TEXT", ingredients_text))

In [9]:
# Send the prompts to the completions endpoint in fixed-size batches and
# collect one parsed JSON object per prompt, in prompt order.
responses = []
number_of_concurrent_responses = 2
# Stepping the range by the batch size covers the final partial batch too, so
# no separate tail request is needed and nothing depends on a loop variable
# that would be undefined when there are fewer inputs than one batch.
for batch_start in tqdm(
    range(0, len(model_inputs), number_of_concurrent_responses)
):
    batch_end = batch_start + number_of_concurrent_responses
    response = openai.Completion.create(
        model=MODEL_TO_USE,
        prompt=model_inputs[batch_start:batch_end],
        temperature=0,
        max_tokens=1024,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
    )
    # With several prompts per request the choices may not come back in prompt
    # order; sort on each choice's `index` so responses[k] lines up with
    # model_inputs[k].
    ordered_choices = sorted(
        response["choices"], key=lambda choice: choice["index"]
    )
    # json.loads raises on malformed model output; this is left to propagate
    # rather than silently dropping a row and misaligning the results.
    responses += [json.loads(choice["text"]) for choice in ordered_choices]

100%|██████████| 25/25 [01:18<00:00,  3.15s/it]


In [10]:
# Display the parsed classification objects collected from the model
# (one entry per completion, each the result of json.loads on its text).
responses

[{'Vegetarian': False,
  'Vegan': False,
  'Gluten-Free': True,
  'Dairy-Free': False,
  'Low-Carb': False,
  'Low-Fat': False,
  'Keto': False},
 {'Vegetarian': False,
  'Vegan': False,
  'Gluten-Free': True,
  'Dairy-Free': True,
  'Keto': False,
  'Paleo': False},
 {'Vegan': True,
  'Vegetarian': True,
  'Gluten-Free': True,
  'Dairy-Free': True,
  'Low-Carb': False,
  'Low-Fat': False,
  'Keto': False},
 {'Vegetarian': True,
  'Vegan': False,
  'Gluten-Free': True,
  'Dairy-Free': True,
  'Nut-Free': True},
 {'vegetarian': False,
  'vegan': False,
  'gluten_free': False,
  'dairy_free': True,
  'nut_free': True},
 {'Vegan': True,
  'Vegetarian': True,
  'Gluten-Free': True,
  'Dairy-Free': True,
  'Nut-Free': False,
  'Soy-Free': True,
  'Low-Carb': False,
  'Low-Sugar': False},
 {'vegetarian': False,
  'vegan': False,
  'gluten_free': True,
  'dairy_free': False,
  'nut_free': True},
 {'Vegetarian': False,
  'Vegan': False,
  'Gluten-Free': True,
  'Dairy-Free': False,
  'Nut-Free

In [11]:
# Pair each classified recipe's ingredient text with the model's parsed output.
# Only as many ingredient rows as there are responses are kept.
classified_ingredients = data["ingredients"][: len(responses)].tolist()
results_columns = {
    "ingredients_list": classified_ingredients,
    "gpt-3_classification": responses,
}
results_dataframe = pd.DataFrame(results_columns)

In [12]:
# Persist the results next to the notebook, without the index column.
results_dataframe.to_csv(
    path_or_buf="multioutput_dietary_tags_classification_results.csv",
    index=False,
)