This notebook will generate baseline results using `gpt-3.5-turbo` and save to GCS so we can evaluate in `evaluate.ipynb`.

In [41]:
from google.cloud import secretmanager, storage
from io import StringIO
import pandas as pd
import openai
from tqdm import tqdm_notebook

In [32]:
def get_df_from_gcs_blob(blob, bucket='recipe-data-bucket'):
    # START: COPIED FROM https://github.com/googleapis/python-storage/blob/HEAD/samples/snippets/storage_fileio_write_read.py
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket)

    blob = bucket.blob(blob)
    blob = blob.download_as_string()
    blob = blob.decode()
    blob = StringIO(blob)  #tranform bytes to string here
    df = pd.read_csv(blob)
    # END: COPIED FROM https://github.com/googleapis/python-storage/blob/HEAD/samples/snippets/storage_fileio_write_read.py
    return df

def parse_ingred(inp):
    inp = inp.split('<ingredients>')
    ingreds = inp[1].split(', ')
    return [i.split(' (')[0] for i in ingreds]

def parse_cal(inp):
    inp = inp.split('<ingredients>')
    cal = inp[2].split('<calories>')[1]
    return cal

In [33]:
train_df = get_df_from_gcs_blob('train_only_cal.csv')
test_df = get_df_from_gcs_blob('test_only_cal.csv')

In [42]:
train_df['inp_ingreds'] = train_df['input'].map(parse_ingred)
train_df['inp_cal'] = train_df['input'].map(parse_cal)
train_gpt_df = train_df[['inp_ingreds', 'inp_cal']]

test_df['inp_ingreds'] = test_df['input'].map(parse_ingred)
test_df['inp_cal'] = test_df['input'].map(parse_cal)
test_gpt_df = test_df[['inp_ingreds', 'inp_cal']]

In [35]:
# START: COPIED FROM https://cloud.google.com/secret-manager/docs/access-secret-version
sm_client = secretmanager.SecretManagerServiceClient()
secret_name = "projects/controllable-recipe-generation/secrets/open-ai-api-key/versions/1"
response = sm_client.access_secret_version(name=secret_name)
# END: COPIED FROM https://cloud.google.com/secret-manager/docs/access-secret-version
openai_client = openai.OpenAI(api_key=response.payload.data.decode('UTF-8'))

In [37]:
def gen_recipe_chat_gpt(ingreds, cal):
    response = openai_client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f"Generate a recipe with the following ingredients: {', '.join(ingreds)}. Ensure the recipe has {cal} calories."},
        ]
    )
    return response.choices[0].message.content

In [44]:
gpt_gens = []
for ind, row in tqdm_notebook(train_gpt_df.iterrows(), total=train_gpt_df.shape[0]):
    gpt_gens.append(gen_recipe_chat_gpt(row['inp_ingreds'], row['inp_cal']))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for ind, row in tqdm_notebook(train_gpt_df.iterrows(), total=train_gpt_df.shape[0]):


  0%|          | 0/4115 [00:00<?, ?it/s]

In [50]:
train_gpt_df['gpt_out'] = gpt_gens
train_gpt_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_gpt_df['gpt_out'] = gpt_gens


Unnamed: 0,inp_ingreds,inp_cal,gpt_out
0,"[Spanish sherry vinegar, virgin olive oil, pap...",2250,Recipe: Spicy Sherry Vinegar Chicken with Papr...
1,"[mayonnaise, basil, lemon juice, garlic, lemon...",877,Recipe: Creamy Lemon Basil Pasta with Grilled ...
2,"[vanilla bean, apple, ed apple cider, ted butter]",1360,Recipe: Vanilla-Apple Cider Cake with Browned ...
3,"[olive oil, lime juice, orange juice, cilantro...",1861,Recipe: Citrus-Marinated Grilled Shrimp with C...
4,"[brandy, crème de cacao, heavy cream, nutmeg]",1510,Recipe: Nutmeg Brandy Cream Delight\n\nIngredi...


In [201]:
def parse_title(output):
    return output.split('Ingredients')[0][8:].strip()

def parse_ingred(output):
    try:
        ingreds = output.lower().split('ingredient')[1][3:].split('instructions')[0].split('- ')
    except Exception as e:
        return float('nan')
    for i in range(len(ingreds)):
        if ingreds[i]:
            ingreds[i] = ingreds[i].strip()
            ingreds[i] = ingreds[i].split(' (')[0]

    return ', '.join([i for i in ingreds if i])

def parse_directions(output):
    if 'Instructions' in output:
        output = output.split('Instructions')[1]
    elif 'Directions' in output:
        output = output.split('Directions')[1]
    elif 'Preparation' in output:
        output = output.split('Preparation')[1]
    else:
        return float('nan')
    
    output = output[1:].strip().split('\n\n')
    output = sum([l.split('\n') for l in output], [])
    return ', '.join(output)

In [170]:
train_gpt_df['title'] = train_gpt_df['gpt_out'].map(parse_title)
train_gpt_df['ingreds'] = train_gpt_df['gpt_out'].map(parse_ingred)
train_gpt_df['directions'] = train_gpt_df['gpt_out'].map(parse_directions)
train_gpt_df['gpt_out_formatted'] = '<title>' + train_gpt_df['title'] + '<title>' + '<ingredients>' + train_gpt_df['ingreds'] + '<ingredients>' + '<directions>' + train_gpt_df['directions'] + '<directions>'
train_gpt_df.head()

Unnamed: 0,inp_ingreds,inp_cal,gpt_out,title,ingreds,directions,gpt_out_formatted
0,"[Spanish sherry vinegar, virgin olive oil, pap...",2250,Recipe: Spicy Sherry Vinegar Chicken with Papr...,Spicy Sherry Vinegar Chicken with Paprika Rice,"4 chicken breasts, 2 tablespoons spanish sherr...","1. Preheat your oven to 375°F (190°C)., 2. In ...",<title>Spicy Sherry Vinegar Chicken with Papri...
1,"[mayonnaise, basil, lemon juice, garlic, lemon...",877,Recipe: Creamy Lemon Basil Pasta with Grilled ...,Creamy Lemon Basil Pasta with Grilled Chicken,"2 boneless, skinless chicken breasts, salt and...",1. Preheat your grill or grill pan to medium-h...,<title>Creamy Lemon Basil Pasta with Grilled C...
2,"[vanilla bean, apple, ed apple cider, ted butter]",1360,Recipe: Vanilla-Apple Cider Cake with Browned ...,Vanilla-Apple Cider Cake with Browned Butter G...,"2 cups all-purpose flour, 1 ½ cups granulated ...",1. Preheat the oven to 350°F (175°C). Grease a...,<title>Vanilla-Apple Cider Cake with Browned B...
3,"[olive oil, lime juice, orange juice, cilantro...",1861,Recipe: Citrus-Marinated Grilled Shrimp with C...,Citrus-Marinated Grilled Shrimp with Cilantro ...,"1 lb, 3 tbsp olive oil, juice of 2 limes, juic...","1. In a large mixing bowl, combine the olive o...",<title>Citrus-Marinated Grilled Shrimp with Ci...
4,"[brandy, crème de cacao, heavy cream, nutmeg]",1510,Recipe: Nutmeg Brandy Cream Delight\n\nIngredi...,Nutmeg Brandy Cream Delight,"4 ounces brandy, 3 ounces crème de cacao, 4 ou...","1. In a cocktail shaker, combine the brandy an...",<title>Nutmeg Brandy Cream Delight<title><ingr...


In [171]:
# upload to GCS
storage_client = storage.Client()
bucket = storage_client.get_bucket('recipe-data-bucket')

blob = bucket.blob('train_only_cal_gpt_results.csv')

csv_buffer = StringIO()
train_gpt_df[['gpt_out_formatted']].to_csv(csv_buffer, index=False)
blob.upload_from_string(csv_buffer.getvalue())

In [172]:
gpt_gens = []
for ind, row in tqdm_notebook(test_gpt_df.iterrows(), total=test_gpt_df.shape[0]):
    gpt_gens.append(gen_recipe_chat_gpt(row['inp_ingreds'], row['inp_cal']))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for ind, row in tqdm_notebook(test_gpt_df.iterrows(), total=test_gpt_df.shape[0]):


  0%|          | 0/1029 [00:00<?, ?it/s]

In [173]:
test_gpt_df['gpt_out'] = gpt_gens
test_gpt_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_gpt_df['gpt_out'] = gpt_gens


Unnamed: 0,inp_ingreds,inp_cal,gpt_out
0,"[pecan halves, ted butter, sea salt]",1428,Recipe: Salted Pecan Butter\n\nIngredients:\n-...
1,"[olive oil, red cabbage, red onions, thyme]",1286,Recipe: Sautéed Red Cabbage with Caramelized R...
2,"[chocolate, sugar, Amaretto, ted butter]",1690,Recipe: Chocolate Amaretto Truffles\n\nIngredi...
3,"[milk, yellow corn meal, salt, molasses]",778,Recipe: Cornmeal Porridge with Molasses\n\nIng...
4,"[vodka, lime juice, triple sec]",519,Recipe: Vodka Lime Cooler\n\nServings: 1\nCalo...


In [203]:
test_gpt_df['title'] = test_gpt_df['gpt_out'].map(parse_title)
test_gpt_df['ingreds'] = test_gpt_df['gpt_out'].map(parse_ingred)
test_gpt_df['directions'] = test_gpt_df['gpt_out'].map(parse_directions)
test_gpt_df['gpt_out_formatted'] = '<title>' + test_gpt_df['title'] + '<title>' + '<ingredients>' + test_gpt_df['ingreds'] + '<ingredients>' + '<directions>' + test_gpt_df['directions'] + '<directions>'
test_gpt_df.head()

Unnamed: 0,inp_ingreds,inp_cal,gpt_out,title,ingreds,directions,gpt_out_formatted
0,"[pecan halves, ted butter, sea salt]",1428,Recipe: Salted Pecan Butter\n\nIngredients:\n-...,Salted Pecan Butter,"300g pecan halves, 200g unsalted butter, softe...","1. Preheat the oven to 350°F (175°C)., 2. Spre...",<title>Salted Pecan Butter<title><ingredients>...
1,"[olive oil, red cabbage, red onions, thyme]",1286,Recipe: Sautéed Red Cabbage with Caramelized R...,Sautéed Red Cabbage with Caramelized Red Onion...,"2 tablespoons olive oil, 1 medium-sized red ca...",1. Heat a large sauté pan over medium heat. Ad...,<title>Sautéed Red Cabbage with Caramelized Re...
2,"[chocolate, sugar, Amaretto, ted butter]",1690,Recipe: Chocolate Amaretto Truffles\n\nIngredi...,Chocolate Amaretto Truffles,"350g dark chocolate, 1/3 cup granulated sugar,...",1. Melt the dark chocolate in a heatproof bowl...,<title>Chocolate Amaretto Truffles<title><ingr...
3,"[milk, yellow corn meal, salt, molasses]",778,Recipe: Cornmeal Porridge with Molasses\n\nIng...,Cornmeal Porridge with Molasses,"2 cups milk, 1/2 cup yellow cornmeal, 1/4 teas...",1. Pour the milk into a medium-sized saucepan ...,<title>Cornmeal Porridge with Molasses<title><...
4,"[vodka, lime juice, triple sec]",519,Recipe: Vodka Lime Cooler\n\nServings: 1\nCalo...,Vodka Lime Cooler\n\nServings: 1\nCalories per...,"2 ounces vodka, 2 ounces lime juice, 1 ounce t...","1. In a cocktail shaker, combine the vodka, li...",<title>Vodka Lime Cooler\n\nServings: 1\nCalor...


In [204]:
# upload to GCS
storage_client = storage.Client()
bucket = storage_client.get_bucket('recipe-data-bucket')

blob = bucket.blob('test_only_cal_gpt_results.csv')

csv_buffer = StringIO()
test_gpt_df[['gpt_out_formatted']].to_csv(csv_buffer, index=False)
blob.upload_from_string(csv_buffer.getvalue())