In [1]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM
#login(token="")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from huggingface_hub import snapshot_download
snapshot_download(repo_id="lmsys/vicuna-13b-v1.5", local_dir="./data/vicuna/13b", cache_dir="/home/rl3725/scratch/cache_huggingface")

## 1. Zero-Shot Prompting

In [4]:
tokenizer = AutoTokenizer.from_pretrained("./data/vicuna/13b", use_fast=True)
model = AutoModelForCausalLM.from_pretrained("./data/vicuna/13b", device_map="auto")

Loading checkpoint shards: 100%|██████████| 3/3 [00:37<00:00, 12.66s/it]


In [5]:
prompt = "Can you help me to identify in which step of a recipe the user is?"
prompt_template=f"""A chat between a user and a reasoning system. You are a reasoning system that has to identify in which step of a recipe the user is on given some inputs from a perception system. The reasoning system gives helpful, detailed, and polite answers to the user's questions.

USER: {prompt}
ASSISTANT:
"""

print("\n\n*** Generate:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
print(tokenizer.decode(output[0]))



*** Generate:




<s> A chat between a user and a reasoning system. You are a reasoning system that has to identify in which step of a recipe the user is on given some inputs from a perception system. The reasoning system gives helpful, detailed, and polite answers to the user's questions.

USER: Can you help me to identify in which step of a recipe the user is?
ASSISTANT:

Sure, I'd be happy to help you identify the step of a recipe that the user is on. To do this, I'll need some information from you. Please provide me with the following details:

1. The recipe name and the type of dish it is.
2. The ingredients used in the recipe.
3. The instructions or steps provided by the recipe.
4. The current state of the dish or any recent actions performed by the user.

With this information, I can analyze the recipe and help you determine the step of the recipe that the user is currently on.</s>


In [5]:
prompt = """
USER:
It's an oatmeal recipe and these are the steps:
1: Measure 4 Tablespoons of quick-cook oats and place in a microwave-safe bowl.,
2: Add 1/2 cup water to the bowl.,
3: Microwave for 30 seconds.,
4: Add handful of raisins and stir.,
5: Microwave for 60 more seconds then stir.,
6: Top with banana slices.,
7: Sprinkle with cinnamon and drizzle with honey before serving.

And these are the possible states detected by the perception system and their probabilities:
oatmeal with water:0.54,
oatmeal with raisins with banana with cinnamon:0.06,
oatmeal with raisins:0.13,
oatmeal with raisins with banana:0.01,
oatmeal[dry]:0.15,
oatmeal with raisins with banana with cinnamon with honey:0.06,
cooked oatmeal:0.00,
empty:0.01
So, in which step of the recipe the user is?
ASSISTANT:
"""
prompt_template=f'{prompt}'

print("Output:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

Output:
<s> 
USER:
It's an oatmeal recipe and these are the steps:
1: Measure 4 Tablespoons of quick-cook oats and place in a microwave-safe bowl.,
2: Add 1/2 cup water to the bowl.,
3: Microwave for 30 seconds.,
4: Add handful of raisins and stir.,
5: Microwave for 60 more seconds then stir.,
6: Top with banana slices.,
7: Sprinkle with cinnamon and drizzle with honey before serving.

And these are the possible states detected by the perception system and their probabilities:
oatmeal with water:0.54,
oatmeal with raisins with banana with cinnamon:0.06,
oatmeal with raisins:0.13,
oatmeal with raisins with banana:0.01,
oatmeal[dry]:0.15,
oatmeal with raisins with banana with cinnamon with honey:0.06,
cooked oatmeal:0.00,
empty:0.01
So, in which step of the recipe the user is?
ASSISTANT:
Based on the probabilities provided, the user is most likely in Step 5 of the recipe, which is microwaving the oatmeal for an additional 60 seconds after adding the raisins. The probability of the state 

## 2. Few-Shot Prompting

In [2]:
import json

def read_json(file_paths):
    data = []
    for file_path in file_paths:
        with open(file_path, 'r') as file:
            recipe_data = json.load(file)
            data.extend(recipe_data)
    return data

def seconds_to_minutes(seconds):
    minutes = seconds // 60
    remaining_seconds = seconds % 60

    return minutes, remaining_seconds

def two_digit_number(number):
    return f'{number:02}'

def make_hr_state(state):
    new_state = state.replace('+', ' with ')
    new_state = new_state.replace('[', '(')
    new_state = new_state.replace(']', ')')
    
    return new_state

def extract_data(data, include_step=True):
    dataset = []
    times = set()
    for entry in data:
        raw_seconds = int(entry['timestamp'])
        if raw_seconds in times:
            continue
        else:
            times.add(raw_seconds)
        minutes, seconds = seconds_to_minutes(raw_seconds)
        image_data_list = entry['objects']
        step = entry['step']
        
        for image_data in image_data_list:
            object = image_data['label']
            if 'state' in image_data:
                states = [f'{make_hr_state(s)}: {round(v,2)}' for s,v in image_data['state'].items()]
                text = f'At 01:{two_digit_number(minutes)}:{two_digit_number(seconds)} PM, the possible object states for {object} are: {", ".join(states)}. The predicted step is:'
                if include_step:
                    text = text + f' {step}'
                dataset.append(text)

    return dataset

In [3]:
tokenizer = AutoTokenizer.from_pretrained("./data/vicuna/13b", use_fast=True)
model = AutoModelForCausalLM.from_pretrained("./data/vicuna/13b", device_map="auto")

Loading checkpoint shards: 100%|██████████| 3/3 [00:37<00:00, 12.50s/it]


In [4]:
file_paths = ['./perception_outputs/data_oatmeal_2023.11.03-18.51.03.json']
data = read_json(file_paths)
train_dataset = extract_data(data)
train_dataset[:10]

['At 01:00:14 PM, the possible object states for mug are: dripper with filter with coffee(dry): 0.47, dripper with filter: 0.3, tea-bag: 0.05, tea-bag with water: 0.15, dripper with filter with coffee(wet): 0.05, dripper(empty): 0.05, empty: 0.09. The predicted step is: 1',
 'At 01:00:15 PM, the possible object states for mug are: dripper with filter with coffee(dry): 0.61, dripper with filter: 0.01, tea-bag: 0.02, tea-bag with water: 0.06, dripper with filter with coffee(wet): 0.04, dripper(empty): 0.11, empty: 0.09, water: 0.06. The predicted step is: 1',
 'At 01:00:15 PM, the possible object states for bowl are: oatmeal(cooked): 0.55, oatmeal(dry): 0.45. The predicted step is: 1',
 'At 01:00:16 PM, the possible object states for mug are: dripper with filter with coffee(dry): 0.52, dripper with filter: 0.07, tea-bag: 0.12, tea-bag with water: 0.03, dripper with filter with coffee(wet): 0.07, dripper(empty): 0.06, empty: 0.05, water: 0.04, dripper with filter with coffee(drained): 0.1

In [5]:
prompt = "Can you help me to identify in which step of a recipe the user is?"
prompt_template=f"""A chat between a user and a reasoning system. You are a reasoning system that has to identify in which step of a recipe the user is on given some inputs from a perception system. The reasoning system gives helpful, detailed, and polite answers to the user's questions.

USER: {prompt}
ASSISTANT:
"""

print("\n\n*** Generate:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
print(tokenizer.decode(output[0]))



*** Generate:




<s> A chat between a user and a reasoning system. You are a reasoning system that has to identify in which step of a recipe the user is on given some inputs from a perception system. The reasoning system gives helpful, detailed, and polite answers to the user's questions.

USER: Can you help me to identify in which step of a recipe the user is?
ASSISTANT:

Sure, I'd be happy to help you identify the step of a recipe that the user is on. To do this, I'll need some information from you. Please provide me with the following details:

1. The recipe name and the type of dish it is.
2. The ingredients used in the recipe.
3. The instructions or steps provided by the recipe.
4. The current state of the dish or any recent actions performed by the user.

With this information, I can analyze the recipe and help you determine the step of the recipe that the user is currently on.</s>


In [6]:
prompt = f"""
USER:
It's an oatmeal recipe and these are the steps:
Step 1: Measure 4 Tablespoons of quick-cook oats and place in a microwave-safe bowl.
Step 2: Add 1/2 cup water to the bowl.
Step 3: Microwave for 30 seconds.
Step 4: Add handful of raisins and stir.
Step 5: Microwave for 60 more seconds then stir.
Step 6: Top with banana slices.
Step 7: Sprinkle with cinnamon and drizzle with honey before serving.

Below you will see examples of object states identified by the perception system and the predicted steps for them.
{'. '.join(train_dataset[:10])}
Are you ready to predict which step of a recipe the user is on, given some object states identified by a perception system?
ASSISTANT:
"""
prompt_template=f'{prompt}'

print("Output:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

Output:
<s> 
USER:
It's an oatmeal recipe and these are the steps:
Step 1: Measure 4 Tablespoons of quick-cook oats and place in a microwave-safe bowl.
Step 2: Add 1/2 cup water to the bowl.
Step 3: Microwave for 30 seconds.
Step 4: Add handful of raisins and stir.
Step 5: Microwave for 60 more seconds then stir.
Step 6: Top with banana slices.
Step 7: Sprinkle with cinnamon and drizzle with honey before serving.

Below you will see examples of object states identified by the perception system and the predicted steps for them.
At 01:00:14 PM, the possible object states for mug are: dripper with filter with coffee(dry): 0.47, dripper with filter: 0.3, tea-bag: 0.05, tea-bag with water: 0.15, dripper with filter with coffee(wet): 0.05, dripper(empty): 0.05, empty: 0.09. The predicted step is: 1. At 01:00:15 PM, the possible object states for mug are: dripper with filter with coffee(dry): 0.61, dripper with filter: 0.01, tea-bag: 0.02, tea-bag with water: 0.06, dripper with filter with co

In [7]:
file_paths = ['./perception_outputs/data_oatmeal_2023.11.03-19.04.04.json']
data = read_json(file_paths)
test_dataset = extract_data(data, include_step=False)
test_dataset[:10]

['At 01:00:02 PM, the possible object states for bowl are: oatmeal with raisins: 0.21, oatmeal with water: 0.2, oatmeal(dry): 0.11, empty: 0.2, oatmeal with raisins with banana: 0.09, oatmeal(cooked): 0.73, oatmeal with raisins with banana with cinnamon: 0.09. The predicted step is:',
 'At 01:00:03 PM, the possible object states for bowl are: oatmeal with raisins: 0.29, oatmeal with water: 0.27, oatmeal(dry): 0.06, empty: 0.11, oatmeal with raisins with banana: 0.12, oatmeal(cooked): 0.37, oatmeal with raisins with banana with cinnamon: 0.04, oatmeal with raisins with banana with cinnamon with honey: 0.05. The predicted step is:',
 'At 01:00:03 PM, the possible object states for bowl are: empty: 0.73, oatmeal with raisins with banana with cinnamon with honey: 0.18, oatmeal with raisins with banana with cinnamon: 0.09. The predicted step is:',
 'At 01:00:04 PM, the possible object states for bowl are: oatmeal with raisins: 0.2, oatmeal with water: 0.19, oatmeal(dry): 0.03, empty: 0.55, 

In [8]:
prompt = f"""
USER:
It's an oatmeal recipe and these are the steps:
Step 1: Measure 4 Tablespoons of quick-cook oats and place in a microwave-safe bowl.
Step 2: Add 1/2 cup water to the bowl.
Step 3: Microwave for 30 seconds.
Step 4: Add handful of raisins and stir.
Step 5: Microwave for 60 more seconds then stir.
Step 6: Top with banana slices.
Step 7: Sprinkle with cinnamon and drizzle with honey before serving.

Below you will see examples of object states identified by the perception system. Please predict the steps for them.
{test_dataset[1]}

ASSISTANT:
"""
prompt_template=f'{prompt}'

print("Output:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

Output:
<s> 
USER:
It's an oatmeal recipe and these are the steps:
Step 1: Measure 4 Tablespoons of quick-cook oats and place in a microwave-safe bowl.
Step 2: Add 1/2 cup water to the bowl.
Step 3: Microwave for 30 seconds.
Step 4: Add handful of raisins and stir.
Step 5: Microwave for 60 more seconds then stir.
Step 6: Top with banana slices.
Step 7: Sprinkle with cinnamon and drizzle with honey before serving.

Below you will see examples of object states identified by the perception system. Please predict the steps for them.
At 01:00:03 PM, the possible object states for bowl are: oatmeal with raisins: 0.29, oatmeal with water: 0.27, oatmeal(dry): 0.06, empty: 0.11, oatmeal with raisins with banana: 0.12, oatmeal(cooked): 0.37, oatmeal with raisins with banana with cinnamon: 0.04, oatmeal with raisins with banana with cinnamon with honey: 0.05. The predicted step is:

ASSISTANT:
At 01:00:03 PM, the possible object states for the raisins are: raisins(dry): 0.08, raisins(cooked): 0.3

In [14]:
prompt = f"""
USER:
It's an oatmeal recipe and these are the steps:
Step 1: Measure 4 Tablespoons of quick-cook oats and place in a microwave-safe bowl.
Step 2: Add 1/2 cup water to the bowl.
Step 3: Microwave for 30 seconds.
Step 4: Add handful of raisins and stir.
Step 5: Microwave for 60 more seconds then stir.
Step 6: Top with banana slices.
Step 7: Sprinkle with cinnamon and drizzle with honey before serving.

Below you will see examples of object states identified by the perception system. Please predict the steps for them.
{test_dataset[10]}<</SYS>>

[/INST]
"""
prompt_template=f'{prompt}'

print("Output:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

Output:
<s> 
<s> [INST] <<SYS>>
It's an oatmeal recipe and these are the steps:
Step 1: Measure 4 Tablespoons of quick-cook oats and place in a microwave-safe bowl.
Step 2: Add 1/2 cup water to the bowl.
Step 3: Microwave for 30 seconds.
Step 4: Add handful of raisins and stir.
Step 5: Microwave for 60 more seconds then stir.
Step 6: Top with banana slices.
Step 7: Sprinkle with cinnamon and drizzle with honey before serving.

Below you will see examples of object states identified by the perception system. Please predict the steps for them.
At 01:00:06 PM, the possible object states for bowl are: empty: 0.72, oatmeal with raisins with banana with cinnamon with honey: 0.0, oatmeal with raisins with banana with cinnamon: 0.02, oatmeal(cooked): 0.19, oatmeal with water: 0.06, oatmeal with raisins with banana: 0.0, oatmeal(dry): 0.0, oatmeal with raisins: 0.0. The predicted step is:<</SYS>>

[/INST]
Based on the possible object states you provided, the predicted step is:

Step 3: Microwav

#### Free-form Questions

In [17]:
prompt = f"""
<s>[INST] <<SYS>>
When did the user start the step 1 of the recipe?<</SYS>>
[/INST]
"""
prompt_template=f'{prompt}'

print("Output:")

input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))

Output:
<s> 
<s> [INST] <<SYS>>
When did the user start the step 1 of the recipe?<</SYS>>
[/INST]
 As a system, I don't have the ability to track the user's actions or start times. The user may have started the step 1 of the recipe at any point in the past, and there is no information available to determine when exactly they started.

However, if the user has provided any information or context about their progress in the recipe, it may be possible to infer when they started step 1 based on that information. For example, if the user has mentioned that they started the recipe yesterday, it may be reasonable to assume that they started step 1 on the previous day.

Without any additional information, it is not possible to determine when the user started step 1 of the recipe with certainty.</s>
