In [1]:
%load_ext dotenv
%dotenv

In [2]:
!pwd

/Users/asaf/Workspace/biu/complex-utterance-to-code/notebooks


In [3]:
import sys
import os 

WORK_AREA = '..'
os.chdir(WORK_AREA)

paths = ['./src/', './src/api/v6', './notebooks/src']
for path in paths:
    path = os.path.normcase(path)
    if not any(os.path.normcase(sp) == path for sp in sys.path):
        sys.path.append(path)

In [4]:
from typing import Union, List
import openai
import glob
import pandas as pd
import numpy as np
import tqdm
from transformers import GPT2TokenizerFast
import math
import tokenize
from nltk.translate import bleu_score
from datetime import datetime
import time

In [5]:
!pip freeze | grep openai

openai @ file:///home/conda/feedstock_root/build_artifacts/openai_1686159246812/work


In [6]:
openai.organization = os.getenv("OPENAI_API_ORG")
openai.api_key = os.getenv("OPENAI_API_KEY")

In [7]:
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

## Building a prompt

### Build prompt from the API docstrings

In [23]:

def build_spec_prompt(path: str = './config/prompts/**/*.txt'):
    prompt_dict = {}
    for prompt_file in glob.glob(path):
        key = os.path.basename(prompt_file).split('.')[0].lower()
        with open(prompt_file, "r") as f:
            prompt_dict[key] = f.read()
    
    prompt = ""
    for key, value in prompt_dict.items():
        prompt = prompt + f"# {key.upper()}:\n\n{value}\n\n"
    
    return prompt

In [24]:
prompt = build_spec_prompt()

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
len(tokenizer(prompt, max_length=51200, truncation=True)["input_ids"])

15397

### Examples prompt


Building a prompt from the text and code examples generated by the generator

In [143]:
file_path = 'build/train_complex_utterance_to_code_with_intermediate_40k.csv.gz'
examples_df = pd.read_csv(file_path)
examples_df = examples_df.reset_index()  # make sure indexes pair with number of rows

In [168]:
def build_example_prompt(text, code=None, base_prompt=None, chat_format=False):
    text_prompt = f"{base_prompt or ''}Text: {text}"
    code_prompt = f"Code:\n{code}" if code else f"code:\n"
    example_prompt = f"{text_prompt}\n{code_prompt}" if not chat_format else [
        {
            "role": "user",
            "content": text_prompt
        },
        {
            "role": "assistant",
            "content": code_prompt
        }
    ]
    return example_prompt


def build_examples_prompt(df, limit=10, chat_format=True):
    example_prompts = []
    for index, row in df[:limit].iterrows():
        example_prompts += build_example_prompt(
            text=row['text'], 
            code=row['code'],
            base_prompt=None,#f"{index + 1}.\n",
            chat_format=chat_format
        )
        # examples_prompt = f"{examples_prompt}{example_prompt}\n"

    # end_prompt = f"Based on the previous examples, convert the following text into Python code:"
    
    if chat_format:
        prompt = example_prompts
    else:
        prompt = '\n'.join(example_prompts)
    
    return prompt

In [149]:
print(build_example_prompt(examples_df['text'][0], examples_df['code'][0], chat_format=True))

[{'role': 'user', 'content': 'Text: see if find my first reminders that I have a meeting at 3pm and there are and see if I got a reminder at mindnight in 2 days to bring the keys'}, {'role': 'assistant', 'content': 'Code:\nperson_reminded = Contact.resolve_from_text("my")\ncontent = Content.resolve_from_text("I have a meeting at 3pm")\nreminders = Reminders.find_reminders(person_reminded=person_reminded, content=content)\nreminders = first(reminders)\nResponder.respond(response=reminders)\ntest_reminders = bool(reminders)\nResponder.respond(response=test_reminders)\n\nperson_reminded = Contact.resolve_from_text("I")\ndate_time = DateTime.resolve_from_text("mindnight in 2 days")\ncontent = Content.resolve_from_text("bring the keys")\nreminders = Reminders.find_reminders(person_reminded=person_reminded, date_time=date_time, content=content)\ntest_reminders = bool(reminders)\nResponder.respond(response=test_reminders)'}]


In [150]:
prompt = build_examples_prompt(examples_df, limit=30)

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
base_prompt_tokens_len = len(tokenizer(prompt, max_length=51200, truncation=True)["input_ids"])
print(f"Base prompt tokens length: {base_prompt_tokens_len}")

Base prompt tokens length: 49250


In [189]:
def build_prompt(prompt_type: str = 'examples', examples_limit: int = 30, chat_format: bool = True):
    if prompt_type == 'examples':
        prompt = build_examples_prompt(examples_df, limit=examples_limit, chat_format=chat_format)
    elif prompt_type == 'apispec':
        spec_prompt = build_spec_prompt()
        # text, code = examples_df.iloc[0][['text', 'code']]
        prompt = f"""
    Convert the following task into Python code.
    This is the API specification:\n{spec_prompt}

    Based on the these specfications, convert the following text into Python code:
    """
    
    return prompt

## Evaluation

### Evaluation data

In [190]:
FILE_NAME = 'eval_complex_utterance_to_code_with_intermediate_152_20231112.csv.gz'
BASE_PATH = '/Users/asaf/Workspace/biu/complex-utterance-to-code/build'

def load_eval_data(file_name: str = FILE_NAME, base_path: str = BASE_PATH) -> pd.DataFrame:
    eval_df = pd.read_csv(os.path.join(base_path, file_name))
    eval_df = eval_df.reset_index()  # make sure indexes pair with number of rows
    return eval_df

### Evaluation code

In [21]:
def build_test_code(code: str, imports: str, test: str, code_embed_str: str = '# end code block to test', fail_on_error: bool = False, verbose: str = 'Fatal'):
  try:
    code_insert_idx = test.find(code_embed_str)
    program_code = imports
    program_code += '\n'
    program_code += test[:code_insert_idx]
    program_code += code
    program_code += '\n'
    program_code += test[code_insert_idx:]
  except Exception as e:
    if verbose == 'Error':
      print('[ERROR] Failed to unparse code rep to code\n', e)
    if fail_on_error:
      raise e
    program_code = ''
  finally:
    return program_code
  
  
def tokenize_source(code):
    file_path = "/tmp/example.py"

    with open(file_path, "w") as text_file:
        text_file.write(code)
        
    with open(file_path, 'rb') as f:
        tokens_gen = tokenize.tokenize(f.readline)
        tokens = []
        try:
          for token in tokens_gen:
            tokens.append(token.string)
        except Exception as e:
          pass
        
    os.remove(file_path)
    return tokens


def eval_code(code: str):
  test_results = {}
  try:
    context = {}
    exec(code, context)
    test_results = context.get('test_results', {})
  except AssertionError as e:
    test_results['test_failuers'] = test_results.get('test_failuers', 0) + 1
  except Exception as e:
    test_results['code_failure'] = test_results.get('code_failure', 0) + 1

  code_failure = test_results.get('code_failure', 0)
  correct = test_results.get('correct', 0)
  incorrect = test_results.get('incorrect', 0)
  total = (correct + incorrect) or math.inf
  accuracy = (1 - code_failure) * (correct / total)

  results = dict(
    code_failure = code_failure,
    correct = correct,
    incorrect = incorrect,
    accuracy = accuracy,
  )

  return results


def eval_bleu(code, generated_code):
  hypothesis = tokenize_source(code)
  reference = tokenize_source(generated_code)
  weights = (0.25, 0.25, 0.25, 0.25)
  score = bleu_score.sentence_bleu([reference], hypothesis, weights=weights)
  return score


def humaneval_accuracy_score(
    data: pd.DataFrame, 
    code_column_name: str = 'pred_code', 
    score_id_labels: Union[str, List[str]] = 'sample_id', 
    score_column_name: str = 'accuracy', 
):
    test_codes = data.apply(lambda x: build_test_code(code=x[code_column_name], imports=x['imports'], test=x['test']), axis=1)
    test_results = test_codes.apply(lambda test_code: eval_code(test_code))
    test_results_df = pd.DataFrame.from_records(
        test_results.values, index=test_results.index
    )
    score = test_results_df.reset_index(drop=False).groupby(score_id_labels)[score_column_name].mean().mean()
    return dict(score=score, results=test_results_df)


def bleu_accuracy_score(
    data: pd.DataFrame, 
    generated_column='output', 
    gold_column='code',
    score_id_labels: Union[str, List[str]] = 'sample_id', 
    score_column_name: str = 'bleu_score', 
):
    eval_results = data.apply(lambda x: eval_bleu(x[gold_column], x[generated_column]), axis=1)
    eval_results_df = eval_results.to_frame('bleu_score')
    score = eval_results_df.reset_index(drop=False).groupby(score_id_labels)[score_column_name].mean().mean()
    return dict(score=score, results=eval_results_df)
  
  
def model_eval(
    results_file_path, 
    output_column='output', 
    gold_column='code', 
    parse_to_code=False, 
    compute_humanval=True, 
    compute_bleu=True
):
    results_df = pd.read_csv(results_file_path, compression='gzip')
    
    results_df['sample_id'] = results_df['sample_id'].astype(int)
    results_df.set_index(['sample_id', 'sample_minor_id'], inplace=True)
    results_df.sort_index(inplace=True)
    
    code_column = 'generated_code'
    if parse_to_code:
        results_df[code_column] = results_df[output_column].apply(lambda x: parse_code_rep_to_code(x))
    else: 
        results_df[code_column] = results_df[output_column]
        
    results_df['test'] = results_df['test'].str.replace("= next(iterator)", "= next(iterator, None)")
    results_df[code_column] = results_df[code_column].str.replace(" = ContentType.", " = MessageContentType.")
    results_df[code_column] = results_df[code_column].str.replace("Message.", "Messages.")

    humaneval_results = humaneval_accuracy_score(
        data=results_df, 
        code_column_name=code_column) if compute_humanval else None
    
    bleu_results = bleu_accuracy_score(
        data=results_df, 
        generated_column=code_column, 
        gold_column=gold_column) if compute_bleu else None
    
    results = dict(
        humaneval = humaneval_results,
        bleu = bleu_results
    )
    return results
  
  

## OpenAI Predictions

### List available models

In [77]:
oai_models = openai.Model.list()
print([model_data['id'] for model_data in oai_models['data']])

['text-search-babbage-doc-001', 'gpt-4', 'gpt-3.5-turbo-16k', 'curie-search-query', 'text-davinci-003', 'text-search-babbage-query-001', 'babbage', 'babbage-search-query', 'text-babbage-001', 'text-similarity-davinci-001', 'gpt-3.5-turbo-1106', 'davinci-similarity', 'code-davinci-edit-001', 'curie-similarity', 'babbage-search-document', 'curie-instruct-beta', 'text-search-ada-doc-001', 'davinci-instruct-beta', 'gpt-3.5-turbo-instruct', 'text-similarity-babbage-001', 'text-search-davinci-doc-001', 'gpt-3.5-turbo-instruct-0914', 'babbage-similarity', 'text-embedding-ada-002', 'davinci-search-query', 'text-similarity-curie-001', 'text-davinci-001', 'text-search-davinci-query-001', 'ada-search-document', 'ada-code-search-code', 'babbage-002', 'gpt-4-vision-preview', 'davinci-002', 'gpt-4-0314', 'davinci-search-document', 'curie-search-document', 'babbage-code-search-code', 'gpt-4-0613', 'text-search-ada-query-001', 'code-search-ada-text-001', 'gpt-3.5-turbo-16k-0613', 'babbage-code-search-

In [78]:
print([model_data['id'] for model_data in oai_models['data'] if 'code' in model_data['id']])

['code-davinci-edit-001', 'ada-code-search-code', 'babbage-code-search-code', 'code-search-ada-text-001', 'babbage-code-search-text', 'code-search-babbage-code-001', 'ada-code-search-text', 'code-search-babbage-text-001', 'code-search-ada-code-001']


In [79]:
print([model_data['id'] for model_data in oai_models['data'] if 'gpt' in model_data['id']])

['gpt-4', 'gpt-3.5-turbo-16k', 'gpt-3.5-turbo-1106', 'gpt-3.5-turbo-instruct', 'gpt-3.5-turbo-instruct-0914', 'gpt-4-vision-preview', 'gpt-4-0314', 'gpt-4-0613', 'gpt-3.5-turbo-16k-0613', 'gpt-3.5-turbo-0613', 'gpt-4-1106-preview', 'gpt-3.5-turbo-0301', 'gpt-3.5-turbo']


### text-davinci-003

In [18]:
MODEL_NAME = 'text-davinci-003'

In [23]:
print(build_examples_prompt(examples_prompt, examples_df[:1], limit=15))


Transform text to code

# EXAMPLES:

text: 
see if find my first reminders that I have a meeting at 3pm and there are and see if I got a reminder at mindnight in 2 days to bring the keys

code: 
person_reminded = Contact.resolve_from_text("my")
content = Content.resolve_from_text("I have a meeting at 3pm")
reminders = Reminders.find_reminders(person_reminded=person_reminded, content=content)
reminders = first(reminders)
Responder.respond(response=reminders)
test_reminders = bool(reminders)
Responder.respond(response=test_reminders)

person_reminded = Contact.resolve_from_text("I")
date_time = DateTime.resolve_from_text("mindnight in 2 days")
content = Content.resolve_from_text("bring the keys")
reminders = Reminders.find_reminders(person_reminded=person_reminded, date_time=date_time, content=content)
test_reminders = bool(reminders)
Responder.respond(response=test_reminders)





In [19]:
examples_prompt = """
Transform text to code

# EXAMPLES:

"""
base_prompt = build_examples_prompt(examples_prompt, examples_df[:1], limit=15)

responses = []
for i, row  in tqdm_notebook(eval_df[:1].iterrows(), total=eval_df.shape[0], desc="Processing records"):
    prompt = base_prompt
    prompt += build_example_prompt(text=row['text'])
    
    response = openai.Completion.create(engine=MODEL_NAME, prompt=prompt, max_tokens=1000)
    responses.append(response)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i, row  in tqdm_notebook(eval_df[:1].iterrows(), total=eval_df.shape[0], desc="Processing records"):


Processing records:   0%|          | 0/152 [00:00<?, ?it/s]

InvalidRequestError: This is a chat model and not supported in the v1/completions endpoint. Did you mean to use v1/chat/completions?

In [314]:
date_str = datetime.now().strftime("%Y%m%d-%H%M%S")
responses_file_path = f'./build/openai-{MODEL_NAME}-{date_str}-{file_name}'
responses_file_path

'./build/openai-text-davinci-003-eval_complex_utterance_to_code_with_intermediate_82_20230519.csv.gz'

In [334]:
responses_data = [response['choices'][0]['text'] for response in responses]
eval_oai_df = eval_df.copy()
eval_oai_df['output'] = pd.Series(responses_data)
eval_oai_df.head()

Unnamed: 0,index,test_id,sample_id,sample_minor_id,text,code,test,imports,lang_rep,code_rep,output
0,0,0,0,,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ Module [ product_name = ProductName.resolve_...,"product_name = ProductName.resolve_from_text(""..."
1,1,1_a,1,a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ Module [ date_time = DateTime.resolve_from_t...,"date_time = DateTime.resolve_from_text(""tomorr..."
2,2,1_b,1,b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ Module [ date_time = DateTime.resolve_from_t...,"date_time = DateTime.resolve_from_text(""tomorr..."
3,3,2,2,,Play the new Taylor Swift album and pull up my...,"album = Album.resolve_from_text(""the new Taylo...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Play ] ] ...,[ Module [ album = Album.resolve_from_text('th...,"album_name = AlbumName.resolve_from_text(""the ..."
4,4,3_a,3,a,Send a message to dad if it rains tomorrow.,"date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Body [...,[ Module [ date_time = DateTime.resolve_from_t...,"destination = Contact.resolve_from_text(""dad"")..."


In [323]:

eval_oai_df.to_csv(responses_file_path, index=False, compression='gzip')

In [324]:
eval_oai_df = pd.read_csv(responses_file_path)
eval_oai_df.head()

Unnamed: 0,index,test_id,sample_id,sample_minor_id,text,code,test,imports,lang_rep,code_rep,output
0,0,0,0,,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ Module [ product_name = ProductName.resolve_...,"product_name = ProductName.resolve_from_text(""..."
1,1,1_a,1,a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ Module [ date_time = DateTime.resolve_from_t...,"date_time = DateTime.resolve_from_text(""tomorr..."
2,2,1_b,1,b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ Module [ date_time = DateTime.resolve_from_t...,"date_time = DateTime.resolve_from_text(""tomorr..."
3,3,2,2,,Play the new Taylor Swift album and pull up my...,"album = Album.resolve_from_text(""the new Taylo...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Play ] ] ...,[ Module [ album = Album.resolve_from_text('th...,"album_name = AlbumName.resolve_from_text(""the ..."
4,4,3_a,3,a,Send a message to dad if it rains tomorrow.,"date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Body [...,[ Module [ date_time = DateTime.resolve_from_t...,"destination = Contact.resolve_from_text(""dad"")..."


In [347]:
model_eval(
    responses_file_path,
    compute_humanval=True, 
    compute_bleu=True
)

{'humaneval': {'score': 0.08333333333333333,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         0        4          0       1.0
  1         a                           1        0          0       0.0
            b                           1        0          0       0.0
  2         NaN                         1        0          0       0.0
  3         a                           1        0          0       0.0
  ...                                 ...      ...        ...       ...
  104       b                           0        1          0       1.0
  105       NaN                         1        0          0       0.0
  108       NaN                         1        0          0       0.0
  109       a                           1        0          0       0.0
            b                           1        0          0       0.0
  
  [92

In [339]:
results_file_path = responses_file_path
parse_to_code = False
output_column = 'output'

results_df = pd.read_csv(results_file_path, compression='gzip')

results_df['sample_id'] = results_df['sample_id'].astype(int)
results_df.set_index(['sample_id', 'sample_minor_id'], inplace=True)
results_df.sort_index(inplace=True)

code_column = 'generated_code'
results_df[code_column] = results_df[output_column]
    
results_df['test'] = results_df['test'].str.replace("= next(iterator)", "= next(iterator, None)")
results_df[code_column] = results_df[code_column].str.replace(" = ContentType.", " = MessageContentType.")
results_df[code_column] = results_df[code_column].str.replace("Message.", "Messages.")

In [346]:
data = results_df
gold_column = 'code'
generated_column = 'generated_code'
score_id_labels = 'sample_id'
score_column_name: str = 'bleu_score'

eval_results = data.apply(lambda x: eval_bleu(x[gold_column], x[generated_column]), axis=1)
eval_results_df = eval_results.to_frame('bleu_score')
score = eval_results_df.reset_index(drop=False).groupby(score_id_labels)[score_column_name].mean().mean()
dict(score=score, results=eval_results_df)

{'score': 0.4386397592741266,
 'results':                            bleu_score
 sample_id sample_minor_id            
 0         NaN                0.641628
 1         a                  0.535784
           b                  0.521357
 2         NaN                0.437082
 3         a                  0.413056
 ...                               ...
 104       b                  0.679365
 105       NaN                0.299206
 108       NaN                0.365651
 109       a                  0.310716
           b                  0.291676
 
 [92 rows x 1 columns]}

In [335]:
eval_oai_df.set_index(['sample_id', 'sample_minor_id'], inplace=True)

In [348]:
print(eval_oai_df['output'].loc[(1, 'a')])

date_time = DateTime.resolve_from_text("tomorrow morning")
weather_forecasts = Weather.find_weather_forecasts(date_time=date_time)
test_rain = any((weather_forecast.weather_type for weather_forecast in weather_forecasts) == Rain)
te_est_weather_forecasts = bool(weather_forecasts)
Responder.respond(response=test_weather_forecasts)
if test_weather_forecasts and test_rain:
  date_time = DateTime.resolve_from_text("7:30")
  Alarm.create_alarm(date_time=date_time)
else:
  date_time = DateTime.resolve_from_text("8")
  Alarm.create_alarm(date_time=date_time)


In [None]:
date_time = DateTime.resolve_from_text("tomorrow morning")
weather_forecasts = Weather.find_weather_forecasts(date_time=date_time)
test_rain = any((weather_forecast.weather_type for weather_forecast in weather_forecasts) == Rain)
te_est_weather_forecasts = bool(weather_forecasts)
Responder.respond(response=test_weather_forecasts)
if test_weather_forecasts and test_rain:
  date_time = DateTime.resolve_from_text("7:30")
  Alarm.create_alarm(date_time=date_time)
else:
  date_time = DateTime.resolve_from_text("8")
  Alarm.create_alarm(date_time=date_time)

In [337]:
print(eval_oai_df['output'].loc[(105, None)])

event_name = EventName.resolve_from_text("the art festival")
date_time = DateTime.resolve_from_text("this weekend")
events = Calendar.find_events(event_name=event_name, date_time=date_time)
Tickets.purchase_tickets(events=events)

address = Address.resolve_from_text("the address")
Navigation.add_address_to_navigation(address=address)


In [None]:
event_name = EventName.resolve_from_text("the art festival")
date_time = DateTime.resolve_from_text("this weekend")
events = Calendar.find_events(event_name=event_name, date_time=date_time)
Tickets.purchase_tickets(events=events)

address = Address.resolve_from_text("the address")
Navigation.add_address_to_navigation(address=address)

In [343]:
print(eval_oai_df['output'].loc[(55, None)])

person_reminded = Contact.resolve_from_text("me")
date_time = DateTime.resolve_from_text("tomorrow")
contacts = Contact.resolve_many_from_text("Mom and Dad")
content = Content.resolve_from_text("send an email to contacts")
Reminders.create_reminder(person_reminded=person_reminded, date_time=date_


#### Evaluating the results

In [None]:
eval_df.columns

Index(['index', 'test_id', 'sample_id', 'sample_minor_id', 'text', 'code',
       'test', 'imports', 'lang_rep', 'code_rep', 'generated_code'],
      dtype='object')

In [None]:
eval_df['generated_code'] = responses_df['choices'].apply(lambda choices: choices[0]['text'] if choices else None)
eval_df['test_code'] = eval_df.apply(lambda row: build_test_code(code=row['generated_code'], imports=row['imports'], test=row['test']), axis=1)
eval_df['results'] = eval_df['test_code'].apply(lambda code: eval_code(code))

scores_df = compute_scores(eval_df, index='sample_id')
scores_df.groupby('sample_id')['score'].mean().mean()

0.0625

In [237]:
MODEL_NAME = 'text-davinci-003'

In [238]:
examples_prompt = """
Transform text to code

# EXAMPLES:

"""
base_prompt = build_examples_prompt(examples_prompt, examples_df, limit=13)

responses = []
for i, row  in tqdm_notebook(eval_df.iterrows(), total=eval_df.shape[0], desc="Processing records"):
    prompt = base_prompt
    prompt += build_example_prompt(text=row['text'])
    
    response = openai.Completion.create(engine=MODEL_NAME, prompt=prompt, max_tokens=1000)
    responses.append(response)

Processing records:   0%|          | 0/82 [00:00<?, ?it/s]

In [191]:
responses_df = pd.DataFrame(responses)
responses_df.to_csv(f'../build/openai-{MODEL_NAME}-{file_name}', index=False, compression='gzip')

In [205]:
responses_df['choices'].apply(lambda choices: choices[0]['text'] if choices else None)

0     walmart_availability = Store.check_availabilit...
1     date_time = DateTime.resolve_from_text("tomorr...
2     date_time = DateTime.resolve_from_text("tomorr...
3     music_source = MusicSource.resolve_from_text("...
4     recipient = Recipient.resolve_from_text("Dad")...
                            ...                        
77    spotify_playlist_name = "lofi"\nMediaPlayer.pl...
78    date_time = DateTime.resolve_from_text("tonigh...
79    date_time = DateTime.resolve_from_text("tonigh...
80    date_time_start = DateTime.resolve_from_text("...
81    date_time_tomorrow = DateTime.resolve_from_tex...
Name: choices, Length: 82, dtype: object

#### Evaluating the results

In [212]:
eval_df.columns

Index(['index', 'test_id', 'sample_id', 'sample_minor_id', 'text', 'code',
       'test', 'imports', 'lang_rep', 'code_rep', 'generated_code'],
      dtype='object')

In [219]:
eval_df['generated_code'] = responses_df['choices'].apply(lambda choices: choices[0]['text'] if choices else None)
eval_df['test_code'] = eval_df.apply(lambda row: build_test_code(code=row['generated_code'], imports=row['imports'], test=row['test']), axis=1)
eval_df['results'] = eval_df['test_code'].apply(lambda code: eval_code(code))

scores_df = compute_scores(eval_df, index='sample_id')
scores_df.groupby('sample_id')['score'].mean().mean()

0.0625

### gpt-3.5-turbo

In [349]:
MODEL_NAME = 'gpt-3.5-turbo-16k'

In [591]:
k = 1
wait_time_in_seconds = 1
id_labels = ['test_id', 'sample_id', 'sample_minor_id']
prompt_type = 'examples' # 'examples' or 'apispec'

In [592]:
if prompt_type == 'apispec':
    prompt = build_spec_prompt()
    text, code = examples_df.iloc[0][['text', 'code']]
    base_messages = [
        {"role": "user", "content": f"This is the API specification:\n{prompt}"},
        {"role": "assistant", "content": "OK"},
        {"role": "user", "content": f"Transform the following English text to code:\n{text}"},
        {"role": "assistant", "content": code},
    ]
else:
    base_messages = build_examples_prompt(examples_prompt, examples_df, limit=70, chat_format=True)

In [593]:
responses_file_path = f'./build/openai-{MODEL_NAME}-prompt-{prompt_type}-k{k}-{file_name}'
print(responses_file_path)

./build/openai-gpt-3.5-turbo-16k-prompt-examples-k1-eval_complex_utterance_to_code_with_intermediate_152_20230525.csv.gz


In [598]:
eval_df.sort_index(inplace=True)
eval_oai_df = pd.read_csv(responses_file_path, compression='gzip') if os.path.exists(responses_file_path) else eval_df.copy()
eval_oai_df.set_index(id_labels, inplace=True)
eval_oai_df.sort_index(inplace=True)

responses = []
force = False
for i, row  in tqdm.notebook.tqdm(eval_df.iterrows(), total=eval_df.shape[0], desc="Processing records"):
    index = tuple(row[id_label] for id_label in id_labels)
    if not force and 'output' in eval_oai_df.loc[index] and not pd.isnull(eval_oai_df.loc[index]['output']):
        time.sleep(0.01)
    else:
        response = openai.ChatCompletion.create(
            model=MODEL_NAME, 
            messages= [{"role": "system", "content": "You are a code programmer."}] +
                base_messages + 
                [{"role": "user", "content": f"Transform the following English text to code:\n{row['text']}"}],
            max_tokens=1000,
            n=k
        )
        responses.append(response)
        
        outputs = [x['message']['content'] for x in response['choices']]
        outputs = [output.replace("code:\n", "").strip() for output in outputs]
        ks = list(np.arange(k))
        
        eval_oai_df['output'] = eval_oai_df['output'].astype(object) if 'output' in eval_oai_df.columns else None
        eval_oai_df.loc[:, 'output'].loc[index] = [outputs]
        eval_oai_df['k'] = eval_oai_df['k'].astype(object) if 'k' in eval_oai_df.columns else None
        eval_oai_df.loc[:, 'k'].loc[index] = [ks]
        eval_oai_df = eval_oai_df.explode(['output', 'k'])
        
        eval_oai_df.to_csv(responses_file_path, index=True, compression='gzip')
        
        time.sleep(wait_time_in_seconds)

Processing records:   0%|          | 0/152 [00:00<?, ?it/s]

### gpt-4

In [474]:
MODEL_NAME = 'gpt-4'

In [475]:
n = 10
wait_time_in_seconds = 5
id_labels = ['sample_id'] #['test_id', 'sample_id', 'sample_minor_id']
prompt_type = 'examples' # 'examples' or 'apispec'
model_id = 'openai_' + MODEL_NAME.replace('-', '_')
slug = 'text2code'

In [484]:
test_df = load_eval_data()[1:3]
print(test_df.shape)
test_df.head(3)

(2, 12)


Unnamed: 0,index,test_id,sample_id,sample_minor_id,text,code,test,imports,lang_rep,lang_rep_raw,code_rep,code_rep_raw
1,1,1_a,1,a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...
2,2,1_b,1,b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...


In [485]:
results_file_path = f"./build/results/test-{str(test_df.shape[0])}-{model_id}-{slug}-n{n}.csv.gz"
print(results_file_path)

./build/results/test-2-openai_gpt_4-text2code-n10.csv.gz


In [486]:
# loading the results file
test_results_df = pd.read_csv(results_file_path, compression='gzip') if os.path.exists(results_file_path) else test_df.copy()
test_results_df.set_index(id_labels, inplace=True)
test_results_df.sort_index(inplace=True)
test_results_df.head(3)

Unnamed: 0_level_0,index,test_id,sample_minor_id,text,code,test,imports,lang_rep,lang_rep_raw,code_rep,code_rep_raw
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,1,1_a,a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...
1,2,1_b,b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...


In [487]:
chat_format = True
prompt = build_prompt(prompt_type=prompt_type, examples_limit=1, chat_format=chat_format)
base_messages = prompt if chat_format else [{"role": "user", "content": prompt}]
print(f"prompt_type: {prompt_type}\nprompt: {base_messages[:1000]}")

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
base_prompt_tokens_len = len(tokenizer(" ".join([p["content"] for p in prompt]), max_length=51200, truncation=True)["input_ids"])
print(f"Base prompt tokens length: {base_prompt_tokens_len}")

prompt_type: examples
prompt: [{'role': 'user', 'content': 'Text: see if find my first reminders that I have a meeting at 3pm and there are and see if I got a reminder at mindnight in 2 days to bring the keys'}, {'role': 'assistant', 'content': 'Code:\nperson_reminded = Contact.resolve_from_text("my")\ncontent = Content.resolve_from_text("I have a meeting at 3pm")\nreminders = Reminders.find_reminders(person_reminded=person_reminded, content=content)\nreminders = first(reminders)\nResponder.respond(response=reminders)\ntest_reminders = bool(reminders)\nResponder.respond(response=test_reminders)\n\nperson_reminded = Contact.resolve_from_text("I")\ndate_time = DateTime.resolve_from_text("mindnight in 2 days")\ncontent = Content.resolve_from_text("bring the keys")\nreminders = Reminders.find_reminders(person_reminded=person_reminded, date_time=date_time, content=content)\ntest_reminders = bool(reminders)\nResponder.respond(response=test_reminders)'}]
Base prompt tokens length: 264


In [488]:
openai_disabled = False
force = False
total_records = test_df.shape[0]
max_tokens = 500

# generate predictions
responses = []
print(f"Generating predictions for {total_records} records")
for i, row  in tqdm.notebook.tqdm(test_df.iterrows(), total=total_records, desc="Processing records"):   
    # check to see if we already have a result for this record
    index = tuple(row[id_label] for id_label in id_labels) if len(id_labels) > 1 else row[id_labels[0]]
    index = [index]
    if (not force) and ('output' in test_results_df.loc[index]) and (not any(pd.isnull(test_results_df.loc[index, 'output']))):
        # if we do, then skip this record
        time.sleep(0.01)
    elif not openai_disabled:
        # run the model, if we don't have a result
        messages = [{
            "role": "system", 
            "content": "You are a skilled programmer. You will be provided with a text description and your task is to convert it into Python code. Below are examples of text descriptions and their corresponding Python code implementations."
        }] + base_messages + [{
            "role": "user",
            "content": "Based on the previous examples, convert the following text into Python code:\n" + build_example_prompt(text=row["text"])
        }]
        response = openai.ChatCompletion.create(
            model=MODEL_NAME, 
            messages=messages,
            max_tokens=max_tokens,
            n=n
        )
        responses.append(response)
        
        outputs = [x['message']['content'] for x in response['choices']]
        outputs = [output.replace("Code:\n", "").replace("```python", "").replace("```py", "").replace("```", "").strip() for output in outputs]
        ns = list(np.arange(n))
        
        # duplicate the records
        records_to_duplicate = test_results_df.loc[index] # Fetch the records
        duplicated_records = pd.concat([records_to_duplicate] * (n - 1), ignore_index=False) # Duplicate the records
        test_results_df = pd.concat([test_results_df, duplicated_records], ignore_index=False) # Append the duplicated records back to the original DataFrame (optional)
        
        # set values for output and n
        test_results_df['output'] = test_results_df['output'].astype(object) if 'output' in test_results_df.columns else None
        test_results_df.loc[index, 'output'] = outputs * len(records_to_duplicate)
        test_results_df['n'] = test_results_df['n'].astype(object) if 'n' in test_results_df.columns else None
        test_results_df.loc[index, 'n'] = ns * len(records_to_duplicate)
        
        test_results_df.to_csv(results_file_path, index=True, compression='gzip')
        
        time.sleep(wait_time_in_seconds)

Generating predictions for 2 records


Processing records:   0%|          | 0/2 [00:00<?, ?it/s]

In [489]:
test_results_df

Unnamed: 0_level_0,index,test_id,sample_minor_id,text,code,test,imports,lang_rep,lang_rep_raw,code_rep,code_rep_raw,output,n
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,1,1_a,a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,The Python code for the given requirement woul...,0
1,2,1_b,b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,"To implement this, you would require a mechani...",1
1,1,1_a,a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,"from datetime import datetime, timedelta\n\n# ...",2
1,2,1_b,b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,"from datetime import datetime, timedelta\n\nde...",3
1,1,1_a,a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,"from datetime import datetime, timedelta\n\n# ...",4
1,2,1_b,b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,import schedule\nfrom datetime import datetime...,5
1,1,1_a,a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,"To achieve this with Python, we'll need to inc...",6
1,2,1_b,b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,"from datetime import datetime, timedelta\n\n# ...",7
1,1,1_a,a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,"from datetime import datetime, timedelta\n\n# ...",8
1,2,1_b,b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ root [ advcl [ mark [ If ] ] [ nsubj [ it ] ...,[ Module [ date_time = DateTime.resolve_from_t...,[ Module [ Assign [ Name [ date_time ] ] [ Cal...,Python \nimport datetime\nfrom some_weather_pa...,9


In [285]:
test_results_df.explode(['output', 'n'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,text,code,test,imports,lang_rep,lang_rep_raw,code_rep,code_rep_raw,output,n
test_id,sample_id,sample_minor_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,"pepsi_product = Product.resolve_from_text(""Pep...",0
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,"# Walmart\nstore = Store.resolve_from_text(""Wa...",1
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,"walmart = Store(""Walmart"")\nwalgreens = Store(...",2
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,"walmart = Store.resolve_from_text(""Walmart"")\n...",3
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,walmart_inventory = InventoryManager('Walmart'...,4
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,"walmart = Store.resolve_from_text(""Walmart"")\n...",5
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,walmart = Store('Walmart')\nwalgreens = Store(...,6
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,walmart = Store(name='Walmart')\nwalgreens = S...,7
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,store1 = 'Walmart'\nstore2 = 'Walgreens'\nitem...,8
0,0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ root [ hd [ Check ] ] [ obj [ det [ the ] ] ...,[ Module [ product_name = ProductName.resolve_...,[ Module [ Assign [ Name [ product_name ] ] [ ...,```python\n# Define the item and stores\nitem ...,9


In [498]:
train_df = pd.read_csv('./build/train_complex_utterance_to_code_with_intermediate_40k_2.csv.gz')
train_df.loc[0]

text            see if find my first reminders that I have a m...
code            person_reminded = Contact.resolve_from_text("m...
lang_rep        [ root\n\t[ S\n\t\t[ Command\n\t\t\t[ Action\n...
lang_rep_raw    [ root\n\t[ hd\n\t\t[ see ]\n\t]\n\t[ advcl\n\...
code_rep        \t[ Module\n\t\t[ person_reminded = Contact.re...
code_rep_raw    \t[ Module\n\t\t[ Assign\n\t\t\t[ Name\n\t\t\t...
Name: 0, dtype: object

In [None]:
train_df.loc[0, 'text']

In [492]:
train_df.loc[0, 'code']

'person_reminded = Contact.resolve_from_text("my")\ncontent = Content.resolve_from_text("I have a meeting at 3pm")\nreminders = Reminders.find_reminders(person_reminded=person_reminded, content=content)\nreminders = first(reminders)\nResponder.respond(response=reminders)\ntest_reminders = bool(reminders)\nResponder.respond(response=test_reminders)\n\nperson_reminded = Contact.resolve_from_text("I")\ndate_time = DateTime.resolve_from_text("mindnight in 2 days")\ncontent = Content.resolve_from_text("bring the keys")\nreminders = Reminders.find_reminders(person_reminded=person_reminded, date_time=date_time, content=content)\ntest_reminders = bool(reminders)\nResponder.respond(response=test_reminders)'

In [497]:
print(train_df.loc[0, 'lang_rep'])

[ root
	[ S
		[ Command
			[ Action
				[ hd
					[ see ]
				]
				[ advcl
					[ mark
						[ if ]
					]
					[ Command
						[ Action
							[ hd
								[ find ]
							]
							[ Arg
								[ obj
									[ nmod:poss
										[ my ]
									]
									[ amod
										[ first ]
									]
									[ hd
										[ reminders ]
									]
								]
							]
							[ mark
								[ that ]
							]
							[ ccomp
								[ Command
									[ Action
										[ Arg
											[ nsubj
												[ I ]
											]
										]
										[ hd
											[ have ]
										]
										[ Arg
											[ obj
												[ det
													[ a ]
												]
												[ hd
													[ meeting ]
												]
												[ nmod
													[ case
														[ at ]
													]
													[ hd
														[ 3 ]
													]
													[ nmod:tmod
														[ pm ]
													]
												]
											]
										]
									]
								]
								[ cc
									[ and 

In [503]:
print(train_df["lang_rep"].str.replace(r"\s+", " ", regex=True).iloc[0])

[ root [ S [ Command [ Action [ hd [ see ] ] [ advcl [ mark [ if ] ] [ Command [ Action [ hd [ find ] ] [ Arg [ obj [ nmod:poss [ my ] ] [ amod [ first ] ] [ hd [ reminders ] ] ] ] [ mark [ that ] ] [ ccomp [ Command [ Action [ Arg [ nsubj [ I ] ] ] [ hd [ have ] ] [ Arg [ obj [ det [ a ] ] [ hd [ meeting ] ] [ nmod [ case [ at ] ] [ hd [ 3 ] ] [ nmod:tmod [ pm ] ] ] ] ] ] ] [ cc [ and ] ] [ Command_conj [ Condition [ If [ Body [ Command [ Action [ expl [ there ] ] [ hd [ are ] ] ] ] [ cc [ and ] ] [ Command_conj [ Action [ hd [ see ] ] ] ] ] [ mark [ if ] ] [ Test [ Command [ Action [ Arg [ nsubj [ I ] ] ] [ hd [ got ] ] [ Arg [ obj [ det [ a ] ] [ hd [ reminder ] ] [ nmod [ case [ at ] ] [ hd [ mindnight ] ] ] ] ] [ Arg [ obl [ case [ in ] ] [ nummod [ 2 ] ] [ hd [ days ] ] ] ] [ mark [ to ] ] [ advcl [ Command [ Action [ hd [ bring ] ] [ Arg [ obj [ det [ the ] ] [ hd [ keys ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] ] 
