In [1]:
import sys
import os 

WORK_AREA = "/Users/asaf/Workspace/biu/complex-utterance-to-code"
os.chdir(WORK_AREA)

paths = ['./src/', './src/api/v6']
for path in paths:
    path = os.path.normcase(path)
    if not any(os.path.normcase(sp) == path for sp in sys.path):
        sys.path.append(path)

In [2]:
from typing import List, Union, Optional, TypeVar, Generic
import pandas as pd
import ast
import math
import glob
from representations.tree.tree import Tree
from representations.builders.ast.tearers.tearer_factory import TearerFactory
import tokenize
from nltk.translate import bleu_score

In [3]:
def parse_code_rep_to_code(code_rep: str, verbose: str = 'Fatal') -> str:
  try:
    tree = Tree.unparse(code_rep)
    tearer = TearerFactory().get_tearer(tree.root_node)
    asdl = tearer.tear(tree.root_node)
    code = ast.unparse(asdl)
  except Exception as e:
    if verbose == 'Error':
      print(f"[Error] failed to prase code rep to code:\n", e)
    code = ''
  finally:
    return code


def build_test_code(code: str, imports: str, test: str, code_embed_str: str = '# end code block to test', fail_on_error: bool = False, verbose: str = 'Fatal'):
  try:
    code_insert_idx = test.find(code_embed_str)
    program_code = imports
    program_code += '\n'
    program_code += test[:code_insert_idx]
    program_code += code
    program_code += '\n'
    program_code += test[code_insert_idx:]
  except Exception as e:
    if verbose == 'Error':
      print('[ERROR] Failed to unparse code rep to code\n', e)
    if fail_on_error:
      raise e
    program_code = ''
  finally:
    return program_code
  
  
def tokenize_source(code):
    file_path = "/tmp/example.py"

    with open(file_path, "w") as text_file:
        text_file.write(code)
        
    with open(file_path, 'rb') as f:
        tokens_gen = tokenize.tokenize(f.readline)

        tokens = [token.string for token in tokens_gen]
        
    os.remove(file_path)
    return tokens


def eval_code(code: str):
  test_results = {}
  try:
    context = {}
    exec(code, context)
    test_results = context.get('test_results', {})
  except AssertionError as e:
    test_results['test_failuers'] = test_results.get('test_failuers', 0) + 1
  except Exception as e:
    test_results['code_failure'] = test_results.get('code_failure', 0) + 1

  code_failure = test_results.get('code_failure', 0)
  correct = test_results.get('correct', 0)
  incorrect = test_results.get('incorrect', 0)
  total = (correct + incorrect) or math.inf
  accuracy = (1 - code_failure) * (correct / total)

  results = dict(
    code_failure = code_failure,
    correct = correct,
    incorrect = incorrect,
    accuracy = accuracy,
  )

  return results


def eval_bleu(code, generated_code):
  hypothesis = tokenize_source(code)
  reference = tokenize_source(generated_code)
  weights = (0.25, 0.25, 0.25, 0.25)
  score = bleu_score.sentence_bleu([reference], hypothesis, weights=weights)
  return score
  

In [4]:
def humaneval_accuracy_score(
    data: pd.DataFrame, 
    code_column_name: str = 'pred_code', 
    score_id_labels: Union[str, List[str]] = 'sample_id', 
    score_column_name: str = 'accuracy', 
):
    test_codes = data.apply(lambda x: build_test_code(code=x[code_column_name], imports=x['imports'], test=x['test']), axis=1)
    test_results = test_codes.apply(lambda test_code: eval_code(test_code))
    test_results_df = pd.DataFrame.from_records(
        test_results.values, index=test_results.index
    )
    score = test_results_df.reset_index(drop=False).groupby(score_id_labels)[score_column_name].mean().mean()
    return dict(score=score, results=test_results_df)


def bleu_accuracy_score(
    data: pd.DataFrame, 
    generated_column='output', 
    gold_column='code',
    score_id_labels: Union[str, List[str]] = 'sample_id', 
    score_column_name: str = 'bleu_score', 
):
    eval_results = data.apply(lambda x: eval_bleu(x[gold_column], x[generated_column]), axis=1)
    eval_results_df = eval_results.to_frame('bleu_score')
    score = eval_results_df.reset_index(drop=False).groupby(score_id_labels)[score_column_name].mean().mean()
    return dict(score=score, results=eval_results_df)

In [5]:
def model_eval(
    results_file_path, 
    output_column='output', 
    gold_column='code', 
    parse_to_code=False, 
    compute_humanval=True, 
    compute_bleu=True
):
    results_df = pd.read_csv(results_file_path, compression='gzip')
    
    results_df['sample_id'] = results_df['sample_id'].astype(int)
    results_df.set_index(['sample_id', 'sample_minor_id'], inplace=True)
    results_df.sort_index(inplace=True)
    
    code_column = 'generated_code'
    if parse_to_code:
        results_df[code_column] = results_df[output_column].apply(lambda x: parse_code_rep_to_code(x))
    else: 
        results_df[code_column] = results_df[output_column]
        
    results_df['test'] = results_df['test'].str.replace("= next(iterator)", "= next(iterator, None)")
    results_df[code_column] = results_df[code_column].str.replace(" = ContentType.", " = MessageContentType.")
    results_df[code_column] = results_df[code_column].str.replace("Message.", "Messages.")
    results_df[code_column] = results_df[code_column].str.replace("Calendar.purchase_ticket(", "Calendar.purchase_tickets(")

    humaneval_results = humaneval_accuracy_score(
        data=results_df, 
        code_column_name=code_column)
    
    bleu_results = bleu_accuracy_score(
        data=results_df, 
        generated_column=code_column, 
        gold_column=gold_column) 
    
    results = dict(
        humaneval = humaneval_results,
        bleu = bleu_results
    )
    return results

In [6]:
result = model_eval(
    results_file_path="~/Downloads/results/codet5-small-rep2rep-test-82-2023-05-17_133852.csv.gz", 
    output_column='output', 
    gold_column='code', 
    parse_to_code=True, 
    compute_humanval=False, 
    compute_bleu=True
)
result['bleu']

{'score': 0.3202581743011407,
 'results':                               bleu_score
 sample_id sample_minor_id               
 0         NaN              7.290246e-232
 1         a                 2.261735e-01
           b                 2.261735e-01
 2         NaN               1.867093e-01
 3         a                 2.667071e-01
 ...                                  ...
 87        NaN               2.220809e-01
 88        a                 4.800686e-01
           b                 4.800686e-01
 92        NaN               3.714730e-01
 94        NaN               2.282202e-01
 
 [82 rows x 1 columns]}

In [7]:
df = pd.read_csv('build/train_complex_utterance_to_code_with_intermediate_40k.csv.gz', compression='gzip')
df = df.sample(n=10, replace=True, random_state=42)

In [8]:
results_file_path = "~/Downloads/results/codet5p-220m-text2code-test-82-2023-05-18_202622.csv.gz"
results_df = pd.read_csv(results_file_path, compression='gzip')
results_df.columns

Index(['test_id', 'sample_id', 'sample_minor_id', 'text', 'code', 'test',
       'imports', 'lang_rep', 'code_rep', 'lang_rep_pretty', 'code_rep_pretty',
       'output', 'target'],
      dtype='object')

In [9]:
results_df['sample_id'] = results_df['sample_id'].astype(int)
results_df.set_index(['sample_id', 'sample_minor_id'], inplace=True)
results_df.sort_index(inplace=True)
results_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,test_id,text,code,test,imports,lang_rep,code_rep,lang_rep_pretty,code_rep_pretty,output,target
sample_id,sample_minor_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,,0,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ Module [ product_name = ProductName.resolve_...,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ Module [ product_name = ProductName.resolve_...,"product_name = ProductName.resolve_from_text(""...","product_name = ProductName.resolve_from_text(""..."
1,a,1_a,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ Module [ date_time = DateTime.resolve_from_t...,[ root [ S [ Command [ Condition [ If [ Test [...,[ Module [ date_time = DateTime.resolve_from_t...,"product_name = ProductName.resolve_from_text(""...","date_time = DateTime.resolve_from_text(""tomorr..."
1,b,1_b,"If it's raining tomorrow morning, set my alarm...","date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Test [...,[ Module [ date_time = DateTime.resolve_from_t...,[ root [ S [ Command [ Condition [ If [ Test [...,[ Module [ date_time = DateTime.resolve_from_t...,"product_name = ProductName.resolve_from_text(""...","date_time = DateTime.resolve_from_text(""tomorr..."
2,,2,Play the new Taylor Swift album and pull up my...,"album = Album.resolve_from_text(""the new Taylo...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Play ] ] ...,[ Module [ album = Album.resolve_from_text('th...,[ root [ S [ Command [ Action [ hd [ Play ] ] ...,[ Module [ album = Album.resolve_from_text('th...,"location = Location.resolve_from_text(""the new...","album = Album.resolve_from_text(""the new Taylo..."
3,a,3_a,Send a message to dad if it rains tomorrow.,"date_time = DateTime.resolve_from_text(""tomorr...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Condition [ If [ Body [...,[ Module [ date_time = DateTime.resolve_from_t...,[ root [ S [ Command [ Condition [ If [ Body [...,[ Module [ date_time = DateTime.resolve_from_t...,"recipient = Contact.resolve_from_text(""Dad"")\n...","date_time = DateTime.resolve_from_text(""tomorr..."


In [10]:
i = 0
print(results_df.iloc[i]['text'])
print('\n')
print(results_df.iloc[i]['code'])
print('\n')
print(results_df.iloc[i]['output'])

Check the availability of Pepsi at Walmart and also check it at Walgreens.


product_name = ProductName.resolve_from_text("Pepsi")
location = Location.resolve_from_text("Walmart")
products = Shopping.find_products(product_name=product_name, location=location)
Responder.respond(response=products)

location = Location.resolve_from_text("Walgreens")
products = Shopping.find_products(product_name=product_name, location=location)
Responder.respond(response=products)



product_name = ProductName.resolve_from_text("Pepsi")
product_attribute = ProductAttribute.resolve_from_text("a availability")
location = Location.resolve_from_text("at Walmart")
products = Shopping.find_products(product_name=product_name, product_attribute=product_attribute, location=location)
Responder.respond(response=products)
location = Location.resolve_from_text("at Walgreens")
products = utils.filter(products, location=location)
test_products = bool(products)
Responder.respond(response=test_products)

product_name = Pr

In [11]:
test_code = build_test_code(results_df.iloc[0]['output'], results_df.iloc[0]['imports'], results_df.iloc[0]['test'])
test_code = test_code.replace("= next(iterator)", "= next(iterator, None)")
print(test_code)

from entities.generic import *
from entities.calendar import *
from entities.home import *
from entities.map import *
from entities.message import *
from entities.music import *
from entities.navigation import *
from entities.reminder import *
from entities.shopping import *
from entities.weather import *
from actions.calendar import *
from actions.clock import *
from actions.calendar import *
from actions.home import *
from actions.map import *
from actions.messages import *
from actions.music import *
from actions.navigation import *
from actions.reminders import *
from actions.responder import *
from actions.shopping import *
from actions.weather import *
from providers.data_model import DataModel
from datetime import datetime, timedelta
import utils.api_utils as utils
from utils.test_utils import *

# test data
data_model = DataModel(reset=True)
data_product_name_pepsi = ProductName(text="Pepsi")
data_model.append(data_product_name_pepsi)
data_product_name_coca = ProductName(text="

In [12]:
eval_code(test_code)

{'code_failure': 1, 'correct': 0, 'incorrect': 0, 'accuracy': 0.0}

In [13]:
context = {}
exec(test_code, context)
test_results = context.get('test_results', {})

TypeError: object of type 'NoneType' has no len()

In [14]:
from entities.generic import *
from entities.calendar import *
from entities.home import *
from entities.map import *
from entities.message import *
from entities.music import *
from entities.navigation import *
from entities.reminder import *
from entities.shopping import *
from entities.weather import *
from actions.calendar import *
from actions.clock import *
from actions.calendar import *
from actions.home import *
from actions.map import *
from actions.messages import *
from actions.music import *
from actions.navigation import *
from actions.reminders import *
from actions.responder import *
from actions.shopping import *
from actions.weather import *
from providers.data_model import DataModel
from datetime import datetime, timedelta
import utils.api_utils as utils
from utils.test_utils import *

# test data
data_model = DataModel(reset=True)
data_product_name_pepsi = ProductName(text="Pepsi")
data_model.append(data_product_name_pepsi)
data_product_name_coca = ProductName(text="coca cola")
data_model.append(data_product_name_coca)
data_location1 = Location(text="Walmart")
data_model.append(data_location1)
data_location2 = Location(text="Walgreens")
data_model.append(data_location2)
data_location3 = Location(text="CVS")
data_model.append(data_location3)
data_product1 = ProductEntity(
    product_name=data_product_name_pepsi, location=data_location1
)
data_model.append(data_product1)
data_product2 = ProductEntity(
    product_name=data_product_name_pepsi, location=data_location2
)
data_model.append(data_product2)
data_product3 = ProductEntity(
    product_name=data_product_name_coca, location=data_location1
)
data_model.append(data_product3)
data_product4 = ProductEntity(
    product_name=data_product_name_coca, location=data_location2
)
data_model.append(data_product4)
data_product5 = ProductEntity(
    product_name=data_product_name_pepsi, location=data_location3
)
data_model.append(data_product5)
data_product6 = ProductEntity(
    product_name=data_product_name_pepsi, location=data_location1
)
data_model.append(data_product6)

# start code block to test
product_name = ProductName.resolve_from_text("Pepsi")
product_attribute = ProductAttribute.resolve_from_text("a availability")
location = Location.resolve_from_text("at Walmart")
products = Shopping.find_products(product_name=product_name, product_attribute=product_attribute, location=location)
Responder.respond(response=products)
location = Location.resolve_from_text("at Walgreens")
products = utils.filter(products, location=location)
test_products = bool(products)
Responder.respond(response=test_products)

product_name = ProductName.resolve_from_text("Pepsi")
location = Location.resolve_from_text("at Walgreens")
order = Shopping.order(product_name=product_name, location=location)
# end code block to test

# assertions
test_results = {}

iterator = iter(data_model.get_response([ProductEntity]))

expected = [data_product1, data_product6]
actual = next(iterator, None)
response_assertions(expected, actual, test_results)

expected = [data_product2]
actual = next(iterator, None)
response_assertions(expected, actual, test_results)

assert_test(test_results)


TypeError: object of type 'NoneType' has no len()

In [15]:
test_results

{'correct': 2}

In [16]:
results_df['test'] = results_df['test'].str.replace("= next(iterator)", "= next(iterator, None)")
results_df['output'] = results_df['output'].str.replace(" = ContentType.", " = MessageContentType.")
results_df['output'] = results_df['output'].str.replace("Message.", "Messages.")

In [17]:

humaneval_accuracy_score(
    data=results_df, 
    code_column_name = 'output', 
    score_id_labels = 'sample_id', 
    score_column_name = 'accuracy', 
)

{'score': 0.3885044642857143,
 'results':                            code_failure  correct  incorrect  accuracy
 sample_id sample_minor_id                                            
 0         NaN                         1        0          0  0.000000
 1         a                           0        0          1  0.000000
           b                           0        0          1  0.000000
 2         NaN                         0        0          2  0.000000
 3         a                           0        2          0  1.000000
 ...                                 ...      ...        ...       ...
 87        NaN                         1        0          0  0.000000
 88        a                           0        6          0  1.000000
           b                           0        0          2  0.000000
 92        NaN                         0        2          1  0.666667
 94        NaN                         0        0          2  0.000000
 
 [82 rows x 4 columns]}

In [18]:
print(results_df.loc[88, 'a']['text'])
print('\n')
print(results_df.loc[88, 'a']['code'])
print('\n')
print(results_df.loc[88, 'a']['output'])

Check if it's supposed to rain tonight and if it's not text Brian that I want to go out tonight


data_weather_attribute = WeatherAttribute.resolve_from_text("rain")
date_time = DateTime.resolve_from_text("tonight")
weather_forecasts = Weather.find_weather_forecasts(
    date_time=date_time, weather_attribute=data_weather_attribute
)
Responder.respond(response=weather_forecasts)

test_weather_forecasts = bool(weather_forecasts)
if not test_weather_forecasts:
    message_content_type = MessageContentType.resolve_from_text("text")
    recipient = Contact.resolve_from_text("Brian")
    content = Content.resolve_from_text("I want to go out tonight")
    Messages.send_message(
        message_content_type=message_content_type,
        recipient=recipient,
        content=content,
    )



product_name = ProductName.resolve_from_text("rain")
product_attribute = ProductAttribute.resolve_from_text("tonight")
products = Shopping.find_products(product_name=product_name, product_attribute=product

In [19]:
product_name = ProductName.resolve_from_text("rain")
product_attribute = ProductAttribute.resolve_from_text("tonight")
products = Shopping.find_products(product_name=product_name, product_attribute=product_attribute)
Responder.respond(response=products)

message_content_type = MessageContentType.resolve_from_text("text")
recipient = Contact.resolve_from_text("Brian")
content = Content.resolve_from_text("I want to go out tonight")
Messages.send_message(recipient=recipient, content=content, message_content_type=message_content_type)

<entities.message.MessageEntity at 0x169d53a90>

In [20]:
!ls ~/Downloads/results

codet5-base-rep2rep-test-82-2023-05-18_161634.csv.gz
codet5-small-rep2code-test-92-2023-05-21_012655.csv.gz
codet5-small-rep2rep-test-82-2023-05-17_133852.csv.gz
codet5-small-text2code-test-92-2023-05-21_012655.csv.gz
codet5-small-text2rep-test-92-2023-05-21_012655.csv.gz
codet5-small-text_rep2rep-test-92-2023-05-21_012655.csv.gz
codet5p-220m-rep2rep-test-82-2023-05-18_181859.csv.gz
codet5p-220m-text2code-test-82-2023-05-18_202622.csv.gz
codet5p-220m-text2code-test-82-2023-05-20_233005.csv.gz
codet5p-220m-textrep2rep-test-92-2023-05-19_191947.csv.gz


In [21]:
model_eval(
    results_file_path="~/Downloads/results/codet5-small-rep2rep-test-82-2023-05-17_133852.csv.gz", 
    output_column='output', 
    gold_column='code', 
    parse_to_code=True, 
    compute_humanval=True, 
    compute_bleu=False
)

{'humaneval': {'score': 0.2726190476190476,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         1        0          0  0.000000
  1         a                           0        0          1  0.000000
            b                           0        0          1  0.000000
  2         NaN                         1        0          0  0.000000
  3         a                           0        2          0  1.000000
  ...                                 ...      ...        ...       ...
  87        NaN                         0        2          1  0.666667
  88        a                           1        0          0  0.000000
            b                           1        0          0  0.000000
  92        NaN                         0        2          1  0.666667
  94        NaN                         0        0          2  0.000000
  
  [82 

In [22]:
model_eval(
    results_file_path="~/Downloads/results/codet5-base-rep2rep-test-82-2023-05-18_161634.csv.gz",
    parse_to_code=True,
)

{'humaneval': {'score': 0.2374627976190476,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         1        0          0  0.000000
  1         a                           0        0          1  0.000000
            b                           0        0          1  0.000000
  2         NaN                         0        0          2  0.000000
  3         a                           0        2          0  1.000000
  ...                                 ...      ...        ...       ...
  87        NaN                         0        0          2  0.000000
  88        a                           1        0          0  0.000000
            b                           1        0          0  0.000000
  92        NaN                         0        2          1  0.666667
  94        NaN                         0        0          2  0.000000
  
  [82 

In [23]:
model_eval(
    results_file_path="~/Downloads/results/codet5p-220m-rep2rep-test-82-2023-05-18_181859.csv.gz",
    parse_to_code=True,
)

{'humaneval': {'score': 0.28140219155844154,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         1        0          0  0.000000
  1         a                           0        0          1  0.000000
            b                           0        0          1  0.000000
  2         NaN                         0        0          2  0.000000
  3         a                           0        1          1  0.500000
  ...                                 ...      ...        ...       ...
  87        NaN                         0        0          2  0.000000
  88        a                           0        3          2  0.600000
            b                           0        2          1  0.666667
  92        NaN                         0        2          1  0.666667
  94        NaN                         1        0          0  0.000000
  
  [82

In [24]:
model_eval(results_file_path = "~/Downloads/results/codet5p-220m-text2code-test-82-2023-05-18_202622.csv.gz")

{'humaneval': {'score': 0.4106398809523809,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         1        0          0  0.000000
  1         a                           0        0          1  0.000000
            b                           0        0          1  0.000000
  2         NaN                         0        0          2  0.000000
  3         a                           0        2          0  1.000000
  ...                                 ...      ...        ...       ...
  87        NaN                         0        2          1  0.666667
  88        a                           0        6          0  1.000000
            b                           0        0          2  0.000000
  92        NaN                         0        2          1  0.666667
  94        NaN                         0        0          2  0.000000
  
  [82 

In [25]:
model_eval(results_file_path = "~/Downloads/results/codet5p-220m-text2code-test-82-2023-05-18_202622.csv.gz")

{'humaneval': {'score': 0.4106398809523809,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         1        0          0  0.000000
  1         a                           0        0          1  0.000000
            b                           0        0          1  0.000000
  2         NaN                         0        0          2  0.000000
  3         a                           0        2          0  1.000000
  ...                                 ...      ...        ...       ...
  87        NaN                         0        2          1  0.666667
  88        a                           0        6          0  1.000000
            b                           0        0          2  0.000000
  92        NaN                         0        2          1  0.666667
  94        NaN                         0        0          2  0.000000
  
  [82 

In [26]:
df = pd.read_csv("~/Downloads/results/codet5p-220m-text2code-test-82-2023-05-18_202622.csv.gz")
df.set_index('test_id', inplace=True)

In [27]:
test_id = '88_a'
print(df.loc[test_id]['text'])
print(df.loc[test_id]['output'])

Check if it's supposed to rain tonight and if it's not text Brian that I want to go out tonight
product_name = ProductName.resolve_from_text("rain")
product_attribute = ProductAttribute.resolve_from_text("tonight")
products = Shopping.find_products(product_name=product_name, product_attribute=product_attribute)
Responder.respond(response=products)

message_content_type = ContentType.resolve_from_text("text")
recipient = Contact.resolve_from_text("Brian")
content = Content.resolve_from_text("I want to go out tonight")
Message.send_message(recipient=recipient, content=content, message_content_type=message_content_type)


In [28]:
model_eval(
    results_file_path = "~/Downloads/results/codet5-small-text2rep-test-92-2023-05-21_012655.csv.gz",
    parse_to_code=True
)

{'humaneval': {'score': 0.22916666666666666,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         1        0          0  0.000000
  1         a                           1        0          0  0.000000
            b                           1        0          0  0.000000
  2         NaN                         1        0          0  0.000000
  3         a                           1        0          0  0.000000
  ...                                 ...      ...        ...       ...
  104       b                           1        0          0  0.000000
  105       NaN                         0        2          3  0.400000
  108       NaN                         0        0          2  0.000000
  109       a                           0        2          1  0.666667
            b                           0        1          0  1.000000
  
  [92

In [10]:
model_eval(
    results_file_path = "~/Downloads/results/codet5-small-rep2code-test-92-2023-05-21_012655.csv.gz",
    parse_to_code=True
)

{'humaneval': {'score': 0.16666666666666666,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         0        0          0  0.000000
  1         a                           0        0          1  0.000000
            b                           0        0          1  0.000000
  2         NaN                         0        0          1  0.000000
  3         a                           0        0          1  0.000000
  ...                                 ...      ...        ...       ...
  104       b                           0        1          0  1.000000
  105       NaN                         0        0          1  0.000000
  108       NaN                         0        0          2  0.000000
  109       a                           0        2          1  0.666667
            b                           0        1          0  1.000000
  
  [92

In [18]:
model_eval(
    results_file_path = "~/Downloads/results/codet5-small-text2code-test-92-2023-05-21_012655.csv.gz",
    parse_to_code=False
)

{'humaneval': {'score': 0.2887152777777778,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         0        0          0  0.000000
  1         a                           0        0          1  0.000000
            b                           0        0          1  0.000000
  2         NaN                         1        0          0  0.000000
  3         a                           0        2          0  1.000000
  ...                                 ...      ...        ...       ...
  104       b                           1        0          0  0.000000
  105       NaN                         1        0          0  0.000000
  108       NaN                         0        3          1  0.750000
  109       a                           0        1          2  0.333333
            b                           0        0          1  0.000000
  
  [92 

In [15]:
model_eval(
    results_file_path = "~/Downloads/results/codet5p-220m-text2code-test-82-2023-05-20_233005.csv.gz",
    parse_to_code=False
)

{'humaneval': {'score': 0.25998883928571426,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         0        2          0  1.000000
  1         a                           0        0          1  0.000000
            b                           0        0          1  0.000000
  2         NaN                         0        0          1  0.000000
  3         a                           0        2          0  1.000000
  ...                                 ...      ...        ...       ...
  87        NaN                         1        0          0  0.000000
  88        a                           0        3          1  0.750000
            b                           0        0          1  0.000000
  92        NaN                         1        0          0  0.000000
  94        NaN                         0        1          2  0.333333
  
  [82

In [65]:
df = pd.read_csv("~/Downloads/results/codet5p-220m-text2code-test-82-2023-05-20_233005.csv.gz")
df.set_index('test_id', inplace=True)

In [76]:
print(df.loc['92']['text'])

test_code = build_test_code(test=df.loc['92']['test'], imports=df.loc['92']['imports'], code=df.loc['92']['generated_code'])
print('# TEST CODE: \n')
print(test_code)
print('# GENERATED CODE\n')
print(df.loc['92']['generated_code'])
print('# GOLD CODE: \n')
print(df.loc['92']['code'])


Look up free events for this weekend and let me know what the weather will be.
# TEST CODE: 

from entities.generic import *
from entities.calendar import *
from entities.home import *
from entities.map import *
from entities.message import *
from entities.music import *
from entities.navigation import *
from entities.reminder import *
from entities.shopping import *
from entities.weather import *
from actions.calendar import *
from actions.clock import *
from actions.calendar import *
from actions.home import *
from actions.map import *
from actions.messages import *
from actions.music import *
from actions.navigation import *
from actions.reminders import *
from actions.responder import *
from actions.shopping import *
from actions.weather import *
from providers.data_model import DataModel
from datetime import datetime, timedelta
import utils.api_utils as utils
from utils.test_utils import *

# test data
data_model = DataModel(reset=True)
data_event_name1 = EventName(text="free even

In [29]:
from entities.generic import *
from entities.calendar import *
from entities.home import *
from entities.map import *
from entities.message import *
from entities.music import *
from entities.navigation import *
from entities.reminder import *
from entities.shopping import *
from entities.weather import *
from actions.calendar import *
from actions.clock import *
from actions.calendar import *
from actions.home import *
from actions.map import *
from actions.messages import *
from actions.music import *
from actions.navigation import *
from actions.reminders import *
from actions.responder import *
from actions.shopping import *
from actions.weather import *
from providers.data_model import DataModel
from datetime import datetime, timedelta
import utils.api_utils as utils
from utils.test_utils import *

# test data
data_model = DataModel(reset=True)
data_event_name1 = EventName(text="free events", value="art show")
data_model.append(data_event_name1)
data_event_name2 = EventName(text="free events", value="community event")
data_model.append(data_event_name2)
data_date_time1 = DateTime(
    text="this weekend", value=datetime.now() + timedelta(days=0)
)
data_model.append(data_date_time1)
data_date_time2 = DateTime(
    text="this weekend", value=datetime.now() + timedelta(days=0)
)
data_model.append(data_date_time2)
data_model.append(
    data_event1 := EventEntity(
        event_name=data_event_name1, date_time=data_date_time1
    )
)
data_model.append(
    data_event2 := EventEntity(
        event_name=data_event_name1, date_time=data_date_time2
    )
)
data_model.append(
    data_event3 := EventEntity(
        event_name=data_event_name2, date_time=data_date_time2
    )
)
data_model.append(
    data_weather_forecast1 := WeatherForecastEntity(date_time=data_date_time1)
)
data_model.append(
    data_weather_forecast2 := WeatherForecastEntity(date_time=data_date_time2)
)

# start code block to test
event_names = EventName.resolve_many_from_text("a weekend") or []
events = []
for event_name in event_names:
  events += Calendar.find_events(event_name=event_name)
Responder.respond(response=events)

weather_forecasts = Weather.find_weather_forecasts()
Responder.respond(response=weather_forecasts)
# end code block to test

# assertions
test_results = {}

iterator = iter(data_model.get_response([EventEntity]))
actual = next(iterator)
expected = [data_event1, data_event2, data_event3]
response_assertions(expected, actual, test_results)

iterator = iter(data_model.get_response([WeatherForecastEntity]))
actual = next(iterator)
expected = [data_weather_forecast1, data_weather_forecast2]
response_assertions(expected, actual, test_results)

assert_test(test_results)

test_results

<bound method Kernel.raw_input of <ipykernel.ipkernel.IPythonKernel object at 0x106b3cd60>>
<bound method Kernel.raw_input of <ipykernel.ipkernel.IPythonKernel object at 0x106b3cd60>>


{'incorrect': 1,
 'results': [{'message': 'Assertion failed', 'actual': 3, 'expected': 0}],
 'correct': 2}

In [None]:
code_insert_idx = test.find(code_embed_str)

In [None]:
event_names = EventName.resolve_many_from_text("a weekend")
events = []
for event_name in event_names:
  events += Calendar.find_events(event_name=event_name)
Responder.respond(response=events)

weather_forecasts = Weather.find_weather_forecasts()
Responder.respond(response=weather_forecasts)

In [64]:
model_eval(
    results_file_path = "~/Downloads/results/codet5-small-text_rep2rep-test-92-2023-05-21_012655.csv.gz",
    parse_to_code=True
)

{'humaneval': {'score': 0.1936728395061728,
  'results':                            code_failure  correct  incorrect  accuracy
  sample_id sample_minor_id                                            
  0         NaN                         1        0          0  0.000000
  1         a                           1        0          0  0.000000
            b                           1        0          0  0.000000
  2         NaN                         1        0          0  0.000000
  3         a                           0        2          0  1.000000
  ...                                 ...      ...        ...       ...
  104       b                           1        0          0  0.000000
  105       NaN                         1        0          0  0.000000
  108       NaN                         1        0          0  0.000000
  109       a                           0        1          2  0.333333
            b                           0        0          1  0.000000
  
  [92 

In [63]:
train_file = 'build/train_complex_utterance_to_code_with_intermediate_40k.csv.gz'
train_df = pd.read_csv(train_file, compression='gzip')
train_df.columns

Index(['text', 'code', 'lang_rep', 'code_rep'], dtype='object')

In [30]:
results_file_paths = glob.glob('dist/experiments_results/*')
results_file_paths = sorted(results_file_paths)
for results_file_path in results_file_paths:
    print('======================')
    print(results_file_path)
    results = model_eval(
        results_file_path=results_file_path,
        parse_to_code=('2rep' in results_file_path),
        compute_humanval=True, 
        compute_bleu=True
    )
    print(f"humaneval = {results['humaneval']['score']}")
    print(f"bleu = {results['bleu']['score']}")

dist/experiments_results/codet5-base-rep2rep-test-82-2023-05-18_161634.csv.gz
humaneval = 0.2374627976190476
bleu = 0.3278433989205229
dist/experiments_results/codet5-small-rep2rep-test-82-2023-05-17_133852.csv.gz
humaneval = 0.2726190476190476
bleu = 0.3202581743011407
dist/experiments_results/codet5p-220m-rep2code-test-82-2023-05-19_195010.csv.gz
humaneval = 0.32451636904761905
bleu = 0.3985260555673632
dist/experiments_results/codet5p-220m-rep2rep-test-82-2023-05-18_181859.csv.gz
humaneval = 0.28140219155844154
bleu = 0.31814920633090105
dist/experiments_results/codet5p-220m-rep2rep-test-92-2023-05-19_013955.csv.gz
humaneval = 0.27678872053872056
bleu = 0.3162414934401745
dist/experiments_results/codet5p-220m-text2code-test-82-2023-05-18_202622.csv.gz
humaneval = 0.4106398809523809
bleu = 0.4233537338536305
dist/experiments_results/codet5p-220m-text2rep-test-82-2023-05-19_201014.csv.gz
humaneval = 0.2886997767857143
bleu = 0.3260169581216692
dist/experiments_results/codet5p-220m-tex

In [31]:
results_file_paths = glob.glob('/Users/asaf/Downloads/results/*')
results_file_paths = sorted(results_file_paths)
for results_file_path in results_file_paths:
    print('======================')
    print(results_file_path)
    results = model_eval(
        results_file_path=results_file_path,
        parse_to_code=('2rep' in results_file_path),
        compute_humanval=True, 
        compute_bleu=True
    )
    print(f"humaneval = {results['humaneval']['score']}")
    print(f"bleu = {results['bleu']['score']}")

/Users/asaf/Downloads/results/codet5-base-rep2rep-test-82-2023-05-18_161634.csv.gz
humaneval = 0.2374627976190476
bleu = 0.3278433989205229
/Users/asaf/Downloads/results/codet5-small-rep2code-test-92-2023-05-21_012655.csv.gz
humaneval = 0.2553240740740741
bleu = 0.3861922262132963
/Users/asaf/Downloads/results/codet5-small-rep2rep-test-82-2023-05-17_133852.csv.gz
humaneval = 0.2726190476190476
bleu = 0.3202581743011407
/Users/asaf/Downloads/results/codet5-small-text2code-test-92-2023-05-21_012655.csv.gz
humaneval = 0.322974537037037
bleu = 0.4223815800053465
/Users/asaf/Downloads/results/codet5-small-text2rep-test-92-2023-05-21_012655.csv.gz
humaneval = 0.22916666666666666
bleu = 0.3449000113961361
/Users/asaf/Downloads/results/codet5-small-text_rep2rep-test-92-2023-05-21_012655.csv.gz
humaneval = 0.1957561728395062
bleu = 0.32684914846622926
/Users/asaf/Downloads/results/codet5p-220m-rep2rep-test-82-2023-05-18_181859.csv.gz
humaneval = 0.28140219155844154
bleu = 0.31814920633090105
/U

In [57]:
os.getcwd()

'/Users/asaf/Workspace/biu'

In [33]:
df = pd.read_csv('/Users/asaf/Downloads/results/codet5-small-text2code-test-92-2023-05-21_012655.csv.gz', compression='gzip')
df.set_index('test_id', inplace=True)
df.sort_index(inplace=True)
df.head()

Unnamed: 0_level_0,Unnamed: 0,sample_id,sample_minor_id,text,code,test,imports,lang_rep,code_rep,text_lang_rep,lang_rep_pretty,code_rep_pretty,output,target
test_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,4,0,,Check the availability of Pepsi at Walmart and...,"product_name = ProductName.resolve_from_text(""...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ Module [ product_name = ProductName.resolve_...,Text: Check the availability of Pepsi at Walma...,[ root [ S [ Command [ Action [ hd [ Check ] ]...,[ Module [ product_name = ProductName.resolve_...,"product_name = ProductName.resolve_from_text(""...","product_name = ProductName.resolve_from_text(""..."
10,14,10,,Set a timer for one hour and text Stacy that d...,"duration = DateTime.resolve_from_text(""one hou...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Set ] ] [...,[ Module [ duration = DateTime.resolve_from_te...,Text: Set a timer for one hour and text Stacy ...,[ root [ S [ Command [ Action [ hd [ Set ] ] [...,[ Module [ duration = DateTime.resolve_from_te...,"event_name = EventName.resolve_from_text(""a ti...","duration = DateTime.resolve_from_text(""one hou..."
102,84,102,,Set an alarm for 7:30am and notify me with a r...,"date_time = DateTime.resolve_from_text(""7:30am...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ hd [ Set ] ] [...,[ Module [ date_time = DateTime.resolve_from_t...,Text: Set an alarm for 7:30am and notify me wi...,[ root [ S [ Command [ Action [ hd [ Set ] ] [...,[ Module [ date_time = DateTime.resolve_from_t...,"event_name = EventName.resolve_from_text(""an a...","date_time = DateTime.resolve_from_text(""7:30am..."
104_a,55,104,a,In the event that Jessica messages with road c...,"sender = Contact.resolve_from_text(""Jessica"")\...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ Arg [ obl [ ca...,[ Module [ sender = Contact.resolve_from_text(...,Text: In the event that Jessica messages with ...,[ root [ S [ Command [ Action [ Arg [ obl [ ca...,[ Module [ sender = Contact.resolve_from_text(...,messages = Message.find_messages()\nMessage.de...,"sender = Contact.resolve_from_text(""Jessica"")\..."
104_b,73,104,b,In the event that Jessica messages with road c...,"sender = Contact.resolve_from_text(""Jessica"")\...",# test data\ndata_model = DataModel(reset=True...,from entities.generic import *\nfrom entities....,[ root [ S [ Command [ Action [ Arg [ obl [ ca...,[ Module [ sender = Contact.resolve_from_text(...,Text: In the event that Jessica messages with ...,[ root [ S [ Command [ Action [ Arg [ obl [ ca...,[ Module [ sender = Contact.resolve_from_text(...,messages = Message.find_messages()\nMessage.de...,"sender = Contact.resolve_from_text(""Jessica"")\..."


In [None]:
df[df[]]