In [1]:
import sys
import re

import pandas as pd
import numpy as np

from tqdm import tqdm

sys.path.append("../")
from calculus_path_mod.term_engine import *
from calculus_path_mod.reduction_strategy import *
from calculus_path_mod.terms import num_comparison, nat_numbers, arithm_ops, combinators, pairs, logic

from calculus_path_mod.terms.pseudonym import *

from tests_14_redexes_coloring.lambda_code_interpreter import LambdaCalculusInterpreter

# Load data

In [6]:
gpt35_full_df = pd.read_csv("./data/gpt3.5_answers.csv")
gpt35_df = pd.read_csv("./data/gpt3.5_answers_reduced.csv")
gpt4_df = pd.read_csv("./data/gpt4_answers.csv")

# Interprete terms

In [7]:
lc_interpreter = LambdaCalculusInterpreter()

In [5]:
test_term_str = gpt35_full_df["term_next_LO"].tolist()[0]

In [20]:
fixed_test_term_str = test_term_str.replace("λ", "@")

res = lc_interpreter.process_commands(f"#define tmp_term = {fixed_test_term_str}#")
print(res)

Defined term could be found & reduced by name: 'tmp_term'


In [13]:
res = lc_interpreter.process_commands(f"#help")
print(res)

#help -- to call this menu

#show-syntax -- show syntax used for typing lambda terms
#show-strategies -- show available strategies
#show-lib -- show all available terms in the terms library
#show-all -- show all terms defined by user
#show TERM_NAME -- to get description by the term
#show term_name -- to show a defined term by term name
#show-full TERM_NAME -- to get a full term definition by the term nem
#show-full term_name -- to show a full term definition of a defined term by term name

#import /path_to_lib/lib_file.lmd -- for including terms from other file. 
                It must have a name in which will be defined a term
#define term_name = term_definition # -- for defining term in the memory,
                MUST ENDS on '#' symbol for allowing multiline input
                 
#reduce term_name STRATEGY_NAME -- reduce term by term_name with defined strategy
                reduced term will appear in term_name_red_strategy_name, 
                STRATEGY_NAME isn't mandator

In [21]:
res = lc_interpreter.process_commands(f"#show-full tmp_term")
print(res)

Full terms description:
* tmp_term == (λx.(λy.((λz.(((λa.(z a)) ((λb.(λc.(λd.(b (c (λe.e)))))) z)) z)) (y ((λj.(λi.(λn.j))) y)))))


In [22]:
test_term_str

'(λx.(λy.((λz.(((λa.(z a)) ((λb.(λc.(λd.(b (c (λe.e)))))) z)) z)) (y ((λj.(λi.(λn.j))) y)))))'

# Weak accuracy estimation for GPT3.5 full results

In [45]:
true_count = 0
all_terms = len(gpt35_full_df["term_next_LO"].tolist())

for term_lo_str, term_exp_str in zip(gpt35_full_df["term_next_LO"].tolist(), gpt35_full_df["gpt3.5_answers"].tolist()):
    aaa = term_exp_str.replace('λ', '@').replace('\'', '')
    if "->" in aaa:
        aaa = aaa.split("->")[1]
    if "=>" in aaa:
        aaa = aaa.split("=>")[1]
    if "→" in aaa:
         aaa = aaa.split("→")[1]
        
    res = lc_interpreter.process_commands(f"#define tmp_term = {aaa}#")
    if "Error" in res or "ERROR" in res:
        continue
    res = lc_interpreter.process_commands(f"#show-full tmp_term")
    term_exp_fix_var_srt = res.split(" == ")[1].replace("\n", "")
    if term_exp_fix_var_srt == term_lo_str:
        true_count += 1
        
print(f"Accuracy on GPT-3.5 full: {true_count / all_terms * 100.0}")

Accuracy on GPT-3.5 full: 6.084396467124632


# Weak accuracy estimation for GPT3.5

In [44]:
true_count = 0
all_terms = len(gpt35_df["term_next_LO"].tolist())

for term_lo_str, term_exp_str in zip(gpt35_df["term_next_LO"].tolist(), gpt35_df["gpt3.5_answers"].tolist()):
    aaa = term_exp_str.replace('λ', '@').replace('\'', '')
    if "->" in aaa:
        aaa = aaa.split("->")[1]
    if "=>" in aaa:
        aaa = aaa.split("=>")[1]
    
    res = lc_interpreter.process_commands(f"#define tmp_term = {aaa}#")
    if "Error" in res or "ERROR" in res:
        continue
    res = lc_interpreter.process_commands(f"#show-full tmp_term")
    term_exp_fix_var_srt = res.split(" == ")[1].replace("\n", "")
    if term_exp_fix_var_srt == term_lo_str:
        true_count += 1
        
print(f"Accuracy on GPT-3.5: {true_count / all_terms * 100.0}")

Accuracy on GPT-3.5: 19.34426229508197


# Weak accuracy estimation for GPT4

In [43]:
true_count = 0
all_terms = len(gpt4_df["term_next_LO"].tolist())

for term_lo_str, term_exp_str in zip(gpt4_df["term_next_LO"].tolist(), gpt4_df["gpt4_answers"].tolist()):
    aaa = term_exp_str.replace('λ', '@').replace('\'', '')
    if "->" in aaa:
        aaa = aaa.split("->")[1]
    if "=>" in aaa:
        aaa = aaa.split("=>")[1]
    
    res = lc_interpreter.process_commands(f"#define tmp_term = {aaa}#")
    if "Error" in res or "ERROR" in res:
        continue
    res = lc_interpreter.process_commands(f"#show-full tmp_term")
    term_exp_fix_var_srt = res.split(" == ")[1].replace("\n", "")
    if term_exp_fix_var_srt == term_lo_str:
        true_count += 1
        
print(f"Accuracy on GPT-4: {true_count / all_terms * 100.0}")

Accuracy on GPT-4: 41.31147540983607
