In [1]:
import openai
import os

import pandas as pd
import tiktoken
from tqdm import tqdm

# from dotenv import load_dotenv, find_dotenv
# _ = load_dotenv(find_dotenv()) # read local .env file

# openai.api_key  = os.getenv('OPENAI_API_KEY')
openai.api_key  = ""

In [2]:
client = openai.AsyncOpenAI(
  api_key=openai.api_key,  # this is also the default, it can be omitted
)


async def get_completion(prompt_, model_="gpt-3.5-turbo"):
    messages_ = [{"role": "user", "content": prompt_}]
    response_ = await client.chat.completions.create(
        model=model_, 
        messages=messages_,
        temperature=0.00000001,
    )
    return response_.choices[0].message.content

# Set up prompt

In [3]:
str_term = "(λx.((λy.((λz.z) x)) (λa.a)))"
next_step_term = "(λx.((λy.y) (λa.a)))"

prompt = f"""
Please generate the next step of reduction a lambda term. Provide only term expression.

Lambda term: '''{str_term}'''
"""
response = await get_completion(prompt)
print(f"expected output: {next_step_term}")
print(f"model output: {response}")

expected output: (λx.((λy.y) (λa.a)))
model output: (λx.((λy.y) (λa.a)))


# Load the data

In [6]:
df = pd.read_csv("./data/term_step_LO.csv", delimiter=",")
df.head()

Unnamed: 0,term,term_next_LO
0,((λx.(λy.((y y) (λz.z)))) (λa.(((λb.b) (a (λc....,(λx.((x x) (λy.y)))
1,(((λx.(λy.((y y) (λz.((λa.(λb.(λc.(x (c (a c))...,((λx.((x x) (λy.((λz.(λa.(λb.((λc.(((λd.(c d))...
2,((λx.((x x) (λy.((λz.(λa.(λb.((λc.(((λd.(c d))...,((((λx.x) ((λy.y) (λz.(z (λa.z))))) ((λx.x) ((...
3,((((λx.x) ((λy.y) (λz.(z (λa.z))))) ((λx.x) ((...,((((λx.x) (λy.(y (λz.y)))) ((λa.a) ((λb.b) (λc...
4,((((λx.x) (λy.(y (λz.y)))) ((λa.a) ((λb.b) (λc...,(((λx.(x (λy.x))) ((λz.z) ((λa.a) (λb.(b (λc.b...


In [10]:
enc_tiktoken = tiktoken.encoding_for_model("gpt-3.5-turbo")
total_tokens = 0
for term in df["term"].tolist():
    total_tokens += len(enc_tiktoken.encode(term))

print(f"Total term tokens: {total_tokens}")

total_tokens = 0
for term in df["term_next_LO"].tolist():
    total_tokens += len(enc_tiktoken.encode(term))
    
print(f"Total expected tokens: {total_tokens}")

Total term tokens: 523365
Total expected tokens: 502995


# Reduce size of expected and input terms tokens

In [37]:
df_reduced = df[[len(enc_tiktoken.encode(term)) < 77 for term in df["term"].tolist()]]

In [38]:
len(df_reduced)

1019

In [39]:
total_tokens = 0
for term in df_reduced["term"].tolist():
    total_tokens += len(enc_tiktoken.encode(term))

print(f"Total term tokens: {total_tokens}")

total_tokens = 0
for term in df_reduced["term_next_LO"].tolist():
    total_tokens += len(enc_tiktoken.encode(term))
    
print(f"Total expected tokens: {total_tokens}")

Total term tokens: 52265
Total expected tokens: 44817


# Normalize terms with gpt-3.5-turbo model

In [44]:
term_answers = []

for str_term in tqdm(df_reduced["term"].tolist()):
    prompt = f"""
    Please generate the next step of reduction a lambda term. Provide only term expression.
    
    Lambda term: '''{str_term}'''
    """
    
    response = await get_completion(prompt)
    term_answers.append(response)

100%|██████████| 1019/1019 [25:18<00:00,  1.49s/it]


In [45]:
term_answers

['(λy.(λz.((λa.(((λb.(a b)) ((λc.(λd.(λe.(c (d (λj.j)))))) (λi.(λn.(λm.i))))) (z ((λi.(λn.(λm.i))) z)))))',
 '(λx.(λy.((λz.(((λa.(z a)) ((λb.(λc.(λd.(b (c (λe.e)))))) x)) x)) (y ((λj.(λi.(λn.j))) y))))',
 '(λy.(((y ((λz.(λa.(λb.z))) y)) ((λc.(λd.(λe.(c (d (λj.j)))))) (y ((λi.(λn.(λm.i))) y)))) (y ((λt.(λr.(λq.t))) y)))',
 '(λy.(((y (λz.(λa.y))) ((λb.(λc.(λd.(b (c (λe.e)))))) (y ((λj.(λi.(λn.j))) y)))) (y ((λm.(λt.(λr.m))) y)))',
 '(λx.(λy.(((y (λz.(λa.y))) (λb.(λc.((y ((λd.(λe.(λj.d))) y)) (b (λi.i)))))) (y ((λn.(λm.(λt.n))) y))))',
 '(λy.(((y (λz.(λa.y))) (λb.(λc.((y (λd.(λe.y))) (b (λj.j)))))) (y ((λi.(λn.(λm.i))) y)))',
 '(λy.(λz.(((((λa.(λb.((z (((((x z) z) (λc.(λd.(z c)))) ((y z) y)) y)) z))) (λe.z)) (λj.j)) ((z (λi.y)) (λn.y))) ((z y) z))))',
 '(λy.(λz.((((λa.((z (((((x z) z) (λb.(λc.(z b)))) ((y z) y)) y)) z)) (λd.d)) ((z (λe.y)) (λj.y))) ((z y) z))))',
 '(λx.(λy.(y (λz.((y (z (((x (λa.((y y) (((λb.y) x) (λc.(λd.x)))))) x) (λe.(λj.(j ((λi.i) y))))))) (x (λn.n))))))) (λx.(λy.(y (

In [51]:
df_reduced = df_reduced.reset_index(drop=True)

In [52]:
df_reduced["gpt3.5_answers"] = term_answers

In [54]:
df_reduced.head()

Unnamed: 0,term,term_next_LO,gpt3.5_answers
0,((λx.(λy.(λz.((λa.(((λb.(a b)) ((λc.(λd.(λe.(c...,(λx.(λy.((λz.(((λa.(z a)) ((λb.(λc.(λd.(b (c (...,(λy.(λz.((λa.(((λb.(a b)) ((λc.(λd.(λe.(c (d (...
1,(λx.(λy.((λz.(((λa.(z a)) ((λb.(λc.(λd.(b (c (...,(λx.(λy.(((λz.((y ((λa.(λb.(λc.a))) y)) z)) ((...,(λx.(λy.((λz.(((λa.(z a)) ((λb.(λc.(λd.(b (c (...
2,(λx.(λy.(((y ((λz.(λa.(λb.z))) y)) ((λc.(λd.(λ...,(λx.(λy.(((y (λz.(λa.y))) ((λb.(λc.(λd.(b (c (...,(λy.(((y ((λz.(λa.(λb.z))) y)) ((λc.(λd.(λe.(c...
3,(λx.(λy.(((y (λz.(λa.y))) ((λb.(λc.(λd.(b (c (...,(λx.(λy.(((y (λz.(λa.y))) (λb.(λc.((y ((λd.(λe...,(λy.(((y (λz.(λa.y))) ((λb.(λc.(λd.(b (c (λe.e...
4,(λx.(λy.(((y (λz.(λa.y))) (λb.(λc.((y ((λd.(λe...,(λx.(λy.(((y (λz.(λa.y))) (λb.(λc.((y (λd.(λe....,(λx.(λy.(((y (λz.(λa.y))) (λb.(λc.((y ((λd.(λe...


In [55]:
df_reduced.to_csv("./data/gpt3.5_answers.csv", index=False)