# Testing notebook
This is a testing notebook; its only purpose is to try things out while developing.

## Initial implementation

In [None]:
import sys, os
sys.path.append("..")

import numpy as np
from random import randint

from src.models import OpenAIBot
from src.prompts.game24 import foa_step_prompt, value_prompt, value_last_step_prompt, bfs_prompt
from src.tasks.base import DATA_PATH
from src.tasks.game24 import Game24
from src.methods.agents import Agents
from src.methods.resampler import Resampler

%load_ext autoreload
%autoreload 2

# Chat-model client shared by the synchronous experiment cells below.
bot = OpenAIBot(model="gpt-3.5-turbo-1106")
# Alternative model, kept commented out for quick switching:
#bot = OpenAIBot(model="gpt-4")

In [None]:
# Prompt testing: send the same step prompt several times and inspect the output.
from src.prompts.game24 import foa_step_prompt, value_prompt, value_last_step_prompt, bfs_prompt, cot_prompt

x = 20  # number of identical requests to send
# Named `puzzle_input` rather than `input`: in a notebook a global called
# `input` would shadow the builtin for every cell executed afterwards.
puzzle_input = "1 24"

prompt = foa_step_prompt.format(input=puzzle_input)

# Keep every response so they can be compared afterwards; the original loop
# overwrote a single variable and silently discarded all but the last one.
responses = [bot.request(prompt) for _ in range(x)]
response = responses[-1]

print(f"Prompt \n{prompt}")
print("----\n----\n")
print(f"Response \n{response}")

In [None]:
# Game24 task step function

task = Game24(bot)
# random.randint includes BOTH endpoints, so the upper bound has to be
# len(task) - 1; with len(task) the draw could land one past the last index
# (presumably an out-of-range puzzle -- confirm against Game24.get_input).
idx = randint(0, len(task) - 1)
task.get_input(idx)

# Run the task for its full number of steps, recording the value returned by
# evaluate() after each step.
value_numbers = []
for _ in range(task.max_steps):
    task.step()
    value_numbers.append(task.evaluate())

# Pair every recorded step with the value obtained right after it.
summary = [
    step + f" [Value : {value}]"
    for step, value in zip(task.steps, value_numbers)
]

print(f"Input: {task.input}")
print("\n".join(summary))

In [None]:
# Resampler: call resample() on the same values with two different methods.

# Example values spanning several orders of magnitude.
values = np.array([20, 10, 5, 1, 0.0001])
resampler = Resampler()

# Normalized resampling
# NOTE(review): presumably draws proportionally to the values -- confirm in Resampler.
draw = resampler.resample(values, resample_method="normalization")
print(f"Normalized : {draw}")


# Greedy
# NOTE(review): presumably always favours the largest value -- confirm in Resampler.
draw = resampler.resample(values, resample_method="greedy")
print(f"Greedy : {draw}")


In [None]:
# Multiple agents: run a small fleet on one puzzle and write a log of the run.

idx_input = 8
n_evaluations = 2
n_agents = 2

# Create agents. The evaluation count is taken from `n_evaluations` above
# instead of a second hard-coded literal, so the two can no longer drift apart.
agents = Agents(task=Game24, idx_input=idx_input, n_agents=n_agents, init=False, model=bot, n_evaluations=n_evaluations)

# Run agents: every step except the last is followed by evaluate + resample.
for _ in range(agents.max_steps - 1):
    agents.step()
    agents.evaluate()
    agents.resample()

# Final step : Finish answers format + Choose best answer
agents.step()

# Log results into the "logs" folder next to the current working directory.
current_path = os.getcwd()
log_path = os.path.join(os.path.dirname(current_path), "logs")
agents.create_log(repo_path=log_path)

In [None]:
# Test results function: run the agents end to end, then print their steps and
# the value returned by test_output().

idx_input = 8
n_evaluations = 3
n_agents = 5

# Create agents
agents = Agents(task=Game24, idx_input=idx_input, n_agents=n_agents, model=bot)

# Run agents: every step except the last is followed by evaluate + resample.
for i in range(agents.max_steps-1):
    agents.step()
    agents.evaluate(n=n_evaluations)
    agents.resample()

# Final step : Finish answers format + Choose best answer
agents.step() 

# One paragraph per agent, one line per recorded step.
print("\n\n".join(["\n".join(agent.steps) for agent in agents.agents]))
print(f"\nResults : {agents.test_output()}")

## Async Implementation

In [None]:
import asyncio
# set up logging
import logging
import os
import json
import random
import argparse

import numpy as np
import pandas as pd
from diskcache import Cache
from datetime import datetime
from collections import Counter


# TODO: Not sure if this is correct, I didn't know how else to handle the package paths
import sys
sys.path.append(os.getcwd()) # Project root!!

from async_engine.cached_api import CachedOpenAIAPI
from async_engine.round_robin_manager import AsyncRoundRobin
from async_implementation.agents.gameof24 import GameOf24Agent
from async_implementation.states.gameof24 import GameOf24State
from utils import create_folder, email_notification
from async_engine.mock_batched_async import BatchingAPI

%load_ext autoreload
%autoreload 2

### Resampling

In [None]:
# Resampling methods: print the values returned by linear() and logistic(),
# rounded to three decimals.
# NOTE(review): `linear` and `logistic` are not imported by any visible cell;
# they must already exist in the kernel namespace for this cell to run.

values = [0, 0.001, 0.003, 1, 3, 20, 60]

print(f"Linear probabilities : {[round(p, 3) for p in linear(values)]}")
print(f"Logistic probabilities : {[round(p, 3) for p in logistic(values)]}")

### Hash function

In [None]:
# Gameof24State Hash: build four states and compare their hashes.

# Use the same dataset location as the Game-of-24 setup cell later in this
# notebook ('data/datasets/24_tot.csv'); the previous 'data/24_tot.csv' path
# disagreed with it.
path = 'data/datasets/24_tot.csv'
data = pd.read_csv(path).Puzzles.tolist()

puzzle_idx = 0
puzzle = data[puzzle_idx]

num_agents = 1

# s1 and s2 differ only in `randomness`; s3 adds a step; s4 also changes the
# current state.
s1 = GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000))
s2 = GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000))
s3 = GameOf24State(puzzle=puzzle, current_state=puzzle, steps=["something"], randomness=random.randint(0, 1000))
s4 = GameOf24State(puzzle=puzzle, current_state="24", steps=["something"], randomness=random.randint(0, 1000))

# Randomness does not count towards the hash function
hash(s1), hash(s2), hash(s3), hash(s4)

### Batched API

In [None]:
# Batched API: build the cached OpenAI client and the round-robin limiter that
# the batching cells below rely on.

# Cache setup
# The relative "./caches/" path only resolves when the kernel's working
# directory is the project root, hence the assertion.
assert os.path.exists(
    "./caches/"), "Please run the script from the root directory of the project. To make sure all caches are created correctly."
cache = Cache("./caches/async_api_cache", size_limit=int(2e10))

# OpenAI API key setup
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
assert OPENAI_API_KEY is not None, "Please set the OPENAI_API_KEY environment variable"

# API setup
api_config = {
    "max_tokens": 100,
    "temperature": 0.7,
    "top_p": 1,
    "request_timeout": 45,
    "model": "gpt-3.5-turbo"
}
api = CachedOpenAIAPI(cache, api_config)

# Limiter setup
# The same key is registered N times as N separate resources.
limiter = AsyncRoundRobin()
N = 4
for _ in range(N):
    limiter.add_resource(data=OPENAI_API_KEY)

# Single-message chat payload reused by the request cells below.
prompt = "What is the capital of France?"
messages = [{"role": "user", "content": prompt}]

# Direct (unbatched) request, left disabled:
#response = await api.uncached_request(messages, limiter, n=1)
#print(response)

In [None]:


# Setup batching API
# NOTE(review): the positional order here (batch_size, api, limiter) differs
# from the later call `BatchingAPI(api, limiter, batch_size=2, timeout=None)`
# in this notebook -- confirm which signature the imported class expects.
batch_size = 2
bapi = BatchingAPI(batch_size, api, limiter)

In [None]:
# Send the test message through the batching wrapper.
# NOTE(review): if immediate_request is a coroutine function this needs `await` -- confirm.
bapi.immediate_request(messages, n=1)

In [None]:
# Inspect the batching wrapper's bookkeeping attributes after the request above.
print(f"Futures : {bapi.futures}")
print(f"Prompts : {bapi.prompts}")
print(f"Batches processed : {bapi.num_batches_processed}")

In [None]:
class State:
    # Minimal sketch of a state: it only declares the prompt that an agent
    # sends to the API.
    prompt: str


class Agent:
    """Minimal sketch of an agent built on top of the batching API."""

    @staticmethod
    async def step(state: State, api: BatchingAPI):
        """Send ``state.prompt`` through ``api.buffered_request`` and return the reply.

        Any post-processing of the reply would be added here, before returning.
        """
        return await api.buffered_request(state.prompt)

## Crosswords

In [None]:
import json

# Specify the path to the JSON file (relative to the project root)
file_path = "data/datasets/mini0505.json"

# Load the JSON file
# Note: this rebinds the notebook-level name `data` used by earlier cells.
with open(file_path, "r") as file:
    data = json.load(file)

In [None]:
import json
import os
from typing import List

class MiniCrosswordsEnv:
    """Environment for 5x5 mini-crossword puzzles loaded from a JSON dataset.

    The board is a flat list of 25 cells in row-major order, with '_' marking
    an empty cell. Answers are ten 5-character strings: the five rows
    (h1-h5) followed by the five columns (v1-v5).
    """

    def __init__(self, file='mini0505.json'):
        """Load the dataset at ``data/datasets/<file>`` and set up empty caches."""
        dataset_path = os.path.join("data", 'datasets', file)
        # Context manager so the file handle is always closed.
        with open(dataset_path) as f:
            # NOTE: despite its name, self.file holds the parsed dataset.
            self.file = json.load(f)
        self.n = len(self.file)
        self.cache = {}
        self.idx = None
        self.times = 0
        self.prompt_status_cache = {}

    def __len__(self):
        """Return the number of puzzles in the loaded dataset."""
        return self.n

    def reset(self, idx, board=None, status=None, steps=None):
        """Select puzzle ``idx`` and reset the environment.

        ``board``, ``status`` and ``steps`` optionally restore a previously
        saved position instead of starting from an empty board.

        Returns the rendered observation (see ``render``).
        """
        self.idx = idx
        self.data, self.board_gt = self.file[idx]
        self.board = ['_'] * 25
        self.ans = ['_____'] * 10
        self.ans_gt = self.get_ans(self.board_gt)
        self.steps = 0
        self.status = [0] * 10  # 0: unfilled; 1: filled; 2: filled then changed
        if board is not None:
            self.board = board
            self.ans = self.get_ans(self.board)
        if status is not None:
            self.status = status
        if steps is not None:
            self.steps = steps
        return self.render()

    def prompt_status(self):
        """Count how the current answers are rated as sure / maybe / impossible.

        Answers with four or more blanks are skipped. Results are memoised per
        prompt in ``self.prompt_status_cache``.

        NOTE(review): relies on ``value_prompt`` and ``gpt`` being available in
        the surrounding namespace; ``gpt`` is not defined in the visible cells.
        """
        count = {'sure': 0, 'maybe': 0, 'impossible': 0}
        for ans, data, status in zip(self.ans, self.data, self.status):
            if ans.count('_') >= 4:
                continue
            ans = ' '.join(ans.lower())
            line = f'{data}: {ans}'
            prompt = value_prompt.format(input=line)
            if prompt in self.prompt_status_cache:
                res = self.prompt_status_cache[prompt]
            else:
                res = gpt(prompt)[0]
                self.prompt_status_cache[prompt] = res
            # Only the last line of the reply is compared against the labels.
            res = res.split('\n')[-1].strip()
            if res in count:
                count[res] += 1
        return count

    def get_ans(self, board: List[str]) -> List[str]:
        """
        Given the board, return the answers (5 rows followed by 5 columns).

        Example
            board: ['A', 'G', 'E', 'N', 'D', 'M', 'O', 'T', 'O', 'R', 'A', 'R', 'T', 'S', 'Y', 'S', 'A', 'L', 'L', 'E', 'S', 'L', 'E', 'E', 'R']
            ans: ['AGEND', 'MOTOR', 'ARTSY', 'SALLE', 'SLEER', 'AMASS', 'GORAL', 'ETTLE', 'NOSLE', 'DRYER']
        """
        ans = [''] * 10
        for i in range(5):
            ans[i] = ''.join(board[i*5:(i+1)*5])
        for i in range(5):
            ans[i+5] = ''.join(board[i::5])
        return ans

    def render_gt_board(self) -> str:
        """
        Renders the ground truth board, one space-separated row per line.

        Example
            self.board_gt: ['A', 'G', 'E', 'N', 'D', 'M', 'O', 'T', 'O', 'R', 'A', 'R', 'T', 'S', 'Y', 'S', 'A', 'L', 'L', 'E', 'S', 'L', 'E', 'E', 'R']
            return: GT Board:
                    A G E N D
                    M O T O R
                    A R T S Y
                    S A L L E
                    S L E E R
        """
        s = "GT Board:\n"
        for i in range(5):
            s += ' '.join(self.board_gt[i*5:(i+1)*5]) + '\n'
        return s

    def render_board(self) -> str:
        """
        Renders the current board (self.board), one row per line.

        Unlike render_gt_board(), the letters of a row are NOT separated by
        spaces and the header reads "Current Board:".
        """
        s = "Current Board:\n"
        for i in range(5):
            s += ''.join(self.board[i*5:(i+1)*5]) + '\n'
        return s

    def render_clues(self, status=None) -> str:
        """
        Renders the clues/data. If status is not None, only render the clues whose status matches.

        Example (first 3)
            self.data: ['An agendum; something to be done', 'An engine', 'Pretentious; flowery']
            return: h1. An agendum; something to be done
                    h2. An engine
                    h3. Pretentious; flowery
        """
        s = ""
        for i in range(5):
            if status is None or self.status[i] == status:
                s += 'h' + str(i+1) + '. ' + self.data[i] + '\n'
        for i in range(5, 10):
            if status is None or self.status[i] == status:
                s += 'v' + str(i-5+1) + '. ' + self.data[i] + '\n'
        return s

    def render_gt_ans(self, status=None) -> str:
        """
        Renders the ground truth answers. If status is not None, only render the answers whose status matches.

        Example (first 3)
            self.data: ['An agendum; something to be done', 'An engine', 'Pretentious; flowery']
            self.ans_gt: ['AGEND','MOTOR','ARTSY']
            return: h1. An agendum; something to be done: AGEND
                    h2. An engine: MOTOR
                    h3. Pretentious; flowery: ARTSY
        """
        s = ""
        for i in range(5):
            if status is None or self.status[i] == status:
                s += 'h' + str(i+1) + '. ' + self.data[i] + ': ' + self.ans_gt[i] + '\n'
        for i in range(5, 10):
            if status is None or self.status[i] == status:
                s += 'v' + str(i-5+1) + '. ' + self.data[i] + ': ' + self.ans_gt[i] + '\n'
        return s

    def render_ans(self, status=None):
        """
        Renders the current answers. If status is not None, only render the answers whose status matches.

        Same layout as render_gt_ans() but applied to self.ans (current answers) instead of self.ans_gt (ground truth answers).
        """
        s = ""
        for i in range(5):
            if status is None or self.status[i] == status:
                s += 'h' + str(i+1) + '. ' + self.data[i] + ': ' + self.ans[i] + '\n'
        for i in range(5, 10):
            if status is None or self.status[i] == status:
                s += 'v' + str(i-5+1) + '. ' + self.data[i] + ': ' + self.ans[i] + '\n'
        return s

    def render(self, status=True):
        """Render the observation: the board plus the answers.

        With ``status`` truthy the answers are grouped into Unfilled / Filled /
        Changed sections; otherwise all answers are listed together.
        """
        if status:
            return self.render_board() + '\nUnfilled:\n' + self.render_ans(status=0) + '\nFilled:\n' + self.render_ans(status=1) + '\nChanged:\n' + self.render_ans(status=2)
        else:
            return self.render_board() + '\n' + self.render_ans()

    def step(self, action):
        """Apply an action such as ``"h2. motor"`` to the board.

        Only the last line of ``action`` is used. The step counter is
        incremented even when the action is rejected.

        Returns a tuple ``(observation, reward, done, info)``. For a rejected
        action the observation is an "Invalid! ..." message, the reward is 0,
        done is False and info is empty. Otherwise the reward is whether the
        board equals the ground truth, done is that reward or having reached
        20 steps, and info carries the letter / word / game accuracies.
        """
        self.steps += 1
        action = action.split('\n')[-1]
        action = action.split('. ')
        if len(action) != 2:
            return 'Invalid! Format should be like "h1. apple"', 0, False, {}
        pos, word = action

        if len(word) != 5:
            return 'Invalid! Word should have 5 letters.', 0, False, {}

        # Validate the position BEFORE touching the board. Without the range
        # check, "h9"/"h0" would make the slice assignment insert cells and
        # grow the board, and "hx"/"v7" would raise ValueError instead of
        # returning the documented "Invalid!" tuple.
        direction = pos[:1]
        try:
            idx = int(pos[1:]) - 1
        except ValueError:
            idx = -1
        if direction not in ('h', 'v') or not 0 <= idx < 5:
            return 'Invalid! Position should be h1-h5 or v1-v5', 0, False, {}

        if direction == 'h':
            self.board[idx*5:(idx+1)*5] = list(word.upper())
        else:
            self.board[idx::5] = list(word.upper())
            idx += 5  # columns occupy answer slots 5-9; used for the status update

        self.new_ans = self.get_ans(self.board)
        # An answer is marked "changed" (2) when any of its previously filled
        # letters differs in the new answers; the answer just written is then
        # marked "filled" (1).
        self.status = [2 if any(letter != new_letter and letter != '_' for letter, new_letter in zip(ans, new_ans)) else status for status, ans, new_ans in zip(self.status, self.ans, self.new_ans)]
        self.status[idx] = 1
        self.ans = self.new_ans
        r_all = (self.board == self.board_gt)
        r_letter = sum(a == b for a, b in zip(self.board, self.board_gt)) / 25
        r_word = sum(a == b for a, b in zip(self.ans, self.ans_gt)) / 10
        return self.render(), r_all, (r_all or self.steps >= 20), {'r_letter': r_letter, 'r_word': r_word, 'r_game': r_all}

### Prompt

In [None]:
from async_implementation.prompts.crosswords import propose_prompt, value_prompt
import re
import asyncio

def prompt_wrap(obs):
    """Fill the crossword proposal prompt template with the observation ``obs``."""
    filled_prompt = propose_prompt.format(input=obs)
    return filled_prompt

def parse_line(input_str):
    """Split one proposal line into ``[position, word, confidence]``.

    A matching line looks like ``"h1. apple (certain)"``: a position h1-h5 or
    v1-v5, a five-letter word, and a confidence label in parentheses, with
    anything allowed after the closing parenthesis. Returns None when the
    line does not start with that format.
    """
    found = re.match(r'^([hv][1-5])\. ([a-zA-Z]{5,5}) \((certain|high|medium|low)\).*$', input_str)
    if found is None:
        return None
    # Groups: 1 = position, 2 = word, 3 = confidence label.
    return [found.group(number) for number in (1, 2, 3)]

def parse_response(response):
    """Turn a multi-line model reply into ``[(candidate, score), ...]``.

    Each line is parsed with ``parse_line``; lines that do not match are
    dropped. A candidate is the lower-cased ``"<position>. <word>"`` string and
    its score is the numeric weight of the confidence label. Returns None when
    no line matched.
    """
    # Confidence label -> numeric weight (TODO: ad hoc values).
    weights = {'certain': 1, 'high': 0.5, 'medium': 0.2, 'low': 0.1}

    scored = []
    for raw_line in response.split('\n'):
        fields = parse_line(raw_line)
        if fields is None:
            continue
        position, word, confidence = fields
        candidate = position.lower() + '. ' + word.lower()
        scored.append((candidate, weights.get(confidence, 0)))

    if not scored:
        return None
    return scored

async def get_candidates_to_scores(env, api, n=8):
    """Collect candidate actions for the current board and sum their scores.

    The rendered observation is the cache key in ``env.cache``. On a miss, the
    proposal prompt is requested ``n`` times concurrently through
    ``api.buffered_request``; every parsed candidate accumulates the scores it
    received across the replies. Returns a dict mapping candidate -> score.
    """
    observation = env.render()
    if observation in env.cache:
        print('cache hit')
        return env.cache[observation]
    print('call gpt')

    pending = [api.buffered_request(prompt_wrap(observation)) for _ in range(n)]
    replies = await asyncio.gather(*pending)

    totals = {}
    for reply in replies:
        # parse_response returns None when nothing matched; treat as no candidates.
        for candidate, score in parse_response(reply) or []:
            totals[candidate] = totals.get(candidate, 0) + score

    env.cache[observation] = totals
    return totals

In [None]:


from async_engine.cached_api import CachedOpenAIAPI
from async_engine.round_robin_manager import AsyncRoundRobin
from async_engine.mock_batched_async import BatchingAPI
from diskcache import Cache

# The relative "./caches/" path only resolves when the kernel's working
# directory is the project root, hence the assertion.
assert os.path.exists(
    "./caches/"), "Please run the script from the root directory of the project. To make sure all caches are created correctly."
cache = Cache("./caches/async_api_cache", size_limit=int(2e10))

# get OPENAI_API_KEY from env
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
assert OPENAI_API_KEY is not None, "Please set the OPENAI_API_KEY environment variable"

# Request parameters handed to the cached API client.
api_config = {
    "max_tokens": 100,
    "temperature": 0.7,
    "top_p": 1,
    "request_timeout": 45,
    "model": "gpt-3.5-turbo"
}

api = CachedOpenAIAPI(cache, api_config)
limiter = AsyncRoundRobin()
# ToDo, this is a bit hacky. OpenAI allows multiple parallel requests per key, so we add the same key multiple times
N = 4
for _ in range(N):
    limiter.add_resource(data=OPENAI_API_KEY)

# Rebind `api` to the batching wrapper around the cached client; later cells
# call `api.buffered_request` on it.
api = BatchingAPI(api, limiter, batch_size=2, timeout=None)


In [None]:
# Start from the first puzzle with an empty board.
env = MiniCrosswordsEnv()
env.reset(0)

# Request 8 proposals for the current board and aggregate their scores.
candidates_to_scores = await get_candidates_to_scores(env, api, n=8)

In [None]:
# Candidates ordered from highest to lowest accumulated score.
print(sorted(candidates_to_scores.items(), key=lambda x: x[1], reverse=True))
# Snapshot of the environment (copies of the lists), e.g. to pass back to env.reset() later.
board, status, steps = env.board.copy(), env.status.copy(), env.steps

print(f"board : {board}")
print(f"status : {status}")
print(f"steps : {steps}")

In [None]:
# Print the candidate actions alone, best score first.
for action in sorted(candidates_to_scores, key=candidates_to_scores.get, reverse=True):
    print(action)


In [None]:
# Apply one hand-written action and inspect everything env.step() returns.
action = "h2. motor"
obs, r, done, info = env.step(action)

print(f"obs : {obs}")
print(f"r : {r}")
print(f"done : {done}")
print(f"info : {info}")

In [None]:
# Step

# Walk through the parsing done in MiniCrosswordsEnv.step() one statement at a
# time, on a standalone empty board.
board = ['_'] * 25
print(f"Board : {board}")

action = "h2. motor"
print(f"Action : '{action}'")

# Keep only the last line of the action text.
action = action.split('\n')[-1]
print(f"Action : '{action}'")

# Split into [position, word].
action = action.split('. ')
print(f"Action : '{action}'")

pos, word = action
print(f"Pos : '{pos}'")
print(f"Word : '{word}'")

# Horizontal word: overwrite the 5 cells of row `idx` (row-major board).
if pos.startswith('h'):
    idx = int(pos[1:]) - 1
    board[idx*5:(idx+1)*5] = list(word.upper())
print(f"Board : {board}")

# Remove

In [None]:
# I'm keeping the same comments, etc everywhere, so that later it's easier to merge experiments.gameof24.py and experiments.crosswords.py (hydra?)

import asyncio
# set up logging
import logging
import os
import json
import random
import argparse

import numpy as np
import pandas as pd
from diskcache import Cache
from datetime import datetime
from collections import Counter


# TODO: Not sure if this is correct, I didn't know how else to handle the package paths
import sys
sys.path.append(os.getcwd()) # Project root!!

from async_engine.cached_api import CachedOpenAIAPI
from async_engine.round_robin_manager import AsyncRoundRobin
from async_engine.batched_api import BatchingAPI
from async_implementation.agents.crosswords import CrosswordsAgent
from async_implementation.states.crosswords import CrosswordsState
from utils import create_folder, email_notification

logger = logging.getLogger("experiments")
logger.setLevel(logging.DEBUG) # Order : debug < info < warning < error < critical
# NOTE(review): the folder name contains ':' (e.g. "14:00"), which is not a
# valid path character on Windows -- confirm the target platforms.
log_folder = f"logs/{datetime.now().date()}/{datetime.now().strftime('%H')}:00/" # Folder in which logs will be saved (organized daily)
create_folder(log_folder)

# you should use the same cache for every instance of CachedOpenAIAPI
# that way we never pay for the same request twice
assert os.path.exists(
    "./caches/"), "Please run the script from the root directory of the project. To make sure all caches are created correctly."
cache = Cache("./caches/async_api_cache", size_limit=int(2e10))

# get OPENAI_API_KEY from env
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
assert OPENAI_API_KEY is not None, "Please set the OPENAI_API_KEY environment variable"

# Request parameters handed to the cached API client.
api_config = {
    "max_tokens": 100,
    "temperature": 0.7,
    "top_p": 1,
    "request_timeout": 45,
    "model": "gpt-3.5-turbo"
}

api = CachedOpenAIAPI(cache, api_config)
limiter = AsyncRoundRobin()
# ToDo, this is a bit hacky. OpenAI allows multiple parallel requests per key, so we add the same key multiple times
N = 4
for _ in range(N):
    limiter.add_resource(data=OPENAI_API_KEY)

# Set up Crosswords puzzles
# Each dataset entry unpacks into (clues, ground-truth board); see its use below.
path = "data/datasets/mini0505.json"
with open(path, "r") as file:
    dataset = json.load(file)

# ToDo: this should probably be moved to its own file
# for now I'm keeping it here, for easier debugging
#async def foa_gameof24(puzzle_idx: int, num_agents=3, k=2, backtrack=0.8):
async def foa_crosswords(puzzle_idx, foa_options):
    """Skeleton of the fleet-of-agents run for one crossword puzzle.

    Seeds ``random`` with 0, builds ``foa_options["num_agents"]`` initial
    states for puzzle ``puzzle_idx`` and then only prints the index of each of
    the ``foa_options["num_steps"]`` steps. Always returns 0.
    """
    seed = 0
    random.seed(seed)

    # A dataset entry is (list of clues, ground-truth board).
    data, board_gt = dataset[puzzle_idx]
    ans_gt = CrosswordsState.get_ans(board_gt)  # ground-truth answers

    # One initial state per agent, each with its own random value.
    states = [
        CrosswordsState(data=data, board_gt=board_gt, ans_gt=ans_gt, randomness=random.randint(0, 1000))
        for _ in range(foa_options["num_agents"])
    ]

    for step in range(foa_options["num_steps"]):
        print(f"Step {step}")
    return 0



# Build a single state by hand for puzzle 1, outside of foa_crosswords().
puzzle_idx = 1
data, board_gt = dataset[puzzle_idx] 
ans_gt = CrosswordsState.get_ans(board_gt)

state = CrosswordsState(data=data, board_gt=board_gt, ans_gt=ans_gt, randomness=0)

# Game states

In [1]:
import asyncio
# set up logging
import logging
import os
import json
import random
import argparse

import numpy as np
import pandas as pd
from diskcache import Cache
from datetime import datetime
from collections import Counter


# TODO: Not sure if this is correct, I didn't know how else to handle the package paths
import sys
sys.path.append(os.getcwd()) # Project root!!

from async_engine.cached_api import CachedOpenAIAPI
from async_engine.round_robin_manager import AsyncRoundRobin
from async_engine.batched_api import BatchingAPI
from async_implementation.agents.gameof24 import GameOf24Agent
from async_implementation.states.gameof24 import GameOf24State
from utils import create_folder, email_notification

logger = logging.getLogger("experiments")
logger.setLevel(logging.DEBUG) # Order : debug < info < warning < error < critical
log_folder = f"logs/{datetime.now().date()}/{datetime.now().strftime('%H')}:00/gameof24" # Folder in which logs will be saved (organized daily)
create_folder(log_folder)


# you should use the same cache for every instance of CachedOpenAIAPI
# that way we never pay for the same request twice
assert os.path.exists(
    "./caches/"), "Please run the script from the root directory of the project. To make sure all caches are created correctly."
cache = Cache("./caches/async_api_cache", size_limit=int(2e10))

# get OPENAI_API_KEY from env
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
assert OPENAI_API_KEY is not None, "Please set the OPENAI_API_KEY environment variable"

# Request parameters handed to the cached API client.
api_config = {
    "max_tokens": 100,
    "temperature": 0.7,
    "top_p": 1,
    "request_timeout": 45,
    "model": "gpt-3.5-turbo"
}

api = CachedOpenAIAPI(cache, api_config)
limiter = AsyncRoundRobin()
# ToDo, this is a bit hacky. OpenAI allows multiple parallel requests per key, so we add the same key multiple times
N = 4
for _ in range(N):
    limiter.add_resource(data=OPENAI_API_KEY)

# set up GameOf24 puzzles
# (The CSV was previously read twice with identical code; once is enough.)
path = 'data/datasets/24_tot.csv'
data = pd.read_csv(path).Puzzles.tolist()

# Use batching API: rebind `api` to the batching wrapper around the cached client.
api = BatchingAPI(api, limiter, batch_size=1, timeout=10)

In [2]:
# Initial state for the first puzzle: the current state starts out as the puzzle itself.
puzzle_idx = 0
puzzle = data[puzzle_idx]
state = GameOf24State(puzzle=puzzle, current_state=puzzle, steps=[], randomness=random.randint(0, 1000))

In [3]:
# Display the numbers still available in the initial state.
state.current_state

'1 1 4 6'

In [4]:
# Run one agent step through the batching API; the result is bound to a new
# name, so `state` still refers to the original object.
next_state = await GameOf24Agent.step(state, api)

In [5]:
# Display the numbers left after the agent's step.
next_state.current_state

'1 2 6'

In [6]:
def expression(state):
    """Return the space-separated tokens of ``state.current_state`` as a list of strings."""
    return state.current_state.split(" ")

# Call the `expression` helper defined in this cell; the previous call used the
# name `function`, which is not defined anywhere in the notebook.
expression(state)

['1', '1', '4', '6']

In [13]:
# Quick reminder of how list.pop() behaves with and without an index.
my_list = list(range(1, 6))

# Remove and return the final element
last_element = my_list.pop(-1)
print(last_element)  # Output: 5
print(my_list)  # Output: [1, 2, 3, 4]

# Remove and return the element stored at index 2
element_at_index_2 = my_list.pop(2)
print(element_at_index_2)  # Output: 3
print(my_list)  # Output: [1, 2, 4]



3
[1, 2, 4, 5]
