In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs so edits to local code are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import datetime
from dotenv import load_dotenv
import json
from lib.utils import black_print
import openai
import polars as pl
from pydantic import BaseModel

# Read environment variables from a local .env file (presumably the API
# credentials used by the OpenAI client later on -- confirm).
load_dotenv()

# Load every parquet shard of the games dataset and normalise it:
# snake_case the board column, force integer ids, order chronologically.
df = (
    pl.read_parquet("./data/corbt/connections-games/*.parquet")
    .rename({"startingBoard": "starting_board"})
    .cast({"id": pl.Int64})
    .sort("createdAt")
)
df

id,board,createdAt,name,starting_board
i64,str,"datetime[μs, UTC]",str,str
1,"""{""WET WEATHER"": {""level"": 0, ""…",2023-06-12 00:00:00 UTC,"""Connections #1""","""[[""SNOW"", ""LEVEL"", ""SHIFT"", ""K…"
2,"""{""FOOTWEAR"": {""level"": 0, ""mem…",2023-06-13 00:00:00 UTC,"""Connections #2""","""[[""PUMP"", ""FOOT"", ""TIME"", ""SEA…"
3,"""{""FACIAL FEATURES"": {""level"": …",2023-06-14 00:00:00 UTC,"""Connections #3""","""[[""AMIGO"", ""MOUTH"", ""LAB"", ""ST…"
4,"""{""SNEAKER BRANDS"": {""level"": 0…",2023-06-15 00:00:00 UTC,"""Connections #4""","""[[""DUST"", ""CATS"", ""SPIDER"", ""C…"
5,"""{""STREAMING SERVICES"": {""level…",2023-06-16 00:00:00 UTC,"""Connections #5""","""[[""MUSTARD"", ""TARTAR"", ""PLUM"",…"
…,…,…,…,…
596,"""{""SMALL AMOUNT OF FOOD TO TRY""…",2025-01-27 00:00:00 UTC,"""Connections #596""","""[[""OLIVE"", ""TYPEWRITER"", ""EXTR…"
597,"""{""FUNNY PERSON"": {""level"": 0, …",2025-01-28 00:00:00 UTC,"""Connections #597""","""[[""DEMON"", ""LAUGH"", ""JOKER"", ""…"
598,"""{""OWNED"": {""level"": 0, ""member…",2025-01-29 00:00:00 UTC,"""Connections #598""","""[[""WICKED"", ""GINGERBREAD"", ""FU…"
599,"""{""INTANGIBLE QUALITY"": {""level…",2025-01-30 00:00:00 UTC,"""Connections #599""","""[[""HALO"", ""RIGATONI"", ""AIR"", ""…"


In [3]:
class ConnectionGroup(BaseModel):
    """One named category of a puzzle: its level and the words that belong to it."""

    # Integer level of the category (0-3 in the sample games displayed in this
    # notebook; presumably the difficulty tier -- confirm).
    level: int
    # Words belonging to the category (four per group in the displayed sample).
    members: list[str]


class ConnectionGame(BaseModel):
    """A single Connections puzzle, mirroring one row of the games table."""

    # Solution groups keyed by category name.
    board: dict[str, ConnectionGroup]
    # Grid of words as a list of rows, decoded from the `starting_board` column.
    starting_board: list[list[str]]
    # Timestamp taken from the `createdAt` column; keeps the source's camelCase name.
    createdAt: datetime.datetime
    # Numeric puzzle id.
    id: int
    # Display name, e.g. "Connections #1".
    name: str


def _row_to_game(row: dict) -> ConnectionGame:
    """Build a validated ConnectionGame from one row of ``df`` given as a dict.

    The ``board`` and ``starting_board`` columns hold JSON text and are decoded
    here; the remaining columns are passed through unchanged.
    """
    solution = {}
    for title, spec in json.loads(row["board"]).items():
        solution[title] = ConnectionGroup(
            level=spec["level"],
            members=spec["members"],
        )
    return ConnectionGame(
        board=solution,
        starting_board=json.loads(row["starting_board"]),
        createdAt=row["createdAt"],
        id=row["id"],
        name=row["name"],
    )


# One model instance per table row, in the table's row order.
games: list[ConnectionGame] = list(map(_row_to_game, df.to_dicts()))
games

[ConnectionGame(board={'WET WEATHER': ConnectionGroup(level=0, members=['HAIL', 'RAIN', 'SLEET', 'SNOW']), 'NBA TEAMS': ConnectionGroup(level=1, members=['BUCKS', 'HEAT', 'JAZZ', 'NETS']), 'KEYBOARD KEYS': ConnectionGroup(level=2, members=['OPTION', 'RETURN', 'SHIFT', 'TAB']), 'PALINDROMES': ConnectionGroup(level=3, members=['KAYAK', 'LEVEL', 'MOM', 'RACECAR'])}, starting_board=[['SNOW', 'LEVEL', 'SHIFT', 'KAYAK'], ['HEAT', 'TAB', 'BUCKS', 'RETURN'], ['JAZZ', 'HAIL', 'OPTION', 'RAIN'], ['SLEET', 'RACECAR', 'MOM', 'NETS']], createdAt=datetime.datetime(2023, 6, 12, 0, 0, tzinfo=zoneinfo.ZoneInfo(key='UTC')), id=1, name='Connections #1'),
 ConnectionGame(board={'FOOTWEAR': ConnectionGroup(level=0, members=['BOOT', 'LOAFER', 'PUMP', 'SNEAKER']), 'UNITS OF LENGTH': ConnectionGroup(level=1, members=['FOOT', 'LEAGUE', 'MILE', 'YARD']), 'MAGAZINES': ConnectionGroup(level=2, members=['ESSENCE', 'PEOPLE', 'TIME', 'US']), 'LETTER HOMOPHONES': ConnectionGroup(level=3, members=['ARE', 'QUEUE', 'SEA

In [4]:
client = openai.AsyncOpenAI()
chat_completion = await client.chat.completions.create(
    messages=[
        {
            "role": "user",
            "content": "Hello, world!",
        }
    ],
    model="gpt-4o-mini",
)
black_print(chat_completion)

ChatCompletion(
    id="chatcmpl-AzWkMiS5O00MT92xtmHen3I6KxLSz",
    choices=[
        Choice(
            finish_reason="stop",
            index=0,
            logprobs=None,
            message=ChatCompletionMessage(
                content="Hello! How can I assist you today?",
                refusal=None,
                role="assistant",
                audio=None,
                function_call=None,
                tool_calls=None,
            ),
        )
    ],
    created=1739227250,
    model="gpt-4o-mini-2024-07-18",
    object="chat.completion",
    service_tier="default",
    system_fingerprint="fp_72ed7ab54c",
    usage=CompletionUsage(
        completion_tokens=10,
        prompt_tokens=11,
        total_tokens=21,
        completion_tokens_details=CompletionTokensDetails(
            accepted_prediction_tokens=0,
            audio_tokens=0,
            reasoning_tokens=0,
            rejected_prediction_tokens=0,
        ),
        prompt_tokens_details=PromptTokensDet

In [21]:
# Prompts from https://github.com/lechmazur/nyt-connections
# All three variants end with the same answer-format instruction, so it is
# written once and appended to each task description.
_ANSWER_FORMAT = (
    "Output them in the following format: four total lines. "
    "On each line, there should be four comma-separated items. "
    "No additional text (like group titles or descriptions) should be in the output. "
    "Also, there should not be anything in your output before or after the solution."
)
_TASK_DESCRIPTIONS = (
    "Find groups of four items that share something in common.",
    "Group words that share a common thread. There are four words for each common thread.",
    "This is a puzzle. Create four groups of four. Words in each group fit under a specific category. Some categories might be defined by their use of wordplay (palindromes, homophones, adding or dropping letters and words) rather than the literal meanings of the words on the cards.",
)
prompts = [f"{task} {_ANSWER_FORMAT}" for task in _TASK_DESCRIPTIONS]

for prompt in prompts:
    for game in games:
        content = (
            f"{prompt}\n\n{"\n".join(" ".join(row) for row in game.starting_board)}"
        )
        print(content)
        chat_completion = await client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": content,
                }
            ],
            model="o3-mini",
        )
        black_print(chat_completion)
        assistant_content = chat_completion.choices[0].message.content
        assert assistant_content is not None
        assistant_content = assistant_content.strip()
        groups = [
            frozenset(word.strip() for word in line.split(","))
            for line in assistant_content.split("\n")
        ]
        print(groups)
        score = 0
        for group_name, group_contents in game.board.items():
            if set(group_contents.members) in groups:
                score += 1 / len(game.board)
        print(f"{game.name}: {score}")
        break
    break

Find groups of four items that share something in common. Output them in the following format: four total lines. On each line, there should be four comma-separated items. No additional text (like group titles or descriptions) should be in the output. Also, there should not be anything in your output before or after the solution.

SNOW LEVEL SHIFT KAYAK
HEAT TAB BUCKS RETURN
JAZZ HAIL OPTION RAIN
SLEET RACECAR MOM NETS
ChatCompletion(
    id="chatcmpl-AzWzS24kM1XfR8OfyKgqn8zuEzkkV",
    choices=[
        Choice(
            finish_reason="stop",
            index=0,
            logprobs=None,
            message=ChatCompletionMessage(
                content="SNOW, HAIL, RAIN, SLEET\nLEVEL, KAYAK, RACECAR, MOM\nSHIFT, TAB, RETURN, OPTION\nHEAT, BUCKS, JAZZ, NETS",
                refusal=None,
                role="assistant",
                audio=None,
                function_call=None,
                tool_calls=None,
            ),
        )
    ],
    created=1739228186,
    model

In [None]:
def do_two_lists_have_same_elements(list1, list2):
    """Return True when both lists hold the same strings, ignoring case and order.

    Elements are compared as multisets: each lower-cased string must occur the
    same number of times in both lists, so ['a', 'a', 'b'] and ['a', 'b', 'b']
    are different. Lists of different length are never equal.

    Args:
        list1: Iterable of strings.
        list2: Iterable of strings.

    Returns:
        bool: whether the two lists contain the same elements.
    """
    # Sorting the lower-cased items compares counts as well as membership;
    # a plain set comparison would ignore how often each word occurs.
    return sorted(x.lower() for x in list1) == sorted(x.lower() for x in list2)


def _clean_group_line(line):
    """Extract up to four candidate words from one answer line.

    Strips trailing commentary ('//' and ' - '), quotes, list numbering
    ('1. ' .. '4. '), '<eos>' markers, parenthesised notes, and any text
    before a period, then returns the first four comma-separated items.
    """
    line = line.split('//')[0]
    line = line.split(' - ')[0]
    words = []
    for item in line.split(','):
        item = item.strip().strip('\'"')
        for marker in ('1. ', '2. ', '3. ', '4. ', '<eos>'):
            item = item.replace(marker, '')
        # Drop a parenthesised note, e.g. "WORD (explanation)".
        item = item.split('(')[0].strip()
        # Keep the text before the first colon.
        # NOTE(review): the original picked the colon-separated part with the
        # most commas, but items are already comma-split here, so that always
        # resolved to the first part. A "Label: WORD" item therefore keeps the
        # label rather than the word -- confirm whether that is intended.
        item = item.split(':')[0]
        # Drop a leading enumerator such as "1.WORD".
        if '.' in item:
            item = item.split('.', 1)[1].strip()
        item = item.split(' - ')[0].strip()
        words.append(item)
    return words[:4]


def run_eval(filep, all_extracted):
    """Score model answers stored in a text file against reference solutions.

    The file is scanned for lines that parse as an integer puzzle number; the
    four lines after such a line are taken as that puzzle's proposed groups.
    Lines that are not integers are skipped one at a time.

    Args:
        filep: Path of the answers file.
        all_extracted: Indexable by puzzle number; each value is a
            newline-separated string in which every group occupies six lines:
            a description, four words, and one further line that is not read
            (presumably a blank separator -- confirm).

    Returns:
        A tuple ``(used, scores)``: the set of puzzle numbers encountered and
        the list of per-puzzle scores (each a multiple of 0.25) in order of
        first appearance. A puzzle number that appears twice keeps only its
        last score in the list but is counted twice in the printed totals.

    Raises:
        KeyError / IndexError: if a puzzle number from the file has no entry
            in ``all_extracted``.
    """
    used = set()
    scores = {}
    with open(filep, 'r') as file:
        lines = file.read().strip().split('\n')

    total_score = 0
    count = 0
    index = 0
    while index < len(lines):
        # Only the header parse may legitimately fail; keeping the try narrow
        # stops unrelated ValueErrors from being silently swallowed.
        try:
            num = int(lines[index].strip())
        except ValueError:
            index += 1
            continue

        used.add(num)
        processed = [_clean_group_line(g) for g in lines[index + 1:index + 5]]
        index += 5  # Move to the next section

        compared_to = all_extracted[num].split('\n')
        score = 0
        for f in range(4):
            # Line f*6 is the group description; the next four are its words.
            words = compared_to[f * 6 + 1:f * 6 + 5]
            if any(do_two_lists_have_same_elements(res, words) for res in processed):
                score += 1 / 4

        scores[num] = score
        total_score += score
        count += 1

    # Guard against files without any puzzle section (count == 0).
    percentage = round(total_score / count * 100.0, 2) if count else 0.0
    print("Total score = ", total_score)
    print("Total count = ", count)
    print("Percentage = ", percentage)
    return used, list(scores.values())

In [29]:
# Preview the eric27n CSV export (one row per word), dropping rows with nulls.
(
    pl.read_csv("./data/eric27n/NYT-Connections/Connections_Data.csv")
    .drop_nulls()
)

Game ID,Puzzle Date,Word,Group Name,Group Level,Starting Row,Starting Column
i64,str,str,str,i64,i64,i64
1,"""2023-06-12""","""SNOW""","""WET WEATHER""",0,1,1
1,"""2023-06-12""","""LEVEL""","""PALINDROMES""",3,1,2
1,"""2023-06-12""","""SHIFT""","""KEYBOARD KEYS""",2,1,3
1,"""2023-06-12""","""KAYAK""","""PALINDROMES""",3,1,4
1,"""2023-06-12""","""HEAT""","""NBA TEAMS""",1,2,1
…,…,…,…,…,…,…
599,"""2024-12-31""","""BLAST""","""FUN TIME""",1,3,4
599,"""2024-12-31""","""BOLT""","""MOVE QUICKLY""",0,4,1
599,"""2024-12-31""","""KICK""","""FUN TIME""",1,4,2
599,"""2024-12-31""","""TO""","""WORDS BEFORE AN ADDRESSEE""",2,4,3


In [26]:
import json

# Load the tm21cy dataset. The context manager closes the file handle as soon
# as parsing finishes, and the explicit encoding avoids depending on the
# platform's default locale.
with open(
    "./data/tm21cy/NYT-Connections/ConnectionsFinalDataset.json",
    encoding="utf-8",
) as dataset_file:
    data = json.load(dataset_file)
data

[{'date': '2024/06/03',
  'contest': 'NYT Connections 358 - June 3rd, 2024',
  'words': ['LASER',
   'PLUCK',
   'THREAD',
   'WAX',
   'COIL',
   'SPOOL',
   'WIND',
   'WRAP',
   'HONEYCOMB',
   'ORGANISM',
   'SOLAR PANEL',
   'SPREADSHEET',
   'BALL',
   'MOVIE',
   'SCHOOL',
   'VITAMIN'],
  'answers': [{'answerDescription': 'REMOVE, AS BODY HAIR',
    'words': ['LASER', 'PLUCK', 'THREAD', 'WAX']},
   {'answerDescription': 'TWIST AROUND',
    'words': ['COIL', 'SPOOL', 'WIND', 'WRAP']},
   {'answerDescription': 'THINGS MADE OF CELLS',
    'words': ['HONEYCOMB', 'ORGANISM', 'SOLAR PANEL', 'SPREADSHEET']},
   {'answerDescription': 'B-___',
    'words': ['BALL', 'MOVIE', 'SCHOOL', 'VITAMIN']}],
  'difficulty': 3.3},
 {'date': '2024/06/02',
  'contest': 'NYT Connections 357 - June 2nd, 2024',
  'words': ['FOLLOWERS',
   'LEMMINGS',
   'PUPPETS',
   'SHEEP',
   'EQUITY',
   'OPTIONS',
   'SHARES',
   'STOCKS',
   'BILLINGS',
   'BUFFALO',
   'MOBILE',
   'PHOENIX',
   'APARTMENT',
   '