# NYT Connections Notebook

**PLEASE READ:** Python notebooks are a pain in the ass to merge on GitHub. If you make an edit here while someone else has already changed this file, completing a git merge will be much harder than it would be for, say, a normal Python file. This ultimately boils down to an ipynb file *technically* being a JSON document: there are a lot of things going on under the hood that make conflicts much more likely (incidentally, this is also why, if you and several other people work on the same file in Google Colab, you get messages about "unable to save local changes" and conflicts). As a result, **please do not modify this file.** Instead, **create a copy of this file and make your changes there** (e.g. `connections-notebook-[your-name].ipynb`).

In [1]:
import numpy as np
import pandas as pd

import gzip
import json
import random
import re
import io
import os
from dotenv import load_dotenv
from collections import Counter

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
from datasets import load_dataset
from transformers import BertTokenizer, BertModel
import torch
import gensim.downloader as api
from gensim.models.word2vec import Word2Vec
from gensim.models import KeyedVectors
from itertools import combinations
from openai import OpenAI

## Load Games & Models

In [2]:
# Load every Connections puzzle from the HuggingFace-hosted CSV
df_ = pd.read_csv("hf://datasets/eric27n/NYT-Connections/Connections_Data.csv")
# Missing words become the literal string "NA" before lowercasing
# (presumably the puzzle word "NA" is parsed as a missing value by read_csv - TODO confirm)
df_['Word'] = df_['Word'].fillna("NA").str.lower()
df_['Group Name'] = df_['Group Name'].str.lower()
grouped = df_.groupby('Game ID')

# One dict per game: the flat word list plus the solution, one entry per group name
result = [
  {
    'words': game_rows['Word'].tolist(),
    'solution': {
      'groups': [
        {'words': rows['Word'].tolist(), 'reason': rows['Group Name'].iloc[0]}
        for _, rows in game_rows.groupby('Group Name')
      ]
    },
  }
  for _, game_rows in grouped
]

ds = result
ds_len = len(ds)
print(ds_len, ds[0])

628 {'words': ['snow', 'level', 'shift', 'kayak', 'heat', 'tab', 'bucks', 'return', 'jazz', 'hail', 'option', 'rain', 'sleet', 'racecar', 'mom', 'nets'], 'solution': {'groups': [{'words': ['shift', 'tab', 'return', 'option'], 'reason': 'keyboard keys'}, {'words': ['heat', 'bucks', 'jazz', 'nets'], 'reason': 'nba teams'}, {'words': ['level', 'kayak', 'racecar', 'mom'], 'reason': 'palindromes'}, {'words': ['snow', 'hail', 'rain', 'sleet'], 'reason': 'wet weather'}]}}


In [3]:
# Import different models
# NOTE: the evaluation cells further down reference model_google, model_glove and
# model_wiki; uncomment the three loads below before running those cells.
# model_google = api.load('word2vec-google-news-300')
# model_glove = api.load('glove-wiki-gigaword-300')
# model_wiki = api.load('fasttext-wiki-news-subwords-300')

# print(f"GOOGLE NEWS: {model_google.most_similar('seattle')}")
# print(f"GLOVE: {model_glove.most_similar('seattle')}")
# print(f"WIKI: {model_wiki.most_similar('seattle')}")

# Additional fourth model
# From my tests, this model did the best, albeit it requires a large download beforehand
# NEVER COMMIT THE ZIPPED OR UNZIPPED TEXT FILE TO GITHUB:
#     the push is rejected and undoing the commit afterwards is painful
# https://github.com/commonsense/conceptnet-numberbatch
gzipped_file_path = 'numberbatch-en-19.08.txt.gz'
# gensim opens the .gz archive itself, so the path is passed straight through instead of
# decompressing the whole file into a string and copying it again into a BytesIO buffer.
model_numberbatch = KeyedVectors.load_word2vec_format(gzipped_file_path, binary=False)
print(f"NUMBERBATCH: {model_numberbatch.most_similar('seattle')}")

NUMBERBATCH: [('university_of_washington', 0.9806408286094666), ('space_needle', 0.9797334671020508), ('seattleite', 0.9641170501708984), ('emerald_city', 0.9455490112304688), ('tacoma', 0.7643471360206604), ('spokane', 0.7531239986419678), ('portland', 0.7523225545883179), ('lake_chelan', 0.7268684506416321), ('washington', 0.7256752252578735), ('kennewick', 0.7251202464103699)]


## Evaluate on one round

In [8]:
# Preprocess multi-word expressions (e.g. 'New York', 'push-up')
def preprocess_word(word, model):
  """
  Normalise a puzzle word so it can be looked up in a word-vector model.

  The word is lowercased and every hyphen or whitespace character is replaced
  by an underscore ('New York' -> 'new_york'). If that form is not a key of the
  model, all underscores are dropped instead ('push-up' -> 'pushup').

  Args:
      word (str): The word to preprocess.
      model: The word-vector model; only membership (`in`) is used here.

  Returns:
      str: The normalised word. It is not guaranteed to be in the model.
  """
  joined = re.sub(r'[-\s]', '_', word.lower())
  return joined if joined in model else joined.replace('_', '')

In [9]:
# Extract words from ds[i]['words']
def guess(model, words):
  """
  Guess the best 4 words to form a group based on word similarity.

  The group is grown greedily: start from the most similar pair of words, then
  twice add the remaining word whose average similarity to the words already
  chosen is highest. Words missing from the model contribute a similarity of 0.

  Args:
      model (gensim.models.word2vec): The word2vec model to use.
      words (list): A list of words to process.

  Returns:
      list: The 4 chosen words in their preprocessed form (see preprocess_word),
      seed pair first. None if fewer than 4 words are given or if no pair of
      words has a positive similarity.
  """
  # NOTE(review): the returned words are the preprocessed spellings (e.g. 'mole_rat'),
  # while eval_round compares against the raw puzzle words, so multi-word entries
  # may never match - confirm whether callers should map back to the originals.
  tokens = [preprocess_word(word, model) for word in words]
  if len(tokens) < 4:
    return None

  def pair_similarity(first, second):
    """Similarity of two tokens, or 0 when either one is missing from the model."""
    if first in model and second in model:
      return model.similarity(first, second)
    return 0

  # Seed: the first pair of distinct positions with the highest positive similarity.
  # Positions (not strings) are tracked so repeated tokens cannot confuse the bookkeeping.
  best_score = 0
  chosen = None
  for i, first in enumerate(tokens):
    for j, second in enumerate(tokens):
      if i == j:
        continue  # a word is never its own partner
      score = pair_similarity(first, second)
      if score > best_score:
        best_score = score
        chosen = [i, j]

  if chosen is None:
    return None

  # Grow the group to four words, one best-matching word at a time
  while len(chosen) < 4:
    remaining = [k for k in range(len(tokens)) if k not in chosen]
    averages = [
      sum(pair_similarity(tokens[c], tokens[k]) for c in chosen) / len(chosen)
      for k in remaining
    ]
    # index(max(...)) keeps the earliest candidate on ties
    chosen.append(remaining[averages.index(max(averages))])

  # Return the final list of four words
  return [tokens[k] for k in chosen]

In [10]:
def eval_round(guess_list, solution):
  """
  Evaluate the guess list against the solution.

  Each distinct guessed word is counted at most once per group, so repeating a
  word cannot inflate the score, and any number of solution groups is accepted.

  Args:
      guess_list (list): The list of guessed words. Should contain 4 entries.
      solution (dict): The solution dictionary containing the correct groups
          under the 'groups' key; each group lists its words under 'words'.

  Returns:
      int: The largest number of distinct guessed words that fall in a single
      group (4 means the guess is exactly right; 0 if there are no groups).
      None if guess_list does not contain exactly 4 entries.
  """
  # Check if the guess list is valid
  if len(guess_list) != 4:
    return None

  guessed = set(guess_list)

  # Overlap with the best-matching group; if the guess was all right this is 4
  return max(
    (len(guessed.intersection(group['words'])) for group in solution['groups']),
    default=0,
  )

In [7]:
# Score only the first guess of every game, once per model.
# Requires all four models to be loaded (see the model-loading cell).
models = [model_google, model_glove, model_wiki, model_numberbatch]
model_names = ["Google News", "Glove", "Wikipedia", "Numberbatch"]
correct_idx = []  # games for which at least one model's first guess was fully correct
for model_name, model in zip(model_names, models):
  print(f"======== {model_name} ========")
  right_list = []
  for i, game in enumerate(ds):
    guess_list = guess(model, game['words'])
    if guess_list is None:
      continue  # the model could not produce a guess for this game
    score = eval_round(guess_list, game['solution'])
    right_list.append(score)
    if score == 4 and i not in correct_idx:
      correct_idx.append(i)

  # Guard the average: every guess may have been None for this model
  if right_list:
    print(f"AVERAGE SCORE: {sum(right_list) / len(right_list)}")
  else:
    print("AVERAGE SCORE: n/a (no valid guesses)")
  for n_right in range(1, 5):
    print(f"{n_right}: {right_list.count(n_right)}")
  print()
print(f"Number of Games with At Least One Good First Guess: {len(correct_idx)} / {ds_len}")

NameError: name 'model_google' is not defined

## Evaluate games

In [11]:
def compute_similarity_matrix(model, words):
    """
    Build a lookup of pairwise similarities for the words known to the model.

    Args:
        model: Word-vector model supporting `in` and `similarity(a, b)`.
        words (list): Words to compare; each is run through preprocess_word
            and dropped if the result is not in the model.

    Returns:
        dict: Maps (word1, word2) to their similarity, with one entry per
        unordered pair, keyed in the order the words appear.
    """
    known = [
        token
        for token in (preprocess_word(word, model) for word in words)
        if token in model
    ]
    # combinations() yields each pair once, earlier word first
    return {
        (left, right): model.similarity(left, right)
        for left, right in combinations(known, 2)
    }

# Extract words from ds[i]['words'] with fallback guesses
# similarity_matrix: precomputed similarity matrix

def guess_best_combination(model, words, similarity_matrix=None, lives=4):
    """
    Rank every 4-word combination by average pairwise similarity.

    Args:
        model: Word-vector model supporting `in` and `similarity(a, b)`.
        words (list): Words still on the board.
        similarity_matrix (dict, optional): Precomputed similarities keyed by
            (word1, word2) tuples of preprocessed words. Computed with
            compute_similarity_matrix when omitted.
        lives (int): Maximum number of guesses to return.

    Returns:
        list: Up to `lives` guesses (each a list of 4 words), best first.
        When exactly 4 words are given, the single guess made of those words,
        unprocessed. None if `lives` is below 1 or fewer than 4 of the words
        are known to the model.
    """
    if lives < 1:
        return None

    # Only one grouping is possible, so return it once as a single 4-word guess
    if len(words) == 4:
        return [list(words)]

    tokens = [preprocess_word(word, model) for word in words]
    tokens = [token for token in tokens if token in model]
    if len(tokens) < 4:
        return None

    if similarity_matrix is None:
        similarity_matrix = compute_similarity_matrix(model, tokens)

    def pair_score(first, second):
        """Look the pair up in either key order; unknown pairs score 0."""
        return similarity_matrix.get((first, second), similarity_matrix.get((second, first), 0))

    scored_combinations = [
        (combination, np.mean([pair_score(a, b) for a, b in combinations(combination, 2)]))
        for combination in combinations(tokens, 4)
    ]

    # Sort combinations by average similarity in descending order
    # (the sort is stable, so ties keep their generation order)
    scored_combinations.sort(key=lambda item: item[1], reverse=True)

    # Return up to `lives` attempts in descending order of similarity
    return [list(combination) for combination, _ in scored_combinations[:lives]]

In [13]:
# Demo: print the ranked candidate groups Numberbatch proposes for one hand-written 16-word board (default lives=4)
print(guess_best_combination(model_numberbatch, ['hunt', 'check', 'game', 'ford', 'president', 'play', 'car', 'stop', 'oxen', 'block', 'movie', 'actor', 'dam', 'dysentery', 'director', 'concert']))

[['game', 'play', 'movie', 'actor'], ['president', 'movie', 'actor', 'director'], ['play', 'movie', 'actor', 'concert'], ['movie', 'actor', 'director', 'concert']]


In [12]:
def calculate_score(num_correct, strikes):
    """
    Calculate the score based on the number of correct guesses and strikes.

    Each solved group is worth its multiplier (1, 2, 3, 3 for the first to the
    fourth group) and the total is scaled by a penalty that shrinks with every
    strike. Out-of-range arguments are clamped to the nearest valid value.

    Args:
        num_correct (int): The number of correct guesses (0-4).
        strikes (int): The number of strikes (0-4).

    Returns:
        float: The calculated score, rounded to 2 decimal places.
    """
    # Define multipliers and penalties
    multipliers = [1, 2, 3, 3]
    penalties = [1.0, 0.9, 0.75, 0.5, 0.25]

    # Clamp both inputs: a negative strike count would otherwise index the
    # penalty table from the end, and one above 4 would raise IndexError
    num_correct = max(0, min(num_correct, len(multipliers)))
    strikes = max(0, min(strikes, len(penalties) - 1))

    # Calculate the total score
    total_score = sum(multipliers[:num_correct]) * penalties[strikes]

    return np.round(total_score, 2)

# Example usage: three (groups solved, strikes) scenarios fed to calculate_score
num_correct_1 = 4
num_correct_2 = 4
num_correct_3 = 2

strikes_1 = 0
strikes_2 = 1
strikes_3 = 2

# Multipliers sum to 9 for a full solve; each result is that sum scaled by the strike penalty
print("All Correct with 0 strikes:", calculate_score(num_correct_1, strikes_1))  # Output: 9.0
print("All Correct with 1 strike:", calculate_score(num_correct_2, strikes_2))   # Output: 8.1
print("2 Correct Groups - 2 strikes:", calculate_score(num_correct_3, strikes_3)) # Output: 2.25

All Correct with 0 strikes: 9.0
All Correct with 1 strike: 8.1
2 Correct Groups - 2 strikes: 2.25


In [43]:
# Play every game to completion with each model and report per-model statistics.
# Requires all four models to be loaded (see the model-loading cell).
models = [model_google, model_glove, model_wiki, model_numberbatch]
model_names = ["Google News", "Glove", "Wikipedia", "Numberbatch"]
correct_idx = []  # games fully solved by at least one model

# Iterate through each model and evaluate the guesses
for model_name, model in zip(model_names, models):
  print(f"======== {model_name} ========")
  correct_guesses = []  # groups solved per game
  total_scores = []     # calculate_score result per game
  for i, game in enumerate(ds):
    lives = 4
    correct_count = 0
    options = game['words']
    while lives > 0 and len(options) > 0:
      guess_list = guess_best_combination(model, options, lives=lives)
      if guess_list is None:
        lives -= 1
        continue
      # `candidate` (not `guess`) so the module-level guess() function is not overwritten
      for candidate in guess_list:
        score = eval_round(candidate, game['solution'])
        if score == 4:
          correct_count += 1
          options = [item for item in options if item not in candidate]
          # The last four words can only form one group, so credit it directly
          if len(options) == 4:
            correct_count += 1
            options = []
          break
        lives -= 1
        if candidate == guess_list[-1] or lives == 0:
          break
    correct_guesses.append(correct_count)
    if model is model_numberbatch and i > 600:
      print(f"GAME {i}: {correct_count} correct guesses, {lives} lives left")
    total_scores.append(calculate_score(correct_count, 4 - lives))
    if correct_count == 4 and i not in correct_idx:
      correct_idx.append(i)

  # Guard the averages against an empty dataset
  if correct_guesses:
    print(f"AVERAGE SCORE: {sum(correct_guesses) / len(correct_guesses)}")
  for n_solved in range(0, 5):
    print(f"{n_solved}: {correct_guesses.count(n_solved)}")
  if total_scores:
    print(f"Average Total Score: {sum(total_scores) / len(total_scores)} (Total: {sum(total_scores)})")
  print()
print(f"Number of Games with At Least One Complete Solve: {len(correct_idx)} / {ds_len}")

AVERAGE SCORE: 0.893312101910828
0: 323
1: 169
2: 76
3: 0
4: 60
Average Total Score: 0.8366242038216564 (Total: 525.4000000000002)

AVERAGE SCORE: 0.8136942675159236
0: 335
1: 167
2: 80
3: 0
4: 46
Average Total Score: 0.6980095541401274 (Total: 438.35)

AVERAGE SCORE: 1.1767515923566878
0: 264
1: 175
2: 96
3: 0
4: 93
Average Total Score: 1.240525477707007 (Total: 779.0500000000003)

GAME 601: 1 correct guesses, 0 lives left
GAME 602: 4 correct guesses, 4 lives left
GAME 603: 1 correct guesses, 0 lives left
GAME 604: 4 correct guesses, 2 lives left
GAME 605: 1 correct guesses, 0 lives left
GAME 606: 1 correct guesses, 0 lives left
GAME 607: 0 correct guesses, 0 lives left
GAME 608: 1 correct guesses, 0 lives left
GAME 609: 2 correct guesses, 0 lives left
GAME 610: 2 correct guesses, 0 lives left
GAME 611: 2 correct guesses, 0 lives left
GAME 612: 2 correct guesses, 0 lives left
GAME 613: 0 correct guesses, 0 lives left
GAME 614: 0 correct guesses, 0 lives left
GAME 615: 4 correct guesse

In [22]:
# Sanity check: show the model object and the first board, then rank that board's candidate groups
print(model_google, ds[0]['words'])
guess_best_combination(model_google, ds[0]['words'], lives=4)

KeyedVectors<vector_size=300, 3000000 keys> ['snow', 'level', 'shift', 'kayak', 'heat', 'tab', 'bucks', 'return', 'jazz', 'hail', 'option', 'rain', 'sleet', 'racecar', 'mom', 'nets']


[['snow', 'hail', 'rain', 'sleet'],
 ['snow', 'heat', 'rain', 'sleet'],
 ['snow', 'jazz', 'rain', 'sleet'],
 ['snow', 'kayak', 'rain', 'sleet']]

In [31]:
from collections import defaultdict

def aggregate_rankings(models, weights, words, lives=4):
    """
    Combine the ranked guesses of several models into one ranked list.

    Every model proposes up to `lives` guesses via guess_best_combination; a
    guess earns weight / rank from each model that proposed it (rank 1 is the
    model's top guess), and the guesses are returned by descending total.

    Args:
        models (dict): Maps a model name to its word-vector model.
        weights (dict): Maps the same model names to their voting weight.
        words (list): Words still on the board.
        lives (int): Number of guesses requested from each model and returned.

    Returns:
        list: Up to `lives` guesses (lists of words), highest total first.
        Empty if no model produced a guess.
    """
    tally = defaultdict(float)

    for model_name in models:
        ranked = guess_best_combination(models[model_name], words, similarity_matrix=None, lives=lives)
        if not ranked:
            continue
        for position, combo in enumerate(ranked, start=1):
            # Lists are unhashable, so the guess is keyed as a tuple
            tally[tuple(combo)] += weights[model_name] / position

    # Highest accumulated score first; ties keep first-seen order
    ordered = sorted(tally, key=tally.get, reverse=True)
    return [list(combo) for combo in ordered[:lives]]

In [61]:
# Inspect the 10 nearest neighbours of 'carrot' in the Google News vectors
model_google.most_similar('carrot', topn=10)

[('carrots', 0.7685447931289673),
 ('proverbial_carrot', 0.5643057823181152),
 ('Carrot', 0.48995766043663025),
 ('celery', 0.47531840205192566),
 ('dangling_carrot', 0.47465410828590393),
 ('Coarsely_grate', 0.47062399983406067),
 ('carrot_dangling', 0.4619136154651642),
 ('broccoli', 0.45493656396865845),
 ('raisin_salad', 0.45256876945495605),
 ('shredded_zucchini', 0.4489986002445221)]

In [None]:
# Per-model scores used as voting weights. The first three match the
# "Average Total Score" values printed by the single-model evaluation;
# the Numberbatch figure is presumably from the same run - TODO confirm.
model_scores = {
    "google": 0.8366,
    "glove": 0.6980,
    "wiki": 1.2405,
    "numberbatch": 1.8192,
}
# Added left to right explicitly so the floating-point total is reproducible
score_total = (
    model_scores["google"]
    + model_scores["glove"]
    + model_scores["wiki"]
    + model_scores["numberbatch"]
)

# Each weight is the model's share of the combined score
weights_dict = {name: score / score_total for name, score in model_scores.items()}

# Name -> model mapping for the ensemble. The keys must match those of weights_dict,
# because aggregate_rankings looks up weights[name] for every model name it iterates.
models_dict = {
    "google": model_google,
    "glove": model_glove,
    "wiki": model_wiki,
    "numberbatch": model_numberbatch
}

from collections import defaultdict

def evaluate_aggregate_rankings(models, weights, dataset):
    """
    Play every game with the weighted model ensemble and print summary statistics.

    Each game starts with 4 lives. Guesses come from aggregate_rankings; a wrong
    guess costs one life, a fully correct guess removes its words from the
    board, and once only four words remain they are credited as solved.

    Args:
        models (dict): Maps a model name to its word-vector model.
        weights (dict): Maps the same model names to their voting weight.
        dataset (list): Games, each a dict with 'words' and 'solution'.

    Returns:
        None. The results are printed: average groups solved, the distribution
        of groups solved per game, the average and total calculate_score value,
        and the number of fully solved games.
    """
    correct_idx = []      # indices of fully solved games
    correct_guesses = []  # groups solved per game
    total_scores = []     # calculate_score result per game
    ds_len = len(dataset)

    print("======== Aggregate Model Evaluation ========")

    # Nothing to average over; avoid dividing by zero below
    if ds_len == 0:
        print("No games to evaluate.")
        return

    for i, game in enumerate(dataset):
        lives = 4
        correct_count = 0
        options = game['words']

        while lives > 0 and len(options) > 0:
            guess_list = aggregate_rankings(models, weights, options, lives=lives)

            # No model could propose a guess: spend a life and try again
            if not guess_list:
                lives -= 1
                continue

            for candidate in guess_list:
                score = eval_round(candidate, game['solution'])
                if score == 4:
                    correct_count += 1
                    options = [item for item in options if item not in candidate]
                    # The last four words can only form one group
                    if len(options) == 4:
                        correct_count += 1
                        options = []
                    break

                lives -= 1
                if candidate == guess_list[-1] or lives == 0:
                    break

        correct_guesses.append(correct_count)
        total_scores.append(calculate_score(correct_count, 4 - lives))

        if correct_count == 4:
            correct_idx.append(i)

    print(f"AVERAGE SCORE: {sum(correct_guesses) / len(correct_guesses)}")
    for n_solved in range(0, 5):
        print(f"{n_solved}: {correct_guesses.count(n_solved)}")
    print(f"Average Total Score: {sum(total_scores) / len(total_scores)} (Total: {sum(total_scores)})")
    print()
    print(f"Number of Games with At Least One Complete Solve: {len(correct_idx)} / {ds_len}")

# Call the evaluation function: runs the weighted ensemble over every game in ds and prints the summary
evaluate_aggregate_rankings(models_dict, weights_dict, ds)

AVERAGE SCORE: 1.5047770700636942
0: 193
1: 189
2: 114
3: 0
4: 132
Average Total Score: 1.7828025477707008 (Total: 1119.6000000000001)

Number of Games with At Least One Complete Solve: 132 / 628


## ChatGPT

In [None]:
def eval_round(guess_list, solution):
  """
  Evaluate the guess list against the solution.

  Each distinct guessed word is counted at most once per group, so repeating a
  word cannot inflate the score, and any number of solution groups is accepted.

  Args:
      guess_list (list): The list of guessed words. Should contain 4 entries.
      solution (dict): The solution dictionary containing the correct groups
          under the 'groups' key; each group lists its words under 'words'.

  Returns:
      int: The largest number of distinct guessed words that fall in a single
      group (4 means the guess is exactly right; 0 if there are no groups).
      None if guess_list does not contain exactly 4 entries.
  """
  # Check if the guess list is valid
  if len(guess_list) != 4:
    return None

  guessed = set(guess_list)

  # Overlap with the best-matching group; if the guess was all right this is 4
  return max(
    (len(guessed.intersection(group['words'])) for group in solution['groups']),
    default=0,
  )

In [None]:
# Load variables from a local .env file into the environment, then read the OpenAI key
# (api_key is None when OPENAI_API_KEY is not set)
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

In [40]:
# OpenAI client authenticated with the key read from the environment
client = OpenAI(api_key=api_key)
# System message sent with every request
description = "You are an assistant configured to solve the New York Times Connections Word game."
# User-prompt preamble: asks for exactly one 4-word group as JSON and states the puzzle's guarantees.
# The text ends mid-sentence on purpose - the list of candidate words is appended after it.
init_prompt = """
You are an assistant configured to solve the New York Times Connections Word game.
Out of the given words, please return a group of 4 words that you are most confident are related to each other.
Please output your response in a JSON format with the following structure:
{
  "words": ["word1", "word2", "word3", "word4"],
  "reason": "Your reasoning here."
}

You may assume the following:
1. The provided list of words will always be a multiple of four, and a group of four words will always exist.
2. Every word in the provided list is part of a group of four words, but you only need to make one guess.
3. There will never be a "miscellaneous" group, and no word will be part of more than one group.
4. A red herring category may be present, where some words appear to be related but are not part of the correct group.

Please give your answer in a JSON format and do not provide any other text. Your words to choose from are as follows:
"""

In [48]:
# Spot check: this guess is the 'units of length' group of game index 1, so the expected result is 4
eval_round(['foot', 'league', 'mile', 'yard'], ds[1]['solution'])

4

In [None]:
# The client and the prompts are identical for every game, so they are built once up front.
client = OpenAI(api_key=api_key)
description = "You are an assistant configured to solve the New York Times Connections Word game."
# NOTE: the leading spaces inside this literal are part of the prompt text and are kept verbatim.
init_prompt = """
  You are an assistant configured to solve the New York Times Connections Word game.
  Out of the given words, please return a group of 4 words that you are most confident are related to each other.
  Please output your response in a JSON format with the following structure:
  {
    "words": ["word1", "word2", "word3", "word4"],
    "reason": "Your reasoning here."
  }

  You may assume the following:
  1. The provided list of words will always be a multiple of four, and a group of four words will always exist.
  2. Every word in the provided list is part of a group of four words, but you only need to make one guess.
  3. There will never be a "miscellaneous" group, and no word will be part of more than one group.
  4. A red herring category may be present, where some words appear to be related but are not part of the correct group.

  Please give your answer in a JSON format and do not provide any other text. Your words to choose from are as follows:
  """

for game in range(10): # currently looping first 10 games
  current_list = ds[game]['words'].copy()
  print(f"Game {game+1}: {current_list}")
  responses = []     # every guess made this game, valid or not (sorted, lowercased)
  response_acc = []  # parallel to responses: 1 = correct, 0 = incorrect, -1 = invalid
  lives = 4          # wrong-but-valid guesses allowed
  invalid_lives = 3  # invalid replies allowed

  # loop through game until we get a complete game or game over (run out of 4 lives, or make 3 invalid guesses)
  while len(current_list) > 0 and lives > 0 and invalid_lives > 0:
    words = ", ".join(current_list)

    # display previous guesses, if any
    if len(responses) > 0:
      addl_prompt = """
      
      In addition, below is a list of previous guesses you made, and whether it's a correct, incorrect, or invalid guess.
      Please use these to help inform your decision, and remember to only choose words from the words list provided above.
      Do not repeat any of your previous guesses.
      Your previous guesses:
      """
      for prevs, outcome in zip(responses, response_acc):
        addl_prompt += ', '.join(prevs)
        if outcome == 1:
          addl_prompt += " (correct)\n"
        elif outcome == 0:
          addl_prompt += " (incorrect)\n"
        else:
          addl_prompt += " (invalid)\n"
    else:
      addl_prompt = ""
    prompt = init_prompt + words + addl_prompt
    # print(prompt)

    # get response from OpenAI API
    response = client.chat.completions.create(
      model="gpt-4o",
      messages=[
        {"role": "system", "content": description},
        {"role": "user", "content": prompt}
      ],
      max_completion_tokens=500,
      response_format={ "type": "json_object" }
    )
    raw_reply = response.choices[0].message.content

    # A reply that is not valid JSON counts as one invalid attempt instead of aborting the run
    try:
      response = json.loads(raw_reply)
    except (json.JSONDecodeError, TypeError):
      print("Invalid response: reply was not valid JSON.") # debug
      invalid_lives -= 1
      continue
    print(response)

    # standardize response: lower case, sort alphabetically
    try:
      response['words'] = sorted([word.lower() for word in response['words']])
    except (KeyError, TypeError, AttributeError):
      print("Invalid response: no usable \"words\" list in the reply.") # debug
      invalid_lives -= 1
      continue
    guess_words = response['words']

    # Check for invalid responses. All checks run before anything is recorded, so one bad
    # reply costs exactly one invalid life and is logged exactly once.
    made_up = any(word not in current_list for word in guess_words)
    repeated = guess_words in responses
    wrong_size = len(set(guess_words)) != 4

    if made_up:
      print("Invalid response: made up a word.") # debug
    if repeated or wrong_size:
      print("Invalid response: duplicate or not 4 unique words.") # debug

    if made_up or repeated or wrong_size:
      responses.append(guess_words)
      response_acc.append(-1)
      invalid_lives -= 1
      continue

    # valid guess, get score
    responses.append(guess_words)
    score = eval_round(guess_words, ds[game]['solution'])
    if score == 4:
      print("Correct response!") # debug
      response_acc.append(1)
      current_list = [item for item in current_list if item not in guess_words]
    else:
      print("Incorrect." + (" One away..." if score == 3 else "")) # debug
      response_acc.append(0)
      lives -= 1

  # Your tasks:
  # 1. Save the results of the responses to the JSON format
  # 2. Experiment with the different responses/shots (zero-shot, one-shot, few-shot)
  # 3. Once you're happy with the first two, run it on the entire dataset and save the JSON data of each game.

Game 1: ['snow', 'level', 'shift', 'kayak', 'heat', 'tab', 'bucks', 'return', 'jazz', 'hail', 'option', 'rain', 'sleet', 'racecar', 'mom', 'nets']
{'words': ['snow', 'hail', 'sleet', 'rain'], 'reason': 'These words are all types of precipitation.'}
['hail', 'rain', 'sleet', 'snow'] 4
Correct response!
{'words': ['kayak', 'racecar', 'mom', 'level'], 'reason': 'These words are palindromes; they read the same backward as forward.'}
['kayak', 'level', 'mom', 'racecar'] 4
Correct response!
{'words': ['jazz', 'nets', 'bucks', 'heat'], 'reason': 'These are all names of professional basketball teams in the NBA: Utah Jazz, Brooklyn Nets, Milwaukee Bucks, and Miami Heat.'}
['bucks', 'heat', 'jazz', 'nets'] 4
Correct response!
{'words': ['shift', 'tab', 'return', 'option'], 'reason': 'These words are all keys on a computer keyboard.'}
['option', 'return', 'shift', 'tab'] 4
Correct response!
Game 2: ['pump', 'foot', 'time', 'sea', 'league', 'loafer', 'why', 'us', 'boot', 'yard', 'people', 'are', '

In [47]:
# Inspect the solution groups of game index 1
ds[1]['solution']

{'groups': [{'words': ['pump', 'loafer', 'boot', 'sneaker'],
   'reason': 'footwear'},
  {'words': ['sea', 'why', 'are', 'queue'], 'reason': 'letter homophones'},
  {'words': ['time', 'us', 'people', 'essence'], 'reason': 'magazines'},
  {'words': ['foot', 'league', 'yard', 'mile'], 'reason': 'units of length'}]}

In [34]:
# Inspect the 16 board words of game index 100
ds[100]['words']

['extra',
 'ball',
 'won',
 'mug',
 'pin',
 'copy',
 'too',
 'tee',
 'ate',
 'alley',
 'pen',
 'backup',
 'spare',
 'tote',
 'for',
 'lane']

In [29]:
# Score the first recorded guess against game 0.
# NOTE(review): `responses` is whatever the most recent game-loop run left behind - confirm it belongs to game 0.
eval_round(responses[0], ds[0]['solution'])

4

## TODO

Due: 6 March 2025

Group 1:
1. Write some code to try and better save attributes of results!
    * Ex: What are the guesses that are being made? What does each game look like?
    * I recommend saving the results as a JSON for organization, but feel free to decide how you'd like to save your results.
2. Generate a graph of some kind that can give some insights!
    * E.g. What kinds of groups are most commonly solved? What do you see with groups that are solved? What kinds of group difficulties are solved most often?

Group 2:
* Devise a better guessing algorithm, and implement it.

In [38]:
# Call the Datamuse API for words that start with each board word and total the scores of the
# endings ("suffixes") they add, to see whether any ending is shared across the board.
# NOTE: only suffixes are collected here; prefixes are not handled in this cell.
import requests
words = ds[3]['words']
suffixes = {}
for word in words:
  # `params` URL-encodes the word (spaces, punctuation); the timeout keeps a stalled request from hanging the cell
  response = requests.get("https://api.datamuse.com/words", params={"sp": f"{word}*"}, timeout=10)
  response.raise_for_status()
  # `match` rather than `result`, so the `result` list built when loading the dataset is not overwritten
  for match in response.json():
    suffix = match['word'][len(word):]
    if suffix:  # skip the board word itself (nothing added)
      suffixes[suffix] = suffixes.get(suffix, 0) + match['score']

sorted_suffixes = sorted(suffixes.items(), key=lambda x: x[1], reverse=True)
print(sorted_suffixes[:10])

[('y', 3474), ('sede', 2225), ('tery', 2195), ('fluous', 2087), ('h', 1995), ('ior', 1873), ('ing', 1808), ('e', 1802), ('ed', 1764), ('cilious', 1741)]


In [39]:
# get all groups that have "___" in reason
groups_with_underscore = []
for game in ds:
  for group in game['solution']['groups']:
    if '___' in group['reason']: # append tuple of word group and game index
      groups_with_underscore.append((group, ds.index(game)))

print(groups_with_underscore)

[({'words': ['spider', 'iron', 'super', 'bat'], 'reason': '___ man superheroes'}, 3), ({'words': ['star', 'silver', 'jelly', 'cray'], 'reason': '___ fish that aren’t fish'}, 6), ({'words': ['bean', 'clean', 'peanut', 'fox'], 'reason': 'mr. ___'}, 15), ({'words': ['bermuda', 'love', 'right', 'acute'], 'reason': '___ triangle'}, 16), ({'words': ['baby', 'wayne', 'kim', 'jon'], 'reason': 'lil ___ rappers'}, 20), ({'words': ['bite', 'wave', 'barrier', 'asleep'], 'reason': 'sound ___'}, 21), ({'words': ['pot', 'ass', 'rabbit', 'knife'], 'reason': 'jack ___'}, 26), ({'words': ['copy', 'alley', 'cool', 'lap'], 'reason': '___ cat'}, 28), ({'words': ['cement', 'soul', 'band', 'duckie'], 'reason': 'rubber ___'}, 31), ({'words': ['half', 'neck', 'so', 'again'], 'reason': '___ and ___'}, 34), ({'words': ['eye', 'truth', 'gun', 'mole rat'], 'reason': 'naked ___'}, 35), ({'words': ['pepper', 'evil', 'no', 'j'], 'reason': 'dr. ___'}, 38), ({'words': ['jungle', 'house', 'fat', 'doja'], 'reason': '___ 

In [62]:
from collections import defaultdict

def get_affixes_for_word(word, datamuse_results, mode="suffix"):
    """
    Collect what other words add before or after `word`.

    Args:
        word (str): The base word.
        datamuse_results (list): Entries (dicts) with a 'word' key.
        mode (str): "suffix" keeps the text following `word` in entries that
            start with it; "prefix" keeps the text preceding `word` in entries
            that end with it. Any other value yields no affixes.

    Returns:
        list: The non-empty, whitespace-stripped affixes in result order
        (repeats are kept). Entries equal to `word` are ignored.
    """
    def _extract(candidate):
        """Return the raw affix of `candidate`, or None if it does not apply."""
        if candidate == word:
            return None
        if mode == "suffix" and candidate.startswith(word):
            return candidate[len(word):]
        if mode == "prefix" and candidate.endswith(word):
            return candidate[:len(candidate) - len(word)]
        return None

    found = []
    for entry in datamuse_results:
        piece = _extract(entry['word'])
        if piece is None:
            continue
        piece = piece.strip()
        if piece:
            found.append(piece)
    return found

# Affixes that attach to almost any English word; they carry no signal about
# a shared "___ word" / "word ___" category, so they are never scored.
_COMMON_AFFIXES = frozenset({
    "s", "es", "ed", "ing", "er", "est", "ly", "y", "e", "a", "able", "ible",
    "al", "ous", "ment", "ful", "less", "tion", "ness", "ant", "ent", "ive",
    "ic", "ary", "ate", "an", "ian", "or", "ism", "ist", "ish", "like", "ier",
    "up", "down", "back", "pre", "re", "un", "in", "im", "non", "dis", "over", "under",
})


def compute_group_affix_scores(group_data, general_affix_freq=None, mode="suffix", min_score=3):
    """Score the affixes that occur across a group of words.

    Args:
        group_data: Mapping of word -> list of Datamuse result dicts (each
            with a 'word' key) for that word.
        general_affix_freq: Optional mapping of affix -> baseline frequency.
            Each affix count is divided by its baseline; a missing, zero or
            None baseline is treated as 1 so the division is always defined.
        mode: "suffix" or "prefix", forwarded to get_affixes_for_word.
        min_score: Lowest score an affix needs to be reported (default 3).

    Returns:
        A list of (affix, score) tuples sorted by descending score, limited
        to affixes whose score is at least `min_score`.

    Note:
        Counts are per extracted affix, not per distinct word: if two results
        for the same word yield the same affix (e.g. "mall rat" and
        "mallrat"), that affix is counted twice.
    """
    group_affix_freq = defaultdict(int)
    for word, results in group_data.items():
        for affix in get_affixes_for_word(word, results, mode=mode):
            if affix not in _COMMON_AFFIXES:
                group_affix_freq[affix] += 1

    scores = {}
    for affix, freq in group_affix_freq.items():
        # `or 1` also covers an explicit 0/None baseline, which would
        # otherwise raise ZeroDivisionError/TypeError.
        baseline = (general_affix_freq.get(affix) or 1) if general_affix_freq else 1
        scores[affix] = freq / baseline

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return [(affix, score) for affix, score in ranked if score >= min_score]


# Query Datamuse for every word of one puzzle: "*word" finds words ending in
# the puzzle word (candidate prefixes), "word*" finds words starting with it
# (candidate suffixes).
datamuse_url = "https://api.datamuse.com/words"

words = ds[583]['words']
prefix_data = {}
suffix_data = {}
for word in words:
    # Pass the pattern via `params` so multi-word entries (e.g. "mole rat")
    # are URL-encoded, bound each request with a timeout, and fail loudly on
    # an HTTP error instead of trying to parse an error page as JSON.
    p_response = requests.get(datamuse_url, params={"sp": f"*{word}"}, timeout=10)
    p_response.raise_for_status()
    prefix_data[word] = p_response.json()

    s_response = requests.get(datamuse_url, params={"sp": f"{word}*"}, timeout=10)
    s_response.raise_for_status()
    suffix_data[word] = s_response.json()

print(compute_group_affix_scores(prefix_data, mode="prefix"))
print("=======================")
print(compute_group_affix_scores(suffix_data, mode="suffix"))

[('power', 5.0), ('free', 4.0), ('electronic', 4.0), ('active', 3.0), ('flat', 3.0), ('micro', 3.0), ('kill', 3.0), ('ice', 3.0), ('inter', 3.0), ('counter', 3.0), ('anti', 3.0), ('e-', 3.0), ('retail', 3.0), ('t', 3.0), ('rat', 3.0), ('press', 3.0)]
[('rat', 9.0), ('rats', 5.0), ('ers', 4.0), ('off', 4.0), ('on', 4.0), ('d', 4.0), ('ings', 3.0), ('cycle', 3.0), ('board', 3.0), ('el', 3.0), ('man', 3.0), ('men', 3.0), ('department', 3.0), ('en', 3.0), ('name', 3.0)]


In [75]:
%pip install osmnx

Defaulting to user installation because normal site-packages is not writeable
Collecting osmnx
  Downloading osmnx-2.0.2-py3-none-any.whl.metadata (4.9 kB)
Downloading osmnx-2.0.2-py3-none-any.whl (99 kB)
Installing collected packages: osmnx
Successfully installed osmnx-2.0.2
Note: you may need to restart the kernel to use updated packages.


In [None]:
import osmnx as ox
import networkx as nx
import folium

# 1. Starting point (Amherst, MA) as (latitude, longitude)
start_lat, start_lon = 42.3732, -72.5199
start_point = (start_lat, start_lon)

# 2. Download the drivable road network within 150 km of the start point
G = ox.graph_from_point(start_point, dist=150000, network_type='drive')

# 3. Find the graph node nearest to the start (X is longitude, Y is latitude).
# The function is addressed through ox.distance, where it is defined.
orig_node = ox.distance.nearest_nodes(G, X=start_lon, Y=start_lat)

# 4. Calculate shortest-path road distances, keeping only nodes within
# 90 miles (144,840 meters) measured along the edge 'length' attribute
max_dist = 144840
distances = nx.single_source_dijkstra_path_length(G, orig_node, cutoff=max_dist, weight='length')

# 5. Visualize the reachable part of the network on an interactive map.
# ox.plot_graph_folium is not available in OSMnx 2.x (the version installed
# in this notebook); converting the edges to a GeoDataFrame and calling
# explore() is the supported replacement.
reachable_nodes = list(distances)
subgraph = G.subgraph(reachable_nodes)
reachable_edges = ox.convert.graph_to_gdfs(subgraph, nodes=False)
reachable_edges.explore(popup='length', tiles='CartoDB positron')

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)


In [73]:
# Display the word list of the puzzle at index 583 (the same puzzle the
# Datamuse affix experiment in this notebook queries).
ds[583]['words']

['market',
 'switch',
 'plant',
 'mall',
 'trade',
 'gym',
 'outlet',
 'asset',
 'rug',
 'business',
 'mole',
 'sconce',
 'pack',
 'agent',
 'baseboard',
 'commerce']

In [63]:
# Load the 'fasttext-wiki-news-subwords-300' vectors through gensim's
# downloader (`api`), then sanity-check them by listing the nearest
# neighbours of one word.
model_wiki = api.load('fasttext-wiki-news-subwords-300')
model_wiki.most_similar('seattle')

[('minneapolis', 0.728505551815033),
 ('portland', 0.7126332521438599),
 ('vancouver', 0.6863006949424744),
 ('calgary', 0.6720302104949951),
 ('philadelphia', 0.6713477373123169),
 ('baltimore', 0.6664227247238159),
 ('houston', 0.6611942052841187),
 ('denver', 0.6545454859733582),
 ('melbourne', 0.6510372757911682),
 ('pittsburgh', 0.6482068300247192)]

In [72]:
# Check how close the model places two spellings of the same word.
model_wiki.similarity('pushup', 'push-up')

0.8519377

In [14]:
# Inspect one full puzzle entry: its 'words' list plus the 'solution' groups
# with their reasons.
ds[3]

{'words': ['dust',
  'cats',
  'spider',
  'carousel',
  'puma',
  'iron',
  'nike',
  'mop',
  'chicago',
  'sweep',
  'super',
  'bat',
  'reebok',
  'cabaret',
  'vacuum',
  'adidas'],
 'solution': {'groups': [{'words': ['spider', 'iron', 'super', 'bat'],
    'reason': '___ man superheroes'},
   {'words': ['dust', 'mop', 'sweep', 'vacuum'], 'reason': 'cleaning verbs'},
   {'words': ['cats', 'carousel', 'chicago', 'cabaret'],
    'reason': 'musicals beginning with “c”'},
   {'words': ['puma', 'nike', 'reebok', 'adidas'],
    'reason': 'sneaker brands'}]}}