In [1]:
from scoring_utils import score_guess, compute_advantages

In [2]:
# Word pool handed to score_guess as the words still available to guess.
REMAINING_WORDS = [
    "charm", "cape", "bay", "pawn",
    "tights", "hex", "instrument", "puppet",
    "scott", "woo", "underwear", "carpenter",
    "mask", "tool", "spell", "magic",
]

# Previously submitted guesses, handed to score_guess alongside each new guess.
INVALID_GUESSES = [
    ["charm", "cape", "bay", "pawn", "tights"],
]


In [3]:

# Sample solution structure from test_set_1.jsonl:
# four groups of four words, each paired with the reason that links them.
SOLUTION = {
    "groups": [
        {
            "words": ["instrument", "pawn", "puppet", "tool"],
            "reason": "one being manipulated",
        },
        {
            "words": ["charm", "hex", "magic", "spell"],
            "reason": "sorcerer's output",
        },
        {
            "words": ["cape", "mask", "tights", "underwear"],
            "reason": "classic superhero wear",
        },
        {
            "words": ["bay", "carpenter", "scott", "woo"],
            "reason": "action movie directors",
        },
    ]
}

# Test cases, written as (name, guess, expected score) triples and expanded
# into dicts. The expected values follow the rule this notebook exercises:
# 0.25 per word matched in the best-matching solution group, 0 for a rejected guess.
test_cases = [
    {"name": case_name, "guess": guess_words, "expected": expected_score}
    for case_name, guess_words, expected_score in [
        # All four words of the "sorcerer's output" group (4 * 0.25).
        ("Valid guess with good match", ["charm", "hex", "magic", "spell"], 1.0),
        # Three words from "sorcerer's output" plus one outsider (3 * 0.25).
        ("Valid guess with partial match", ["charm", "hex", "magic", "bay"], 0.75),
        # One word from each of the four groups, so the best single group has 1 match.
        ("Valid guess with matches across groups", ["charm", "bay", "cape", "tool"], 0.25),
        # Two words from "sorcerer's output" and two directors: best group has 2 matches.
        ("Multiple matches in different groups", ["charm", "magic", "bay", "scott"], 0.5),
        # "not_valid" is not in REMAINING_WORDS.
        ("Invalid guess - word not in remaining_words", ["charm", "not_valid", "magic", "spell"], 0),
        # Same words as the entry already listed in INVALID_GUESSES (five words).
        ("Invalid guess - already guessed", ["charm", "cape", "bay", "pawn", "tights"], 0),
        # Empty guess should return 0.
        ("Empty guess", [], 0),
        # Only two words, neither of which is in REMAINING_WORDS or any solution group.
        ("Guess with no matches", ["not_in_solution", "another_word"], 0),
        # "pawn" appears twice, so there are not four unique words.
        ("not four words", ["instrument", "pawn", "puppet", "pawn"], 0),
    ]
]

# Run test cases: print one PASS/FAIL line per case, then fail loudly if any
# case did not match, so a scoring regression cannot slip through a
# "Restart & Run All" unnoticed.
print("Testing scoring function:")
failed_cases = []
for test in test_cases:
    score = score_guess(test["guess"], INVALID_GUESSES, REMAINING_WORDS, SOLUTION)
    result = "PASS" if score == test["expected"] else "FAIL"
    if result == "FAIL":
        failed_cases.append(test["name"])
    print(f"{result}: {test['name']} - Expected: {test['expected']}, Got: {score}")

# Raised only after every case has been reported, so the full table is still visible.
assert not failed_cases, f"score_guess test cases failed: {failed_cases}"


Testing scoring function:
PASS: Valid guess with good match - Expected: 1.0, Got: 1.0
PASS: Valid guess with partial match - Expected: 0.75, Got: 0.75
PASS: Valid guess with matches across groups - Expected: 0.25, Got: 0.25
PASS: Multiple matches in different groups - Expected: 0.5, Got: 0.5
PASS: Invalid guess - word not in remaining_words - Expected: 0, Got: 0
PASS: Invalid guess - already guessed - Expected: 0, Got: 0
PASS: Empty guess - Expected: 0, Got: 0
PASS: Guess with no matches - Expected: 0, Got: 0
PASS: not four words - Expected: 0, Got: 0


In [4]:

# Interactive helper: score an ad-hoc guess and show how it lines up with the solution
def check_score(guess_words):
    """Score a custom guess against SOLUTION and print a short report.

    The report shows the guess, whether every word is in REMAINING_WORDS,
    the score returned by score_guess, and one line per solution group
    that shares at least one word with the guess.

    Args:
        guess_words: The guess to score; must be a list.

    Returns:
        The value returned by score_guess, or None (after printing an error)
        when guess_words is not a list.
    """
    # Guard clause: anything other than a list is rejected before scoring.
    if not isinstance(guess_words, list):
        print("Error: guess_words must be a list of strings")
        return

    score = score_guess(guess_words, INVALID_GUESSES, REMAINING_WORDS, SOLUTION)

    # Membership in REMAINING_WORDS is the only validity criterion reported here.
    unknown_words = [word for word in guess_words if word not in REMAINING_WORDS]
    validity_label = "No" if unknown_words else "Yes"

    print(f"Guess: {guess_words}")
    print(f"Valid guess (all words in remaining_words): {validity_label}")
    print(f"Score: {score}")

    # Debugging aid: per-group overlap, at 0.25 points per overlapping word.
    for group_number, group in enumerate(SOLUTION["groups"], start=1):
        hits = [word for word in guess_words if word in group["words"]]
        if not hits:
            continue
        points = len(hits) * 0.25
        print(f"Group {group_number} ({group['reason']}) matches: {hits} ({points:.2f} points)")

    return score


In [5]:

# Example: score a guess that reproduces the "sorcerer's output" group exactly.
check_score(
    ["charm", "hex", "magic", "spell"]
)

Guess: ['charm', 'hex', 'magic', 'spell']
Valid guess (all words in remaining_words): Yes
Score: 1.0
Group 2 (sorcerer's output) matches: ['charm', 'hex', 'magic', 'spell'] (1.00 points)


1.0

In [6]:
# Test the invalid groups functionality.
# Register a previous guess as invalid. The membership guard keeps this cell
# idempotent: re-running it does not pile duplicate entries into the shared list.
previous_guess = ["charm", "hex", "magic", "instrument"]
if previous_guess not in INVALID_GUESSES:
    INVALID_GUESSES.append(previous_guess)

print("\nTesting invalid groups functionality:")
# Separate list object with the same words, so the guess is not the stored entry itself.
test_invalid = list(previous_guess)
score = score_guess(test_invalid, INVALID_GUESSES, REMAINING_WORDS, SOLUTION)
print(f"Score for previously guessed group: {score}")  # Should return 0

# Test a different group that's valid
test_valid = ["instrument", "pawn", "puppet", "tool"]
score = score_guess(test_valid, INVALID_GUESSES, REMAINING_WORDS, SOLUTION)
print(f"Score for valid group: {score}")  # Should return 1.0


Testing invalid groups functionality:
Score for previously guessed group: 0
Score for valid group: 1.0


In [7]:
import random

# Seed the stdlib RNG so the sampled groups below are reproducible.
random.seed(42)

# Six random guesses of four words each. random.choices draws WITH replacement,
# so a sampled guess is allowed to repeat a word.
random_groups = [random.choices(REMAINING_WORDS, k=4) for _ in range(6)]

# Hand-picked guesses appended after the random ones.
random_groups.extend([
    ["charm", "hex", "magic", "spell"],  # exactly the "sorcerer's output" group
    ["bay", "carpenter", "scott", "instrument"],  # three directors plus one outsider
    ["instrument", "charm", "bay", "cape"],  # one word from each solution group
    ["bay", "carpenter", "scott", "instrument"],  # repeat of the directors guess
    ["not_word", "charm", "bay", "cape"],  # contains a word not in REMAINING_WORDS
    ["bay", "carpenter", "scott", "instrument"],  # second repeat of the directors guess
    ["instrument", "charm", "bay", "cape", "bottle"],  # five words, "bottle" not in REMAINING_WORDS
])

# Show every candidate group, numbered from 1.
print("Random groups generated:")
for i, group in enumerate(random_groups):
    print(f"Group {i + 1}: {group}")

Random groups generated:
Group 1: ['underwear', 'charm', 'tights', 'pawn']
Group 2: ['carpenter', 'underwear', 'spell', 'cape']
Group 3: ['instrument', 'charm', 'pawn', 'scott']
Group 4: ['charm', 'pawn', 'underwear', 'scott']
Group 5: ['pawn', 'woo', 'mask', 'charm']
Group 6: ['mask', 'carpenter', 'hex', 'bay']
Group 7: ['charm', 'hex', 'magic', 'spell']
Group 8: ['bay', 'carpenter', 'scott', 'instrument']
Group 9: ['instrument', 'charm', 'bay', 'cape']
Group 10: ['bay', 'carpenter', 'scott', 'instrument']
Group 11: ['not_word', 'charm', 'bay', 'cape']
Group 12: ['bay', 'carpenter', 'scott', 'instrument']
Group 13: ['instrument', 'charm', 'bay', 'cape', 'bottle']


In [8]:
# Score every candidate in random_groups, keeping scores in the same order.
scores = []
for group_number, candidate in enumerate(random_groups, start=1):
    candidate_score = score_guess(candidate, INVALID_GUESSES, REMAINING_WORDS, SOLUTION)
    scores.append(candidate_score)
    print(f"Score for random group {group_number}: {candidate_score} - {candidate}")

Score for random group 1: 0.5 - ['underwear', 'charm', 'tights', 'pawn']
Score for random group 2: 0.5 - ['carpenter', 'underwear', 'spell', 'cape']
Score for random group 3: 0.5 - ['instrument', 'charm', 'pawn', 'scott']
Score for random group 4: 0.25 - ['charm', 'pawn', 'underwear', 'scott']
Score for random group 5: 0.25 - ['pawn', 'woo', 'mask', 'charm']
Score for random group 6: 0.5 - ['mask', 'carpenter', 'hex', 'bay']
Score for random group 7: 1.0 - ['charm', 'hex', 'magic', 'spell']
Score for random group 8: 0.75 - ['bay', 'carpenter', 'scott', 'instrument']
Score for random group 9: 0.25 - ['instrument', 'charm', 'bay', 'cape']
Score for random group 10: 0.75 - ['bay', 'carpenter', 'scott', 'instrument']
Score for random group 11: 0 - ['not_word', 'charm', 'bay', 'cape']
Score for random group 12: 0.75 - ['bay', 'carpenter', 'scott', 'instrument']
Score for random group 13: 0 - ['instrument', 'charm', 'bay', 'cape', 'bottle']


In [9]:
# Turn the scores into advantages (via compute_advantages) and list each one
# next to the score and group it belongs to.
advantages = compute_advantages(scores)
print("Advantages for each score:")
for position, advantage in enumerate(advantages):
    group_number = position + 1
    print(
        f"Group {group_number}: {advantage:.2f} - "
        f"score: {scores[position]} group: {random_groups[position]}"
    )
print("All tests completed.")

Advantages for each score:
Group 1: 0.13 - score: 0.5 group: ['underwear', 'charm', 'tights', 'pawn']
Group 2: 0.13 - score: 0.5 group: ['carpenter', 'underwear', 'spell', 'cape']
Group 3: 0.13 - score: 0.5 group: ['instrument', 'charm', 'pawn', 'scott']
Group 4: -0.73 - score: 0.25 group: ['charm', 'pawn', 'underwear', 'scott']
Group 5: -0.73 - score: 0.25 group: ['pawn', 'woo', 'mask', 'charm']
Group 6: 0.13 - score: 0.5 group: ['mask', 'carpenter', 'hex', 'bay']
Group 7: 1.85 - score: 1.0 group: ['charm', 'hex', 'magic', 'spell']
Group 8: 0.99 - score: 0.75 group: ['bay', 'carpenter', 'scott', 'instrument']
Group 9: -0.73 - score: 0.25 group: ['instrument', 'charm', 'bay', 'cape']
Group 10: 0.99 - score: 0.75 group: ['bay', 'carpenter', 'scott', 'instrument']
Group 11: -1.58 - score: 0 group: ['not_word', 'charm', 'bay', 'cape']
Group 12: 0.99 - score: 0.75 group: ['bay', 'carpenter', 'scott', 'instrument']
Group 13: -1.58 - score: 0 group: ['instrument', 'charm', 'bay', 'cape', 'bo