Soumya Mukherjee - ch24m571

In [3]:
!pip install beautifulsoup4 requests


In [2]:
import requests
from bs4 import BeautifulSoup
import numpy as np
from collections import defaultdict
import random
from math import log2

def load_words():
    """Download the four-letter word list and keep words with distinct letters.

    The page is fetched over HTTP and parsed with BeautifulSoup. Only the
    first ``<td>`` of each table row is read (assumes the word is in the
    first column -- TODO confirm against the live page layout).

    Returns:
        A sorted list of unique, upper-cased four-letter words in which no
        letter repeats. The order is deterministic so that word indices are
        stable from one run to the next.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the downloaded page contains no table at all.
    """
    url = "https://www.scrabble.org.au/words/fours.htm"
    # A timeout keeps a stalled connection from hanging the program forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', class_='table-striped')
    if table is None:
        table = soup.find('table')  # Fallback: take the first table found
    if table is None:
        raise ValueError(f"No word table found at {url}")

    words = set()
    for row in table.find_all('tr'):
        cols = row.find_all('td')
        if not cols:
            continue  # header rows have no <td> cells
        word = cols[0].text.strip().upper()
        if len(word) == 4 and len(set(word)) == 4:
            words.add(word)

    # Sorting removes the run-to-run ordering variation of set iteration.
    return sorted(words)

def compute_feedback(guess, secret):
    """Score a guess against the secret word.

    Args:
        guess: The guessed word.
        secret: The word to be found.

    Returns:
        A ``(cats, dogs)`` tuple. ``dogs`` is the number of positions where
        both words hold the same letter; ``cats`` is the number of distinct
        letters shared by both words minus ``dogs`` (i.e. shared letters that
        sit in a different position).

    Note:
        The count of shared letters is taken over *sets* of letters, so the
        result is only meaningful when neither word repeats a letter.
        Positions are compared pairwise up to the length of the shorter word.
    """
    # zip() compares position by position without assuming a word length.
    dogs = sum(g == s for g, s in zip(guess, secret))
    common = len(set(guess) & set(secret))
    return (common - dogs, dogs)

def entropy(word_set):
    """Return the Shannon entropy (in bits) of a uniform choice from a collection.

    Args:
        word_set: Any sized collection (list, set, range, ...). Only its
            length is used; every element is treated as equally likely.

    Returns:
        ``0`` for an empty collection, otherwise ``log2(len(word_set))``.
    """
    n = len(word_set)
    if n == 0:
        return 0
    # For n equally likely outcomes, -sum((1/n) * log2(1/n)) is exactly
    # log2(n), so there is no need to loop over the elements.
    return log2(n)

class CatDogGame:
    """Tabular Q-learning agent for the four-letter cats-and-dogs word game.

    A state is the set of word indices still consistent with all feedback
    received so far; an action is the index of the word to guess next.

    Attributes are documented inline in ``__init__``.
    """

    # Feedback meaning "all four letters in the right place", i.e. a win.
    WIN_FEEDBACK = (0, 4)

    def __init__(self, words):
        """Build the feedback lookup table and an empty Q-table.

        Args:
            words: Sequence of candidate words; a word's position in this
                sequence is its index everywhere else in the class.
        """
        self.words = words
        # (guess_idx, secret_idx) -> (cats, dogs) for every ordered pair.
        self.feedback_matrix = self.precompute_feedback()
        # state key -> array with one Q-value per word, created lazily.
        self.q_table = defaultdict(lambda: np.zeros(len(words)))
        self.alpha = 0.1  # Learning rate
        self.gamma = 0.9   # Discount factor
        self.epsilon = 0.3 # Exploration rate

    def precompute_feedback(self):
        """Return a dict mapping ``(guess_idx, secret_idx)`` to its feedback.

        Every ordered pair of words is scored once with
        ``compute_feedback``, so the table has ``len(words) ** 2`` entries.
        """
        matrix = {}
        for i, guess in enumerate(self.words):
            for j, secret in enumerate(self.words):
                matrix[(i, j)] = compute_feedback(guess, secret)
        return matrix

    def get_state_key(self, possible_indices):
        """Return a hashable key (a frozenset) for a set of candidate indices."""
        return frozenset(possible_indices)

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update to ``Q(state, action)``.

        Args:
            state: Key of the state the action was taken in.
            action: Index of the guessed word.
            reward: Reward received for the guess.
            next_state: Key of the resulting state, or ``None`` when the
                episode ended. States never seen before contribute a future
                value of 0 and are not added to the table by this lookup.
        """
        current_q = self.q_table[state][action]
        if next_state in self.q_table:
            max_next_q = np.max(self.q_table[next_state])
        else:
            max_next_q = 0
        target = reward + self.gamma * max_next_q
        self.q_table[state][action] = (1 - self.alpha) * current_q + self.alpha * target

    def choose_action(self, state, possible_indices):
        """Pick the next guess with an epsilon-greedy policy.

        Args:
            state: Unused; the state key is derived from
                ``possible_indices``. Kept so existing callers keep working.
            possible_indices: List of word indices still possible. Guesses
                are only ever drawn from this list.

        Returns:
            The chosen word index. With probability ``epsilon`` it is a
            uniformly random candidate, otherwise the candidate with the
            highest Q-value (the first one on ties).
        """
        state_key = self.get_state_key(possible_indices)
        if random.random() < self.epsilon:
            return random.choice(possible_indices)

        q_row = self.q_table[state_key]
        q_values = [q_row[i] for i in possible_indices]
        return possible_indices[np.argmax(q_values)]

    def calculate_reward(self, guess_idx, possible_indices):
        """Return 10x the expected information gain (bits) of a guess.

        The candidates are grouped by the feedback the guess would produce
        for each of them. With every candidate equally likely, the entropy
        before the guess is ``log2(total)`` and the expected entropy after it
        is the size-weighted mean of ``log2(group size)``.

        Args:
            guess_idx: Index of the guessed word.
            possible_indices: Candidate indices *before* the guess's feedback
                is applied. Passing an already filtered set always yields 0,
                because all remaining candidates share one feedback value.

        Returns:
            The information gain scaled by 10; ``0.0`` for no candidates.
        """
        total = len(possible_indices)
        if total == 0:
            return 0.0

        feedback_counts = defaultdict(int)
        for idx in possible_indices:
            feedback_counts[self.feedback_matrix[(guess_idx, idx)]] += 1

        entropy_before = log2(total)
        entropy_after = sum(
            (count / total) * log2(count) for count in feedback_counts.values()
        )
        return (entropy_before - entropy_after) * 10  # Scale reward

    def train_episode(self, secret_idx):
        """Play one game against ``words[secret_idx]`` and learn from it.

        Args:
            secret_idx: Index of the secret word.

        Returns:
            The number of guesses needed to find the secret word.
        """
        possible_indices = set(range(len(self.words)))
        state_key = self.get_state_key(possible_indices)
        guesses = 0

        while True:
            guess_idx = self.choose_action(state_key, list(possible_indices))
            guesses += 1

            # Get actual feedback
            feedback = self.feedback_matrix[(guess_idx, secret_idx)]

            # Check if guessed
            if feedback == self.WIN_FEEDBACK:
                reward = 20 - guesses  # Fewer guesses -> larger final reward
                self.update_q_table(state_key, guess_idx, reward, None)
                return guesses

            # The information-gain reward must be measured on the candidates
            # as they were when the guess was made. Measuring it after the
            # filtering below would always give 0, since every surviving
            # candidate produces the same feedback for this guess.
            reward = -1 + self.calculate_reward(guess_idx, possible_indices)

            # Keep only the words consistent with the observed feedback
            possible_indices = {
                idx for idx in possible_indices
                if self.feedback_matrix[(guess_idx, idx)] == feedback
            }

            # Update Q-table and move to the narrowed-down state
            new_state_key = self.get_state_key(possible_indices)
            self.update_q_table(state_key, guess_idx, reward, new_state_key)
            state_key = new_state_key

    def evaluate(self, num_episodes=1000):
        """Run training episodes on random secrets and average their length.

        Each episode goes through ``train_episode``, so the Q-table keeps
        being updated and guesses still include epsilon exploration.

        Args:
            num_episodes: Number of episodes to play.

        Returns:
            The mean number of guesses per episode.
        """
        total_guesses = 0
        for _ in range(num_episodes):
            secret_idx = random.randint(0, len(self.words)-1)
            total_guesses += self.train_episode(secret_idx)
        return total_guesses / num_episodes

# Main execution: load the word list, train the agent, then replay every word
# greedily to measure how many guesses the learned policy needs.
if __name__ == "__main__":
    words = load_words()
    print(f"Loaded {len(words)} valid words")
    if not words:
        # Nothing to train or test on; the statistics below would fail.
        raise SystemExit("No valid words were loaded; nothing to train on.")

    game = CatDogGame(words)
    avg_guesses = game.evaluate(num_episodes=500)
    print(f"Average guesses after training: {avg_guesses:.2f}")

    # Test performance
    test_results = []
    for secret_idx in range(len(words)):
        possible_indices = set(range(len(words)))
        guesses = 0

        while True:
            # Only words that are still possible may be guessed. Choosing
            # among all words can repeat an already eliminated guess, which
            # leaves the candidate set unchanged and loops forever.
            candidates = sorted(possible_indices)
            # .get() avoids allocating a Q-row for states never trained on.
            q_row = game.q_table.get(frozenset(possible_indices))
            if q_row is None:
                guess_idx = candidates[0]
            else:
                guess_idx = max(candidates, key=lambda idx: q_row[idx])
            guesses += 1

            # Reuse the precomputed table instead of re-scoring word pairs.
            feedback = game.feedback_matrix[(guess_idx, secret_idx)]
            if feedback == (0, 4):
                break

            # Update possible words; a wrong guess never matches its own
            # feedback, so the candidate set shrinks on every iteration.
            possible_indices = {
                idx for idx in candidates
                if game.feedback_matrix[(guess_idx, idx)] == feedback
            }

        test_results.append(guesses)

    print(f"Final average: {np.mean(test_results):.2f}")
    max_guesses = max(test_results)
    toughest = words[test_results.index(max_guesses)]
    print(f"Toughest word: {toughest} ({max_guesses} guesses)")

ModuleNotFoundError: No module named 'requests'