In [3]:
!pip install numpy



In [4]:
import numpy as np
import random
from collections import defaultdict
import math

In [5]:
# Define the environment
class GuessingGame:
    def __init__(self, target_number, max_attempts):
        self.target_number = target_number
        self.max_attempts = max_attempts
        self.current_attempt = 0
        
    def reset(self):
        self.current_attempt = 0
        return self.current_attempt
    
    def step(self, action):
        self.current_attempt += 1
        if action == self.target_number:
            reward = 10
            done = True
        elif self.current_attempt >= self.max_attempts:
            reward = -10
            done = True
        else:
            reward = -1
            done = False
        
        return self.current_attempt, reward, done

# Q-learning agent
class QLearningAgent:
    def __init__(self, state_space, action_space, learning_rate=0.1, discount_factor=0.95, epsilon=0.1):
        self.q_table = np.zeros((state_space, action_space))
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = epsilon
        
    def get_action(self, state):
        if random.uniform(0, 1) < self.epsilon:
            return random.randint(0, self.q_table.shape[1] - 1)
        else:
            return np.argmax(self.q_table[state])
    
    def update(self, state, action, reward, next_state):
        best_next_action = np.argmax(self.q_table[next_state])
        td_target = reward + self.discount_factor * self.q_table[next_state][best_next_action]
        td_error = td_target - self.q_table[state][action]
        self.q_table[state][action] += self.learning_rate * td_error


In [6]:
class MCTSNode:
    def __init__(self, state, parent=None):
        self.state = state
        self.parent = parent
        self.children = {}
        self.visits = 0
        self.value = 0

class MCTS:
    def __init__(self, env, num_simulations=100, exploration_constant=1.41):
        self.env = env
        self.num_simulations = num_simulations
        self.exploration_constant = exploration_constant

    def search(self, state):
        root = MCTSNode(state)

        for _ in range(self.num_simulations):
            node = self.select(root)
            reward = self.simulate(node.state)
            self.backpropagate(node, reward)

        return max(root.children.items(), key=lambda x: x[1].visits)[0]

    def select(self, node):
        while node.children:
            if len(node.children) < 10:  # Not all actions have been tried
                return self.expand(node)
            else:
                node = self.uct_select(node)
        return self.expand(node)

    def expand(self, node):
        untried_actions = set(range(10)) - set(node.children.keys())
        action = random.choice(list(untried_actions))
        next_state, _, _ = self.env.step(action)
        child = MCTSNode(next_state, parent=node)
        node.children[action] = child
        return child

    def simulate(self, state):
        current_state = state
        while True:
            action = random.randint(0, 9)
            next_state, reward, done = self.env.step(action)
            if done:
                return reward
            current_state = next_state

    def backpropagate(self, node, reward):
        while node:
            node.visits += 1
            node.value += reward
            node = node.parent

    def uct_select(self, node):
        log_n = math.log(node.visits)
        return max(node.children.items(),
                   key=lambda x: x[1].value / x[1].visits + self.exploration_constant * math.sqrt(log_n / x[1].visits))[1]


In [8]:
# Interactive testing loop
while True:
    target_number = int(input("Enter a number for the agent to guess (0-9), or -1 to quit: "))
    if target_number == -1:
        break
    
    env = GuessingGame(target_number, max_attempts=10)
    mcts = MCTS(env)  # Create MCTS instance here
    state = env.reset()
    done = False
    total_reward = 0
    attempts = 0

    print(f"\nAgent is trying to guess the number {target_number}")
    while not done:
        action = mcts.search(state)
        next_state, reward, done = env.step(action)
        total_reward += reward
        attempts += 1
        print(f"Attempt {attempts}: Agent guessed {action}")
        state = next_state

    print(f"Game finished. Total attempts: {attempts}, Total reward: {total_reward}")
    print()



Agent is trying to guess the number 1
Attempt 1: Agent guessed 2
Game finished. Total attempts: 1, Total reward: -10


Agent is trying to guess the number 2
Attempt 1: Agent guessed 2
Game finished. Total attempts: 1, Total reward: 10


Agent is trying to guess the number 9
Attempt 1: Agent guessed 8
Game finished. Total attempts: 1, Total reward: -10


Agent is trying to guess the number 4
Attempt 1: Agent guessed 5
Game finished. Total attempts: 1, Total reward: -10


Agent is trying to guess the number 3
Attempt 1: Agent guessed 0
Game finished. Total attempts: 1, Total reward: -10


Agent is trying to guess the number 5
Attempt 1: Agent guessed 0
Game finished. Total attempts: 1, Total reward: -10


Agent is trying to guess the number 4
Attempt 1: Agent guessed 8
Game finished. Total attempts: 1, Total reward: -10


Agent is trying to guess the number 6
Attempt 1: Agent guessed 6
Game finished. Total attempts: 1, Total reward: 10


Agent is trying to guess the number 2
Attempt 1: 

ValueError: invalid literal for int() with base 10: ''

In [9]:
# Test the trained agent
state = env.reset()
done = False
total_reward = 0

print("Testing the trained agent:")
while not done:
    action = agent.get_action(state)
    next_state, reward, done = env.step(action)
    total_reward += reward
    print(f"Attempt {state + 1}: Guessed {action}")
    state = next_state

print(f"Game finished. Total reward: {total_reward}")

Testing the trained agent:


NameError: name 'agent' is not defined