In [1]:
import uuid
import random
import gym
import numpy as np
import json
class Node:
    def __init__(self, state, action, action_space, reward, terminal):
        self.identifier = str(uuid.uuid1())
        self.parent_identifier = None
        self.children_identifiers = []
        self.untried_actions = list(range(action_space))
        self.state = state
        self.total_simulation_reward = 0
        self.num_visits = 0
        self.performance = 0
        self.action = action
        self.reward = reward
        self.terminal = terminal
        self.q_values = np.zeros(action_space)  # Q-values for actions
        self.action_visits = np.zeros(action_space)  # Count visits for each action

    def __str__(self):
        return "{}: (action={}, visits={}, reward={:d}, ratio={:0.4f})".format(
                                                  self.state,
                                                  self.action,
                                                  self.num_visits,
                                                  int(self.total_simulation_reward),
                                                  self.performance)

    def untried_action(self):
        action = random.choice(self.untried_actions)
        self.untried_actions.remove(action)
        return action

class Tree:
    def __init__(self):
        self.nodes = {}
        self.root = None

    def size(self):
        """Returns the number of nodes in the tree."""
        return len(self.nodes)

    def depth_info(self):
        """Calculates the maximum depth and average depth of the tree."""
        depths = []

        def calculate_depth(node, current_depth):
            depths.append(current_depth)
            for child_id in node.children_identifiers:
                calculate_depth(self.nodes[child_id], current_depth + 1)

        if self.root is not None:
            calculate_depth(self.root, 0)

        if depths:
            max_depth = max(depths)
            avg_depth = sum(depths) / len(depths)
            return max_depth, avg_depth
        else:
            return 0, 0  # In case the tree is empty
        
    def is_expandable(self, node):
        return len(node.untried_actions) > 0 and not node.terminal

    def iter(self, identifier, depth, last_node_flags):
        if identifier is None:
            node = self.root
        else:
            node = self.nodes[identifier]

        if depth == 0:
            yield "", node
        else:
            yield vertical_lines(last_node_flags) + horizontal_line(last_node_flags), node

        children = [self.nodes[identifier] for identifier in node.children_identifiers]
        last_index = len(children) - 1

        depth += 1
        for index, child in enumerate(children):
            last_node_flags.append(index == last_index)
            for edge, node in self.iter(child.identifier, depth, last_node_flags):
                yield edge, node
            last_node_flags.pop()

    def add_node(self, node, parent=None):
        self.nodes.update({node.identifier: node})

        if parent is None:
            self.root = node
        else:
            self.nodes[parent.identifier].children_identifiers.append(node.identifier)
            self.nodes[node.identifier].parent_identifier=parent.identifier

    def children(self, node):
        return [self.nodes[identifier] for identifier in node.children_identifiers]

    def parent(self, node):
        parent_identifier = self.nodes[node.identifier].parent_identifier
        return self.nodes.get(parent_identifier, None)

    def show(self):
        lines = ""
        for edge, node in self.iter(identifier=None, depth=0, last_node_flags=[]):
            lines += "{}{}\n".format(edge, node)
        print(lines)

def vertical_lines(last_node_flags):
    vertical_lines = []
    vertical_line = '\u2502'
    for last_node_flag in last_node_flags[0:-1]:
        if last_node_flag == False:
            vertical_lines.append(vertical_line + ' ' * 3)
        else:
            vertical_lines.append(' ' * 4)
    return ''.join(vertical_lines)

def horizontal_line(last_node_flags):
    horizontal_line = '\u251c\u2500\u2500 '
    horizontal_line_end = '\u2514\u2500\u2500 '
    return horizontal_line_end if last_node_flags[-1] else horizontal_line

class MonteCarloTreeSearch:
    def __init__(self, env, tree, max_depth=10, epsilon=0.1):
        self.env = env
        self.tree = tree
        self.action_space = self.env.action_space.n
        self.max_depth = max_depth
        self.epsilon = epsilon
        state = self.env.reset()
        self.tree.add_node(Node(state=state, action=None, action_space=self.action_space, reward=0, terminal=False))

    def expand(self, node):
        action = node.untried_action()
        state, reward, done, _, _ = self.env.step(action)
        new_node = Node(state=state, action=action, action_space=self.action_space, reward=reward, terminal=done)
        self.tree.add_node(new_node, node)
        return new_node

    def default_policy(self, node, depth):
        total_reward = node.reward
        current_depth = depth

        while not node.terminal and current_depth < self.max_depth:
            if random.random() < self.epsilon:
                action = random.randint(0, self.action_space - 1)  # Explore
            else:
                action = np.argmax(node.q_values)  # Exploit

            state, reward, done, _, _ = self.env.step(action)
            total_reward += reward
            current_depth += 1

            if done:
                break

        return total_reward

    def tree_policy(self):
        node = self.tree.root
        depth = 0

        while not node.terminal and depth < self.max_depth:
            if self.tree.is_expandable(node):
                return self.expand(node)
            else:
                node = random.choice(self.tree.children(node))
                state, reward, done, _, _ = self.env.step(node.action)
                assert node.state == state
            depth += 1

        return node

    def backward(self, node, value):
        while node:
            node.num_visits += 1
            node.total_simulation_reward += value
            node.performance = node.total_simulation_reward / node.num_visits
            
            if node.action is not None:
                node.action_visits[node.action] += 1
                node.q_values[node.action] += (value - node.q_values[node.action]) / node.action_visits[node.action]
                print(node.q_values)
            node = self.tree.parent(node)

    def forward(self):
        self._forward(self.tree.root)

    def _forward(self, node):
        best_child = max(self.tree.children(node), key=lambda n: np.max(n.q_values), default=None)
        
        print("****** {} ******".format(best_child.state))

        for child in self.tree.children(best_child):
            print("{}: {:0.4f}".format(child.state, np.max(child.q_values)))

        if best_child and len(self.tree.children(best_child)) > 0:
            self._forward(best_child)

    def render_policy(self):
        node = self.tree.root
        path = []
        directions = {0: 'Left', 1: 'Down', 2: 'Right', 3: 'Up'}
        print("Rendering final policy...\n")
        
        # Close the current environment if open and reset with rendering enabled
        self.env.close()
        self.env = gym.make('FrozenLake-v1', is_slippery=False, render_mode='human')
        self.env.reset()

        while node and not node.terminal:
            self.env.render()
            path.append(node.state)
            children = self.tree.children(node)
            
            if not children:
                print("No more actions available.")
                break
            
            best_child = max(children, key=lambda n: np.max(n.q_values), default=None)
            if best_child is not None and best_child.action is not None:
                print(f"Action: {directions.get(best_child.action, 'Unknown')} -> State: {best_child.state}")
                self.env.step(best_child.action)  # Take the action in the environment
                node = best_child
            else:
                print("No best child found.")
                break

        # Final rendering after reaching the goal
        self.env.render()
        self.env.close()    
        
        path2 = [item[0] if isinstance(item, tuple) else item for item in path]
        print(f"\nFinal policy path (states): {path2}")
        
        return path2

    def save_q_table(self, filename='q_table.json', precision=8):
        """Save the Q-table to a JSON file with specified decimal precision."""
        q_table = {}
        for node_id, node in self.tree.nodes.items():
            # Round the Q-values to the specified precision
            q_values = np.round(node.q_values, decimals=precision)
            q_table[str(node.state)] = q_values.tolist()

        with open(filename, 'w') as f:
            json.dump(q_table, f, indent=4)  # Pretty-print with 4-space indentation





In [2]:

# def main():
#     env = gym.make('FrozenLake-v1', is_slippery=False)
#     tree = Tree()
#     monteCarloTreeSearch = MonteCarloTreeSearch(env=env, tree=tree)
#     steps = 100000

#     for _ in range(steps):
#         env.reset()
#         node = monteCarloTreeSearch.tree_policy()
#         reward = monteCarloTreeSearch.default_policy(node, depth=0)

#     monteCarloTreeSearch.tree.show()
#     print("Best child choices:")
#     monteCarloTreeSearch.forward()
#     monteCarloTreeSearch.render_policy()

# if __name__ == "__main__":
#     main()

In [3]:
# import random
# from gym.envs.registration import register
# import gym
# from math import sqrt
# import json

# def init_env():
#     register(
#         id='FrozenLakeNotSlippery-v0',
#         entry_point='gym.envs.toy_text:FrozenLakeEnv',
#         kwargs={'map_name': '4x4', 'is_slippery': False}
#     )
#     return gym.make('FrozenLakeNotSlippery-v0')

# def main():
#     random.seed(2)
#     env = init_env()
#     steps_values = [1000, 5000, 10000]  # Example steps values
   
#     results = []

#     for steps in steps_values:
        
#         print(f"\nRunning MCTS with steps: {steps}\n")

#         tree = Tree()
#         monteCarloTreeSearch = MonteCarloTreeSearch(env=env, tree=tree, max_depth=10, epsilon=0.1)

#         for _ in range(0, steps):
#             env.reset()
#             node = monteCarloTreeSearch.tree_policy()
#             reward = monteCarloTreeSearch.default_policy(node, depth=5000)
#             monteCarloTreeSearch.backward(node, reward)

#         # Measure and display tree statistics
#         tree_size = tree.size()
#         max_depth, avg_depth = tree.depth_info()

#         print(f"Tree size: {tree_size}")
#         print(f"Maximum depth: {max_depth}")
#         print(f"Average depth: {avg_depth:.2f}")

#         monteCarloTreeSearch.tree.show()
#         monteCarloTreeSearch.forward()
#         policy_path = monteCarloTreeSearch.render_policy()

#         print(f"Final policy path: {policy_path}")

#         # Save the results for this combination
#         results.append({
#             'steps': steps,
#             'tree_size': tree_size,
#             'max_depth': max_depth,
#             'avg_depth': avg_depth,
#             'policy_path': policy_path
#         })

#     # Save results to a JSON file
#     with open('simulations/mcts_results.json', 'w') as json_file:
#         json.dump(results, json_file, indent=4)

# if __name__ == "__main__":
#     main()


In [4]:
import random
from gym.envs.registration import register
import gym
from math import sqrt
import json

def init_env():
    register(
        id='FrozenLakeNotSlippery-v0',
        entry_point='gym.envs.toy_text:FrozenLakeEnv',
        kwargs={'map_name': '4x4', 'is_slippery': False}
    )
    return gym.make('FrozenLakeNotSlippery-v0')



In [5]:

def main():
    random.seed(2)
    env = init_env()
    steps_values = [1000, 5000, 10000]  # Example steps values
    depths = [3,5,6,7]  # Example max depth values
   
    results = []

    for depth in depths:
        for steps in steps_values:
            
            print(f"\nRunning MCTS with steps: {steps}\n")

            tree = Tree()
            monteCarloTreeSearch = MonteCarloTreeSearch(env=env, tree=tree, max_depth=depth, epsilon=0.1)

            for _ in range(0, steps):
                env.reset()
                node = monteCarloTreeSearch.tree_policy()
                reward = monteCarloTreeSearch.default_policy(node, depth=depth)
                monteCarloTreeSearch.backward(node, reward)

            # Measure and display tree statistics
            tree_size = tree.size()
            max_depth, avg_depth = tree.depth_info()

            print(f"Tree size: {tree_size}")
            print(f"Maximum depth: {max_depth}")
            print(f"Average depth: {avg_depth:.2f}")

            monteCarloTreeSearch.tree.show()
            monteCarloTreeSearch.forward()
            
            policy_path = monteCarloTreeSearch.render_policy()

            print(f"Final policy path: {policy_path}")

            # Save the results for this combination
            results.append({
                'steps': steps,
                'tree_size': tree_size,
                'max_depth': max_depth,
                'avg_depth': avg_depth,
                'policy_path': policy_path
            })

    # Save results to a JSON file
    with open('simulations/mcts_results.json', 'w') as json_file:
        json.dump(results, json_file, indent=4)

if __name__ == "__main__":
    main()



Running MCTS with steps: 1000

[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0