In [1]:
import numpy as np
from scipy.special import comb
import matplotlib.pyplot as plt

In [2]:
class Node:
    def __init__(self, state, parent=None, action=None):
        self.state = state
        self.parent = parent
        self.action = action
        self.children = []
        self.visits = 0
        self.value = 0.0

In [3]:
# Function to develope the simple tree, not include the pruning
def expand_node(node, actions):
    for action in actions:
        new_state = simulate_action(node.state, action)
        child_node = Node(new_state, node, action)
        node.children.append(child_node)
    return node.children


In [4]:
# Reinforcement Action Simulation Action
def simulate_action(state, action):
    x, y = state
    if action == 'up':
        return (x, y + 1)
    elif action == 'down':
        return (x, y - 1)
    elif action == 'left':
        return (x - 1, y)
    elif action == 'right':
        return (x + 1, y)

In [18]:
# Implementation of spectral tree search
def sets_search(root_state, actions, max_depth, num_simulations):
    root_node = Node(root_state)
    for _ in range(num_simulations):
        current_node = root_node
        while len(current_node.children) != 0 and current_node.visits < max_depth:
            # Here we'll use the simple selection process (not a balancing exploration)
            best_child = max(current_node.children, key=lambda child: child.value + np.sqrt(2 * np.log(current_node.visits)/child.visits) if child.visits > 0 else np.inf)
            current_node = best_child

        # If we're at a leaf node, expand it
        if current_node.visits < max_depth:
            expand_node(current_node, actions)

        # Evaluation phase
        target_state = (5,4) # Assume this's the target
        if current_node.state == target_state:
            reward = 1.0
        else:
            reward = 0.0

        # Backpropagation
        while current_node:
            current_node.visits += 1
            current_node.value += (reward - current_node.value) / current_node.visits
            current_node = current_node.parent

    best_node = max(root_node.children, key=lambda child: child.value)
    trajectory = []

    while best_node:
        trajectory.append(best_node.state)
        best_node = best_node.parent

    return list(reversed(trajectory))



In [19]:
# Initial settings
root_state = (0,0)
actions = ['up', 'down', 'left', 'right']
max_depth = 10
num_simulations = 1000

# Run the search
trajectory = sets_search(root_state, actions, max_depth, num_simulations)

In [20]:
trajectory

[(0, 0), (0, 1)]

In [1]:
plt.figure(figsize=(8, 8))
plt.plot([x for x, y in trajectory], [y for x, y in trajectory], 'o-')
plt.xlim(-1, 6)
plt.ylim(-1, 6)
plt.title("Best Path")
plt.grid(True)
plt.show()

NameError: name 'plt' is not defined

# YOU CAN USE THIS CODE TO DO THE RESEARCH