# 🎲 Monte‑Carlo Tree‑Search (MCTS) × Tree‑of‑Thought Demo

*Day 5 bonus lab* — blend **MCTS** with LLM scoring for smart search.

We implement **Tic‑Tac‑Toe** with:

* Classic UCT MCTS (random rollouts)  
* **LLM‑guided evaluation** at leaf nodes (optional)  
* Comparison to naive random play

> Set `OPENAI_API_KEY` to enable model‑based value estimates; without it, leaf evaluation falls back to random rollouts.

## 🔧 0. Setup

In [None]:
%pip -q install --upgrade openai
import os, math, random, copy
import openai
# Read the API key from the environment; it stays None when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Chat model used for LLM-guided leaf evaluation.
MODEL = "gpt-4o-mini"

---

## 1️⃣ Tic‑Tac‑Toe Environment

In [None]:
class TicTacToe:
    """Minimal two-player Tic-Tac-Toe position.

    The board is a flat list of nine cells indexed 0..8 in row-major order.
    Each cell holds 'X', 'O' or ' ' (empty). ``player`` is the mark that
    moves next; a fresh game starts with 'X'.
    """

    # Every row, column and diagonal, as triples of board indices.
    _LINES = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),
        (0, 3, 6), (1, 4, 7), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        self.player = 'X'
        self.board = [' '] * 9  # 0..8

    def copy(self):
        """Return an independent duplicate of this position."""
        clone = TicTacToe()
        clone.board = list(self.board)
        clone.player = self.player
        return clone

    def moves(self):
        """Return the indices of all empty cells, in ascending order."""
        cells = self.board
        return [idx for idx in range(len(cells)) if cells[idx] == ' ']

    def play(self, move):
        """Put the current player's mark on cell ``move`` and pass the turn.

        No legality check is performed: an occupied cell is overwritten.
        """
        mark = self.player
        self.board[move] = mark
        self.player = 'X' if mark != 'X' else 'O'

    def winner(self):
        """Return 'X' or 'O' for a completed line, 'Draw' for a full board
        with no line, or None while the game is still open."""
        cells = self.board
        for i, j, k in self._LINES:
            mark = cells[i]
            if mark != ' ' and mark == cells[j] and mark == cells[k]:
                return mark
        if ' ' in cells:
            return None
        return 'Draw'

    def __str__(self):
        """Render the board as three rows separated by '-+-+-' rules."""
        rows = ("|".join(map(str, self.board[start:start + 3])) for start in (0, 3, 6))
        return "\n-+-+-\n".join(rows)


---

## 2️⃣ Minimal UCT Monte‑Carlo Tree‑Search

In [None]:
class Node:
    """One vertex of the MCTS search tree.

    Holds the game ``state`` reached at this vertex, a link to the ``parent``
    node (None for the root), the ``move`` that led here from the parent, the
    list of expanded ``children``, and the accumulated ``wins`` / ``visits``
    statistics.
    """

    def __init__(self, state, parent=None, move=None):
        # Position in the tree.
        self.state = state
        self.move = move
        self.parent = parent
        self.children = []
        # Search statistics, updated by the caller during backpropagation.
        self.visits = 0
        self.wins = 0

    def uct(self, c=1.41):
        """Return the UCT score: mean reward plus an exploration bonus.

        An unvisited node scores +infinity so that it is tried before any
        visited sibling. ``c`` scales the exploration term. Requires a parent
        once the node has been visited, because the bonus reads
        ``self.parent.visits``.
        """
        if not self.visits:
            return math.inf
        mean_reward = self.wins / self.visits
        bonus = c * math.sqrt(math.log(self.parent.visits) / self.visits)
        return mean_reward + bonus

def mcts(root_state, iterations=500, rollout_random=True):
    """Pick a move for the player to act in ``root_state`` using UCT search.

    Each iteration runs the four classic MCTS phases: select a leaf by
    maximum UCT, expand all of its legal moves, evaluate one new child with
    ``evaluate_rollout``, and back the result up to the root.

    Args:
        root_state: Game state exposing ``copy()``, ``moves()``, ``play(m)``,
            ``winner()`` and a ``player`` attribute (the side to move).
        iterations: Number of search iterations to run.
        rollout_random: Forwarded to ``evaluate_rollout``; True forces random
            playouts, False allows LLM-based leaf evaluation.

    Returns:
        The move of the most-visited child of the root.

    Raises:
        ValueError: If no root child was created, i.e. ``root_state`` is
            already terminal or ``iterations`` is not positive.
    """
    root = Node(root_state)
    for _ in range(iterations):
        node = root
        # Selection: descend along the highest-UCT child until a leaf.
        while node.children:
            node = max(node.children, key=lambda n: n.uct())
        # Expansion: add every legal successor, then sample one to evaluate.
        if node.state.winner() is None:
            for m in node.state.moves():
                child_state = node.state.copy()
                child_state.play(m)
                node.children.append(Node(child_state, node, m))
            node = random.choice(node.children)
        # Simulation
        result = evaluate_rollout(node.state, rollout_random)
        # Backprop: a node's statistics are read by its PARENT during
        # selection, so a win must be credited from the point of view of the
        # player who moved INTO the node. That is the parent's side to move,
        # not node.state.player, which has already switched to the opponent.
        while node is not None:
            node.visits += 1
            if result == 'Draw':
                node.wins += 0.5
            elif node.parent is not None and result == node.parent.state.player:
                node.wins += 1
            node = node.parent
    if not root.children:
        raise ValueError(
            "mcts() found no move: root state is terminal or iterations <= 0"
        )
    return max(root.children, key=lambda n: n.visits).move

def evaluate_rollout(state, random_play=True):
    """Estimate the final outcome of ``state``.

    A terminal state returns its actual result. Otherwise the outcome comes
    from a uniformly random playout, or, when ``random_play`` is False and an
    OpenAI API key is configured, from a single LLM judgement of the board.

    Args:
        state: Game state exposing ``copy()``, ``moves()``, ``play(m)`` and
            ``winner()``; the LLM path additionally reads ``board``. The
            caller's object is never mutated.
        random_play: True to force a random playout; False to try the LLM
            first.

    Returns:
        Whatever ``winner()`` reports for a finished game ('X', 'O' or 'Draw'
        for Tic-Tac-Toe), or the normalised LLM verdict 'X', 'O' or 'Draw'.
    """
    g = state.copy()  # work on a copy so the tree node's state stays intact

    def random_playout():
        # Play uniformly random legal moves until the game is decided.
        while True:
            outcome = g.winner()
            if outcome:
                return outcome
            g.play(random.choice(g.moves()))

    winner = g.winner()
    if winner:
        return winner
    if random_play or not openai.api_key:
        return random_playout()

    # LLM evaluation: ask the model which side the position favours.
    prompt = f"Current board (rows): {g.board}. Who is likely to win? Respond with X, O, or Draw."
    try:
        # openai>=1.0 removed openai.ChatCompletion; the module-level client
        # exposes the same call as openai.chat.completions.create.
        ans = openai.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )
        reply = ans.choices[0].message.content or ""
    except Exception:
        # Best-effort: any API or response-shape failure degrades to a random
        # playout. Unlike a bare ``except``, KeyboardInterrupt still propagates.
        return random_playout()

    # Normalise replies such as "x", "X." or "Draw!" to the exact labels that
    # backpropagation compares against; anything else is treated as no answer.
    words = reply.split()
    token = words[0].strip(".,:;!\"'`*").upper() if words else ""
    verdict = {"X": "X", "O": "O", "DRAW": "Draw"}.get(token)
    if verdict is None:
        return random_playout()
    return verdict


---

## 3️⃣ Play a Match: MCTS vs. Random

In [None]:
def play_game(iterations=300, rollout_random=False):
    """Play one Tic-Tac-Toe game: MCTS as 'X' against a random 'O'.

    Args:
        iterations: MCTS iterations spent on each 'X' move.
        rollout_random: Forwarded to ``mcts``. True uses random rollouts;
            False (the default) requests LLM-guided leaf evaluation, which
            itself falls back to random rollouts when no API key is set.

    Returns:
        The result reported by ``TicTacToe.winner()``: 'X', 'O' or 'Draw'.
        The final board and the result are also printed.
    """
    game = TicTacToe()
    while True:
        if game.player == 'X':
            # Search on a copy so the live game is only changed by play().
            move = mcts(game.copy(), iterations=iterations, rollout_random=rollout_random)
        else:
            move = random.choice(game.moves())
        game.play(move)
        winner = game.winner()
        if winner:
            print(game)
            print("Winner:", winner)
            return winner


play_game()

### 📝 Experiments

* Increase `iterations` to improve MCTS strength.  
* Toggle `rollout_random=True/False` to compare **random** vs. **LLM‑guided** leaf evaluation (requires API key).  
* Observe the win rate over 20 games. Can MCTS with LLM evaluation beat the random player with fewer iterations than MCTS with random rollouts?

---

## 🔗 References

* Browne et al., “A Survey of Monte‑Carlo Tree Search Methods”, 2012  
* Silver et al., “Mastering the Game of Go with Deep Neural Networks and Tree Search” (AlphaGo), 2016 – policy/value networks + MCTS  
* Yao et al., “Tree of Thoughts: Deliberate Problem Solving with Large Language Models”, 2023 – Tree‑of‑Thought parallels
