##### Import of libraries

In [8]:
import numpy as np
from pettingzoo.classic import tictactoe_v3
from ctransformers import AutoModelForCausalLM
import os
import numpy as np
import re
import requests
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import random
from collections import deque

##### Steps to create the deep-learning agent

Define the Neural Network Class (Q-Network) - simple MLP network

In [9]:
class QNetwork(nn.Module):
    """Simple MLP that maps a flat board encoding to one Q-value per action.

    Architecture: input -> 64 (ReLU) -> 64 (ReLU) -> output (linear).
    """

    def __init__(self, input_size=9, output_size=9):
        super().__init__()
        hidden_units = 64
        # Layers are created in forward order; the attribute names are the
        # keys used by state_dict(), so they must stay fc1/fc2/fc3.
        self.fc1 = nn.Linear(input_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, output_size)

    def forward(self, x):
        """Return the raw (unbounded) Q-value estimates for input ``x``."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)


Define DQAgent class with PyTorch

In [None]:

class DQAgent:
    """Epsilon-greedy Tic-Tac-Toe agent backed by a small Q-network.

    The agent stores ``(state, action, reward)`` triples and regresses the
    Q-value of the taken action onto the stored reward. No bootstrapped
    next-state term is used, so ``gamma`` is kept only as an attribute.

    Args:
        name: Label used as a prefix in log output.
        state_size: Length of the flat state vector fed to the network.
        action_size: Number of discrete actions (network outputs).
        epsilon: Probability of picking a random valid move.
        lr: Learning rate of the Adam optimizer.
        gamma: Discount factor (stored; not used by ``replay``).
    """

    def __init__(self, name, state_size, action_size, epsilon=0.1, lr=0.001, gamma=0.95):
        self.name = name
        self.state_size = state_size
        self.action_size = action_size
        self.epsilon = epsilon  # exploration rate
        self.gamma = gamma  # discount factor
        self.memory = deque(maxlen=2000)  # oldest experiences are dropped first

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = QNetwork(state_size, action_size).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.loss_fn = nn.MSELoss()

    def choose_action(self, state):
        """Pick a move for ``state`` using an epsilon-greedy policy.

        Args:
            state: 1-D array in which a value of 0 marks an empty cell.

        Returns:
            Index (int) of the chosen empty cell.

        Raises:
            ValueError: If ``state`` contains no empty cell.
        """
        state = np.asarray(state)
        valid_moves = np.where(state == 0)[0]
        if valid_moves.size == 0:
            # Without this guard the greedy branch would argmax over all -inf
            # and silently return the occupied cell 0.
            raise ValueError(f"[{self.name}] No valid moves available")

        if np.random.rand() < self.epsilon:
            # Exploration: Choose random valid move
            action = int(np.random.choice(valid_moves))
            print(f"[{self.name}] (Exploring) Chosen action: {action}")
        else:
            # Exploitation: Choose best move based on Q-values
            state_tensor = torch.FloatTensor(
                np.asarray(state, dtype=np.float32)
            ).unsqueeze(0).to(self.device)
            with torch.no_grad():
                q_values = self.model(state_tensor).cpu().numpy().flatten()

            # Occupied cells get -inf so argmax can only land on a legal move.
            masked_q_values = np.full(q_values.shape, -np.inf)
            masked_q_values[valid_moves] = q_values[valid_moves]
            action = int(np.argmax(masked_q_values))
            print(f"[{self.name}] Valid moves: {valid_moves}")
            print(f"[{self.name}] Chosen action: {action}")

        return action

    def remember(self, state, action, reward):
        """Append one ``(state, action, reward)`` experience to the replay memory."""
        self.memory.append((state, action, reward))

    def replay(self, batch_size=32):
        """Run one gradient step on a random minibatch from memory.

        Does nothing until at least ``batch_size`` experiences are stored.
        """
        if len(self.memory) < batch_size:
            return

        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards = zip(*minibatch)

        # Stack into a single ndarray first: building a tensor directly from a
        # sequence of ndarrays is very slow and emits a UserWarning.
        states_tensor = torch.FloatTensor(np.asarray(states, dtype=np.float32)).to(self.device)
        actions_tensor = torch.LongTensor(actions).to(self.device)
        rewards_tensor = torch.FloatTensor(rewards).to(self.device)

        self.optimizer.zero_grad()
        predictions = self.model(states_tensor)
        # squeeze(1) keeps the batch dimension even when batch_size == 1, so
        # prediction and target shapes always match.
        predicted_q = predictions.gather(1, actions_tensor.unsqueeze(1)).squeeze(1)

        # Q-learning update (using immediate reward as this is episodic TicTacToe)
        loss = self.loss_fn(predicted_q, rewards_tensor)
        loss.backward()
        self.optimizer.step()

    # Function to save the DQ Agent model
    def save_model(self, path="dq_model.pth"):
        """Save model weights to disk."""
        torch.save(self.model.state_dict(), path)
        print(f"Model saved to {path}")

    # Function to load the trained DQ Agent model from disk for the evaluation phase
    def load_model(self, path="dq_model.pth"):
        """Load model weights from disk and switch the model to eval mode.

        NOTE: torch.load unpickles the file; only load checkpoints you trust.
        """
        # map_location lets a checkpoint saved on a GPU machine load on a
        # CPU-only machine (and vice versa).
        self.model.load_state_dict(torch.load(path, map_location=self.device))
        self.model.eval()  # Set model to evaluation mode
        print(f"Model loaded from {path}")


    

##### Steps to create LLM agent

In [11]:
# LLM API details (modify if needed)
# Chat-completions endpoint of the locally running LLM server.
LLM_API_URL = "http://localhost:1234/v1/chat/completions"  # Change to your LM Studio API URL
# Model identifier sent with each request; LLM_Agent uses it as its default model.
MODEL_NAME ="Mistral-Nemo-Instruct-2407-GGUF"
# Alternative model, kept commented out for quick switching:
#MODEL_NAME = "Phi-4-mini-instruct-GGUF"

Definition of the instruction

In [12]:
# System-level instruction for the LLM player. LLM_Agent.get_action passes it
# to query_llm, which sends it as the "system" message of every request.
instruction = """
You are an expert Tic-Tac-Toe player. You must always follow the best strategy to win the game or block your opponent from winning.

Use this reasoning process:

1. Check if YOU can win on this turn. If yes, make that move.
2. If not, check if your OPPONENT could win on their next move. If yes, block it.
3. Otherwise, choose the best available strategic position (like center or corners).

Output ONLY the number of the chosen move (0–8) based on your decision.

Think step-by-step before giving the final number. But output only the number, without explanation.
"""

Building the prompt using information from the environment

In [13]:
def build_prompt(board_array, valid_moves, agent_name):
    """Render the board and move list as a user prompt for the LLM.

    The PettingZoo Tic-Tac-Toe observation is relative to the observing
    agent: plane 0 holds that agent's own marks and plane 1 the opponent's.
    Marks are therefore drawn with the symbol of whoever is asking, so the
    board always agrees with the "You are playing as ..." line.

    Args:
        board_array: 3x3x2 array as observed by ``agent_name``.
        valid_moves: Array or sequence of legal cell indices.
        agent_name: "player_1" (plays X) or any other name (plays O).

    Returns:
        The prompt text. Empty cells are shown as their index 0-8 and every
        line starts at column 0 so the three board rows stay aligned.
    """
    current_player = "X" if agent_name == "player_1" else "O"
    opponent = "O" if current_player == "X" else "X"

    board_rows = []
    cell_num = 0
    for row in board_array:
        row_str = ""
        for cell in row:
            if cell[0] == 1:
                row_str += current_player  # own mark (plane 0)
            elif cell[1] == 1:
                row_str += opponent  # opponent mark (plane 1)
            else:
                row_str += str(cell_num)  # empty cell: show its index
            cell_num += 1
        board_rows.append(row_str)

    # Accept plain lists as well as numpy arrays.
    valid_list = np.asarray(valid_moves).tolist()

    prompt_lines = [
        f"You are playing as {current_player}. Your opponent is {opponent}.",
        "",
        "This is the current Tic-Tac-Toe board. The numbers indicate empty positions:",
        "",
        *board_rows,
        "",
        f"Valid move positions: {valid_list}",
        "",
        "Think step-by-step:",
        "- Can you win this turn? Play that move.",
        "- Can your opponent win next turn? Block it.",
        "- Otherwise, choose the best long-term position.",
        "",
        "Now make your move by writing only the position number (0–8).",
    ]
    return "\n".join(prompt_lines)

Querying the LLM with the instruction and prompt

In [14]:
def query_llm(model, api_url, agent_name, instruction, prompt, timeout=60):
    """Send one chat-completion request and return the reply text.

    Args:
        model: Model identifier placed in the request payload.
        api_url: URL of the chat-completions endpoint.
        agent_name: Label used as a prefix in the failure message.
        instruction: Text sent as the "system" message.
        prompt: Text sent as the "user" message.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the whole run indefinitely.

    Returns:
        The stripped reply content, or "" if the request fails or the
        response does not have the expected structure (best effort: the
        caller falls back to a random move).
    """
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": instruction},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.0,
        "max_tokens": 8,
        "stop": ["\n"]
    }

    try:
        response = requests.post(api_url, json=payload, timeout=timeout)
        response.raise_for_status()
        raw = response.json()

        content = raw["choices"][0]["message"]["content"]
        # A JSON null content becomes an empty reply instead of an error.
        return (content or "").strip()

    except (requests.RequestException, KeyError, IndexError, TypeError,
            ValueError, AttributeError) as e:
        # Network/HTTP failures and malformed responses are reported, not raised.
        print(f"[{agent_name}] LLM call failed: {e}")
        return ""

Parse the LLM output. If the LLM chooses an invalid move, or its reply cannot be understood, fall back to a random valid move.

In [15]:
def parse_llm_output(output, valid_moves, agent_name):
    """Turn the raw LLM reply into a legal move.

    The first stand-alone single digit in ``output`` is taken as the move.
    If there is none, or it is not in ``valid_moves``, the reason is printed
    and a random entry of ``valid_moves`` is returned instead.
    """
    digit = re.search(r"\b(\d)\b", output.strip())

    if digit is None:
        print(f"[{agent_name}] LLM output not understood: '{output}'")
    else:
        candidate = int(digit.group(1))
        if candidate in valid_moves:
            return candidate
        print(f"[{agent_name}] LLM chose invalid move: {candidate}, not in {valid_moves}")

    # Reached only when no usable move was found in the reply.
    random_move = int(np.random.choice(valid_moves))
    print(f"[{agent_name}] Fallback random move: {random_move}")
    return random_move

Define LLM Agent class with action method

In [16]:
class LLM_Agent:
    """Tic-Tac-Toe player that asks an LLM server for each move.

    Args:
        name: Label used as a prefix in log output.
        model: Model identifier sent with each request (defaults to MODEL_NAME).
        api_url: Chat-completions endpoint (defaults to LLM_API_URL, so editing
            that constant actually changes where requests are sent).
    """

    def __init__(self, name, model=MODEL_NAME, api_url=LLM_API_URL):
        self.name = name
        self.model = model
        self.api_url = api_url

    def get_action(self, observation, agent_name):
        """Return a legal move for the given observation.

        Args:
            observation: Dict with an "observation" entry (reshaped to 3x3x2)
                and an "action_mask" entry whose non-zero positions are the
                legal moves.
            agent_name: Environment name of the acting player; passed to
                build_prompt to decide which symbol the LLM plays.

        Returns:
            The chosen cell index; parse_llm_output substitutes a random
            legal move when the LLM reply is unusable.
        """
        board = observation["observation"].reshape(3, 3, 2)
        valid_moves = np.flatnonzero(observation["action_mask"])
        print(f"[{self.name}] valid moves: {valid_moves}\n")

        prompt = build_prompt(board, valid_moves, agent_name)
        response = query_llm(self.model, self.api_url, self.name, instruction, prompt)

        action = parse_llm_output(response, valid_moves, self.name)
        print(f"[{self.name}] Chosen action: {action}\n")

        return action

##### Steps to create the game using the two agents

In [17]:
# Use this to print the board as three lines of text
def print_final_board(board):
    """Print ``board`` row by row: plane-0 marks as X, plane-1 marks as O, blanks as spaces."""
    for row in board:
        print("".join(
            "X" if cell[0] == 1 else "O" if cell[1] == 1 else " "
            for cell in row
        ))

# Use this function to detect the winner from a 3x3x2 board array
def detect_winner(board):
    """Return the label of the player owning a complete line, or None.

    Args:
        board: 3x3x2 array; ``board[r, c, 0] == 1`` is a plane-0 mark and
            ``board[r, c, 1] == 1`` a plane-1 mark. A cell with neither is empty.

    Returns:
        "player_1" if plane 0 holds a full row, column or diagonal,
        "player_2" if plane 1 does, otherwise None.

    Each plane is tested on its own. Taking ``argmax`` over the last axis
    would map an empty cell ``[0, 0]`` to index 0 and so count blanks as
    plane-0 marks, reporting wins that never happened.

    NOTE: PettingZoo's Tic-Tac-Toe observation is relative to the observing
    agent (plane 0 = that agent's marks), so the labels are only correct for
    a board observed by player_1.
    """
    board = np.asarray(board)
    for plane, label in [(0, "player_1"), (1, "player_2")]:
        marks = board[:, :, plane] == 1
        if (
            marks.all(axis=1).any()                # any full row
            or marks.all(axis=0).any()             # any full column
            or np.diag(marks).all()                # main diagonal
            or np.diag(np.fliplr(marks)).all()     # anti-diagonal
        ):
            return label
    return None



Function for transforming the game state from the Tic-Tac-Toe environment into a format 
the DQAgent's neural network can understand and learn from

In [18]:
def preprocess_observation(obs):
    """Collapse the two mark planes of ``obs["observation"]`` into one flat vector.

    Each cell becomes plane 0 minus plane 1: +1 for a plane-0 mark, -1 for a
    plane-1 mark and 0 for an empty cell.
    """
    planes = obs["observation"]
    own_marks = planes[:, :, 0]
    opponent_marks = planes[:, :, 1]
    signed_board = own_marks - opponent_marks
    return signed_board.reshape(-1)

##### Running the game once with an untrained DQ agent

In [3]:
def run_game(render=False):
    """Play one game: LLM agent (player_1, X) against a fresh DQAgent (player_2, O).

    The result is taken from the rewards the environment reports once the
    game has ended. In the AEC loop each agent is visited one more time after
    termination, and ``env.last()`` then returns its final reward and the
    terminal observation. Reading the last *non-terminal* observation instead
    would miss the final move and would be seen from the last mover's
    perspective, which can name the wrong winner.

    Args:
        render: If True, the environment is created with render_mode="human".

    Returns:
        Dict mapping "player_1"/"player_2" to 1 (win), -1 (loss) or 0 (draw).
    """
    render_mode = "human" if render else None
    env = tictactoe_v3.env(render_mode=render_mode)

    try:
        env.reset(seed=None)

        agent_1 = LLM_Agent(
            name="Mistral_X",
            model="Mistral-Nemo-Instruct-2407-GGUF",
            api_url="http://localhost:1234/v1/chat/completions"
        )

        agent_2 = DQAgent(
            name="DQAgent_O",
            state_size=9,
            action_size=9,
            epsilon=0.1
        )

        agents = {"player_1": agent_1, "player_2": agent_2}
        all_agents = env.possible_agents[:]
        move_history = []
        state_memory = []
        env_rewards = {agent: 0 for agent in all_agents}
        final_board = None  # terminal board as observed by player_1

        start_time = time.time()

        for agent_name in env.agent_iter():
            obs, reward, terminated, truncated, info = env.last()

            if terminated or truncated:
                # Game is over: record this agent's final reward.
                env_rewards[agent_name] = reward
                if agent_name == "player_1":
                    # From player_1's view plane 0 holds the X marks, which is
                    # what print_final_board expects.
                    final_board = obs["observation"].reshape(3, 3, 2)
                action = None  # step(None) for done agent
            else:
                if agent_name == "player_1":
                    action = agents[agent_name].get_action(obs, agent_name)
                else:
                    state = preprocess_observation(obs)
                    action = agents[agent_name].choose_action(state)
                    state_memory.append((state, action))

                move_history.append((agent_name, action))

            env.step(action)

        elapsed_time = time.time() - start_time

        if final_board is None:
            # Fallback if no terminal observation for player_1 was seen.
            final_obs = env.observe(all_agents[0])
            final_board = final_obs["observation"].reshape(3, 3, 2)

        # The winner is the agent that received a positive reward, if any.
        winner = next((agent for agent in all_agents if env_rewards[agent] > 0), None)

        rewards = {agent: 0 for agent in all_agents}
        if winner:
            rewards[winner] = 1
            loser = [a for a in all_agents if a != winner][0]
            rewards[loser] = -1

        # Every move of the DQAgent is credited with the final game outcome.
        final_reward = rewards["player_2"]
        for state, action in state_memory:
            agent_2.remember(state, action, final_reward)
        agent_2.replay()

        print("🏁 Game Over!")
        print_final_board(final_board)
        print(f"⏱️ Duration: {elapsed_time:.2f} sec\nRewards: {rewards}")
        print(f"🏆 Winner: {agents[winner].name}" if winner else "🤝 Draw")

        for i, (agent, move) in enumerate(move_history):
            print(f"Turn {i+1}: {agent} → {move}")

        return rewards
    finally:
        # Always release the environment (and its window), even on errors.
        env.close()


In [54]:
# Play a single game with on-screen rendering (render_mode="human").
if __name__ == "__main__":
    run_game(render=True)

[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [1 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [4 6 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [4 6 8]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X X
 O 
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 0
Turn 3: player_1 → 3
Turn 4: player_2 → 1
Turn 5: player_1 → 5
Turn 6: player_2 → 7
Turn 7: player_1 → 4


##### Running the game 10 times with an untrained DQ agent

In [55]:
def run_multiple_episodes(n_episodes=10):
    """Play ``n_episodes`` independent games via run_game and print a summary.

    Only the last episode is rendered. A game counts as a draw when every
    reward returned by run_game is 0.
    """
    players = ("player_1", "player_2")
    total_rewards = dict.fromkeys(players, 0)
    wins = dict.fromkeys(players, 0)
    draws = 0

    for episode in range(1, n_episodes + 1):
        print(f"\n=== 🎮 Episode {episode} ===")
        # Show only the final game
        rewards = run_game(render=(episode == n_episodes))

        for agent, reward in rewards.items():
            total_rewards[agent] += reward
            if reward == 1:
                wins[agent] += 1

        nobody_scored = not any(r != 0 for r in rewards.values())
        if nobody_scored:
            draws += 1

    # Win rate report
    print("\n==============================")
    print(f"📊 Results after {n_episodes} episodes:")
    for agent, score in total_rewards.items():
        print(f"🏆 {agent} → Wins: {wins[agent]} | Total Score: {score}")
    print(f"🤝 Draws: {draws}")
    print("==============================\n")

In [56]:
# Play 10 games in a row and print the aggregated win/draw summary.
if __name__ == "__main__":
    run_multiple_episodes(n_episodes=10)


=== 🎮 Episode 1 ===
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [1 4 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [1 4 6]

[Mistral_X] LLM chose invalid move: 2, not in [1 4 6]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [4 6]
[DQAgent_O] Chosen action: 6
🏁 Game Over!
XOO
O O
 XX
⏱️ Duration: 13.87 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 0
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 1
Turn 8: player_2 → 6

=== 🎮 Episode 2 ===
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[

##### Running game 1000 times for training DQ Agent

In [None]:
def run_game_with_agents(agent_1, agent_2, render=False):
    """Play one game between existing agents and train ``agent_2`` on the outcome.

    ``agent_1`` plays as player_1 through ``get_action(obs, agent_name)``;
    ``agent_2`` plays as player_2 through ``choose_action(state)`` and is
    trained via ``remember``/``replay``.

    The result is taken from the rewards the environment reports once the
    game has ended. In the AEC loop each agent is visited one more time after
    termination, and ``env.last()`` then returns its final reward and the
    terminal observation. Reading the last *non-terminal* observation instead
    would miss the final move and would be seen from the last mover's
    perspective, which can name the wrong winner and train on a wrong reward.

    Args:
        agent_1: Agent playing as player_1.
        agent_2: Agent playing as player_2.
        render: If True, the environment is created with render_mode="human".

    Returns:
        Dict mapping "player_1"/"player_2" to 1 (win), -1 (loss) or 0 (draw).
    """
    render_mode = "human" if render else None
    env = tictactoe_v3.env(render_mode=render_mode)

    try:
        env.reset(seed=None)

        agents = {"player_1": agent_1, "player_2": agent_2}
        all_agents = env.possible_agents[:]
        move_history = []
        state_memory = []
        env_rewards = {agent: 0 for agent in all_agents}
        final_board = None  # terminal board as observed by player_1

        for agent_name in env.agent_iter():
            obs, reward, terminated, truncated, info = env.last()

            if terminated or truncated:
                # Game is over: record this agent's final reward.
                env_rewards[agent_name] = reward
                if agent_name == "player_1":
                    # From player_1's view plane 0 holds the X marks, which is
                    # what print_final_board expects.
                    final_board = obs["observation"].reshape(3, 3, 2)
                action = None
            else:
                if agent_name == "player_1":
                    action = agent_1.get_action(obs, agent_name)
                else:
                    state = preprocess_observation(obs)
                    action = agent_2.choose_action(state)
                    state_memory.append((state, action))

                move_history.append((agent_name, action))

            env.step(action)

        # Final board state
        if final_board is None:
            # Fallback if no terminal observation for player_1 was seen.
            final_obs = env.observe(all_agents[0])
            final_board = final_obs["observation"].reshape(3, 3, 2)

        # The winner is the agent that received a positive reward, if any.
        winner = next((agent for agent in all_agents if env_rewards[agent] > 0), None)

        rewards = {agent: 0 for agent in all_agents}
        if winner:
            rewards[winner] = 1
            loser = [a for a in all_agents if a != winner][0]
            rewards[loser] = -1

        # Train the DQAgent (only player_2): every move gets the final outcome.
        final_reward = rewards["player_2"]
        for state, action in state_memory:
            agent_2.remember(state, action, final_reward)
        agent_2.replay()

        # Always print full game output after each episode used for debugging purposes and to check the model performance
        print("🏁 Game Over!")
        print_final_board(final_board)
        print(f"Rewards: {rewards}")
        print(f"🏆 Winner: {agents[winner].name}" if winner else "🤝 Draw")
        for i, (agent, move) in enumerate(move_history):
            print(f"Turn {i+1}: {agent} → {move}")

        return rewards
    finally:
        # Always release the environment (and its window), even on errors.
        env.close()


In [None]:
def run_multiple_episodes(n_episodes=1000, save_model_path="dq_model.pth"):
    """Train a DQAgent against the LLM agent for ``n_episodes`` games.

    One LLM agent and one DQAgent are created up front and reused for every
    game. Epsilon starts at 1.0 and is multiplied by 0.995 after each episode
    (never below 0.01). The model weights are written to ``save_model_path``
    once all episodes have finished. Only the last episode is rendered.
    """
    llm_player = LLM_Agent(
        name="Mistral_X",
        model="Mistral-Nemo-Instruct-2407-GGUF",
        api_url="http://localhost:1234/v1/chat/completions"
    )
    dq_player = DQAgent(
        name="DQAgent_O",
        state_size=9,
        action_size=9,
        epsilon=1.0  # start with high exploration
    )

    wins = {"player_1": 0, "player_2": 0}
    draws = 0

    for episode in range(1, n_episodes + 1):
        print(f"\n=== 🎮 Episode {episode} ===")
        is_last_episode = episode == n_episodes

        # Run one episode
        rewards = run_game_with_agents(llm_player, dq_player, render=is_last_episode)

        # Tally results: the first player with reward 1 is the winner
        victor = next(
            (player for player in ("player_1", "player_2") if rewards[player] == 1),
            None,
        )
        if victor is None:
            draws += 1
        else:
            wins[victor] += 1

        # Decay epsilon after each episode so the agent explores a lot early
        # on and exploits what it has learned later.
        dq_player.epsilon = max(0.01, dq_player.epsilon * 0.995)

        # Show progress on the first episode and then every 100 episodes
        if episode == 1 or episode % 100 == 0:
            print(f"After {episode} episodes:")
            print(f"    🏆 Wins — Player 1: {wins['player_1']}, Player 2: {wins['player_2']}")
            print(f"    🤝 Draws: {draws}")

    # Save model after training
    dq_player.save_model(save_model_path)

    print("\n=== Training Complete ===")
    print(f"🏆 Wins — Player 1: {wins['player_1']}, Player 2: {wins['player_2']}")
    print(f"🤝 Draws: {draws}")



In [None]:
# Train for 1000 episodes and save the weights to dq_model.pth.
# Note: the per-move output is kept on purpose; it is used to analyze the models' moves.
if __name__ == "__main__":
    run_multiple_episodes(n_episodes=1000, save_model_path="dq_model.pth")

Training episodes:   0%|          | 0/1000 [00:00<?, ?it/s]

[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [7]



Training episodes:   0%|          | 1/1000 [00:17<4:45:56, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
OOX
X O
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 0
Turn 8: player_2 → 1
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0]



Training episodes:   0%|          | 2/1000 [00:34<4:44:16, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 XX
OOX
XOO
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] 

Training episodes:   0%|          | 3/1000 [00:51<4:45:29, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 XX
OOX
XOO
⏱️ Duration: 17.28 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] 

Training episodes:   0%|          | 4/1000 [01:08<4:44:32, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
OOX
X O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 0
Turn 8: player_2 → 1
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0]



Training episodes:   0%|          | 5/1000 [01:25<4:45:28, 17.21s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 XX
OOX
XOO
⏱️ Duration: 17.34 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] 

Training episodes:   1%|          | 6/1000 [01:43<4:45:22, 17.23s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
OOX
X O
⏱️ Duration: 17.25 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 0
Turn 8: player_2 → 1
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0]



Training episodes:   1%|          | 7/1000 [02:00<4:43:38, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 XX
OOX
XOO
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] 

  states_tensor = torch.FloatTensor(states).to(self.device)
Training episodes:   1%|          | 8/1000 [02:17<4:43:50, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
OOX
XXO
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [

Training episodes:   1%|          | 9/1000 [02:34<4:43:45, 17.18s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
OOX
X O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 6 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [6 7 8]



Training episodes:   1%|          | 10/1000 [02:48<4:26:00, 16.12s/it]

[Mistral_X] Chosen action: 6

🏁 Game Over!
OOX
OXX
   
⏱️ Duration: 13.74 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 4
Turn 6: player_2 → 0
Turn 7: player_1 → 6
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 6 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [6 7 8]



Training episodes:   1%|          | 11/1000 [03:02<4:14:12, 15.42s/it]

[Mistral_X] Chosen action: 6

🏁 Game Over!
OOX
OXX
   
⏱️ Duration: 13.83 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 4
Turn 6: player_2 → 0
Turn 7: player_1 → 6
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 6 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [6 7 8]



Training episodes:   1%|          | 12/1000 [03:15<4:05:43, 14.92s/it]

[Mistral_X] Chosen action: 6

🏁 Game Over!
OOX
OXX
   
⏱️ Duration: 13.77 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 4
Turn 6: player_2 → 0
Turn 7: player_1 → 6
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 6 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [6 7 8]



Training episodes:   1%|▏         | 13/1000 [03:29<3:59:18, 14.55s/it]

[Mistral_X] Chosen action: 6

🏁 Game Over!
OOX
OXX
   
⏱️ Duration: 13.68 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 4
Turn 6: player_2 → 0
Turn 7: player_1 → 6
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 6 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [6 7 8]



Training episodes:   1%|▏         | 14/1000 [03:43<3:55:04, 14.30s/it]

[Mistral_X] Chosen action: 6

🏁 Game Over!
OOX
OXX
   
⏱️ Duration: 13.73 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 4
Turn 6: player_2 → 0
Turn 7: player_1 → 6
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]



Training episodes:   2%|▏         | 15/1000 [03:57<3:51:59, 14.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
O X
X  
⏱️ Duration: 13.72 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 0
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]



Training episodes:   2%|▏         | 16/1000 [04:10<3:50:18, 14.04s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
O X
X  
⏱️ Duration: 13.83 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 0
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]



Training episodes:   2%|▏         | 17/1000 [04:24<3:48:17, 13.93s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
O X
X  
⏱️ Duration: 13.67 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 3
Turn 3: player_1 → 5
Turn 4: player_2 → 0
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:   2%|▏         | 18/1000 [04:42<4:05:38, 15.01s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
OOX
X O
⏱️ Duration: 17.50 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:   2%|▏         | 19/1000 [04:59<4:16:43, 15.70s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
OOX
X O
⏱️ Duration: 17.31 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 7 8]



Training episodes:   2%|▏         | 20/1000 [05:13<4:06:32, 15.09s/it]

[Mistral_X] LLM chose invalid move: 3, not in [1 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
O X
OOX
X  
⏱️ Duration: 13.67 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 0
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [1 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [1 8]
[DQAgent_O] Chosen 

Training episodes:   2%|▏         | 21/1000 [05:30<4:16:17, 15.71s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OOX
OOX
XX 
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 7 8]



Training episodes:   2%|▏         | 22/1000 [05:44<4:06:32, 15.13s/it]

[Mistral_X] LLM chose invalid move: 3, not in [1 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
🏁 Game Over!
XOO
XXO
O  
⏱️ Duration: 13.76 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 7 8]



Training episodes:   2%|▏         | 23/1000 [05:57<3:59:42, 14.72s/it]

[Mistral_X] LLM chose invalid move: 3, not in [1 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
O X
OOX
X  
⏱️ Duration: 13.77 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 0
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 7 8]



Training episodes:   2%|▏         | 24/1000 [06:11<3:54:15, 14.40s/it]

[Mistral_X] LLM chose invalid move: 3, not in [1 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
🏁 Game Over!
XOO
XXO
O  
⏱️ Duration: 13.65 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 7 8]



Training episodes:   2%|▎         | 25/1000 [06:25<3:50:52, 14.21s/it]

[Mistral_X] LLM chose invalid move: 3, not in [1 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
O X
XOX
O  
⏱️ Duration: 13.75 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 0
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 4 7 8]



Training episodes:   3%|▎         | 26/1000 [06:35<3:31:39, 13.04s/it]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
  X
O X
O  
⏱️ Duration: 10.30 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 3 4 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]



Training episodes:   3%|▎         | 27/1000 [06:49<3:34:58, 13.26s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X X
O  
⏱️ Duration: 13.76 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 0
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 3 4 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]



Training episodes:   3%|▎         | 28/1000 [07:03<3:37:01, 13.40s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X X
O  
⏱️ Duration: 13.71 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 0
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:   3%|▎         | 29/1000 [07:16<3:38:03, 13.47s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
X  
⏱️ Duration: 13.65 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen 

Training episodes:   3%|▎         | 30/1000 [07:33<3:56:04, 14.60s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XOO
X X
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] 

Training episodes:   3%|▎         | 31/1000 [07:51<4:08:14, 15.37s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OOX
XOO
XX 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] 

Training episodes:   3%|▎         | 32/1000 [08:08<4:16:40, 15.91s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XOO
X X
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] 

Training episodes:   3%|▎         | 33/1000 [08:25<4:23:14, 16.33s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OOX
XOO
XX 
⏱️ Duration: 17.31 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:   3%|▎         | 34/1000 [08:39<4:10:16, 15.55s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
X  
⏱️ Duration: 13.70 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:   4%|▎         | 35/1000 [08:52<4:00:50, 14.97s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
X  
⏱️ Duration: 13.63 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 2, not in [1 4 7]
[Mistral_X] Fallback r

Training episodes:   4%|▎         | 36/1000 [09:10<4:11:39, 15.66s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.26 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 8
Turn 7: player_1 → 4
Turn 8: player_2 → 1
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:   4%|▎         | 37/1000 [09:27<4:19:29, 16.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.33 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 8
Turn 7: player_1 → 0
Turn 8: player_2 → 1
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 2, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:   4%|▍         | 38/1000 [09:44<4:24:07, 16.47s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
X X
XOO
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 8
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3,

Training episodes:   4%|▍         | 39/1000 [10:01<4:26:36, 16.65s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOO
O X
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 4
Turn 7: player_1 → 0
Turn 8: player_2 → 1
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 4]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4

Training episodes:   4%|▍         | 40/1000 [10:18<4:28:20, 16.77s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 XX
XOO
OXO
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [1 4 7]



Training episodes:   4%|▍         | 41/1000 [10:32<4:13:43, 15.87s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
X X
X O
O O
⏱️ Duration: 13.77 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 8
Turn 7: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Cho

Training episodes:   4%|▍         | 42/1000 [10:49<4:19:45, 16.27s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OXX
XXO
O O
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 8
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] Chosen action: 4

[DQAgent

Training episodes:   4%|▍         | 43/1000 [11:07<4:24:29, 16.58s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OXX
XXO
O O
⏱️ Duration: 17.30 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 8
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 4]

[Mistral_X] LLM chose invalid move: 5,

Training episodes:   4%|▍         | 44/1000 [11:24<4:27:04, 16.76s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 8
Turn 7: player_1 → 0
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 4]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:   4%|▍         | 45/1000 [11:41<4:28:19, 16.86s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 8
Turn 7: player_1 → 4
Turn 8: player_2 → 1
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:   5%|▍         | 46/1000 [11:58<4:29:01, 16.92s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 0
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7

Training episodes:   5%|▍         | 47/1000 [12:15<4:29:51, 16.99s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OXX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] Chosen action: 4

[DQAgent

Training episodes:   5%|▍         | 48/1000 [12:32<4:30:12, 17.03s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OXX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 8
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 4]

[Mistral_X] LLM chose invalid move: 5,

Training episodes:   5%|▍         | 49/1000 [12:49<4:29:57, 17.03s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OXO
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 0
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 4 6 7]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 6 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [7]



Training episodes:   5%|▌         | 50/1000 [13:06<4:30:14, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OXX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 1
Turn 6: player_2 → 6
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 4 6 7]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 6 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 2,

Training episodes:   5%|▌         | 51/1000 [13:24<4:30:45, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 0
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 4 6 7]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 6 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 6 7]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [7]



Training episodes:   5%|▌         | 52/1000 [13:41<4:30:00, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 6
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 4 6 7]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 6 7]
[Mistral_X] Fallback random move: 6
[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:   5%|▌         | 53/1000 [13:58<4:30:02, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
XXO
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]



Training episodes:   5%|▌         | 54/1000 [14:11<4:13:18, 16.07s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [4 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
O O
X X
⏱️ Duration: 13.63 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]



Training episodes:   6%|▌         | 55/1000 [14:25<4:02:03, 15.37s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [4 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
O O
X X
⏱️ Duration: 13.73 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]



Training episodes:   6%|▌         | 56/1000 [14:39<3:54:19, 14.89s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [4 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
O O
X X
⏱️ Duration: 13.78 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]



Training episodes:   6%|▌         | 57/1000 [14:53<3:48:20, 14.53s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [4 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
O O
X X
⏱️ Duration: 13.67 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7

Training episodes:   6%|▌         | 58/1000 [15:10<4:00:11, 15.30s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X X
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 0
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]



Training episodes:   6%|▌         | 59/1000 [15:23<3:52:29, 14.82s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.71 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   6%|▌         | 60/1000 [15:37<3:47:06, 14.50s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [4 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
O O
X X
⏱️ Duration: 13.73 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   6%|▌         | 61/1000 [15:51<3:43:05, 14.26s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.69 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   6%|▌         | 62/1000 [16:05<3:40:22, 14.10s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [4 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
O O
X X
⏱️ Duration: 13.72 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   6%|▋         | 63/1000 [16:18<3:38:10, 13.97s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [4 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
O O
X X
⏱️ Duration: 13.67 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   6%|▋         | 64/1000 [16:32<3:36:41, 13.89s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.70 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 4]
[DQAgent_O] Chosen 

Training episodes:   6%|▋         | 65/1000 [16:49<3:51:40, 14.87s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X X
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 0
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   7%|▋         | 66/1000 [17:03<3:45:48, 14.51s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.66 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   7%|▋         | 67/1000 [17:17<3:41:53, 14.27s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.71 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   7%|▋         | 68/1000 [17:30<3:39:35, 14.14s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.82 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 6 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   7%|▋         | 69/1000 [17:44<3:37:19, 14.01s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.69 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 8
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   7%|▋         | 70/1000 [17:58<3:35:56, 13.93s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.75 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   7%|▋         | 71/1000 [18:12<3:35:05, 13.89s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.79 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   7%|▋         | 72/1000 [18:25<3:34:16, 13.85s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.76 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 4]
[DQAgent_O] Chosen 

Training episodes:   7%|▋         | 73/1000 [18:43<3:49:25, 14.85s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X X
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 0
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] 

Training episodes:   7%|▋         | 74/1000 [19:00<3:59:58, 15.55s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X X
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 0
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] 

Training episodes:   8%|▊         | 75/1000 [19:17<4:08:04, 16.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X X
OXO
⏱️ Duration: 17.35 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 0
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   8%|▊         | 76/1000 [19:31<3:57:55, 15.45s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.94 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   8%|▊         | 77/1000 [19:45<3:50:59, 15.02s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [4 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
O O
X X
⏱️ Duration: 13.99 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   8%|▊         | 78/1000 [19:59<3:46:05, 14.71s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [4 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
O O
X X
⏱️ Duration: 13.99 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   8%|▊         | 79/1000 [20:13<3:42:01, 14.46s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.87 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 4]
[DQAgent_O] Chosen 

Training episodes:   8%|▊         | 80/1000 [20:30<3:54:03, 15.26s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:   8%|▊         | 81/1000 [20:47<4:02:17, 15.82s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:   8%|▊         | 82/1000 [21:05<4:09:09, 16.28s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.36 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]



Training episodes:   8%|▊         | 83/1000 [21:18<3:57:54, 15.57s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X X
O O
⏱️ Duration: 13.88 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 4]
[DQAgent_O] Chosen 

Training episodes:   8%|▊         | 84/1000 [21:36<4:06:17, 16.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.44 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 5 6 7]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:   8%|▊         | 85/1000 [21:53<4:10:01, 16.40s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 8
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:   9%|▊         | 86/1000 [22:10<4:12:51, 16.60s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 5
Turn 6: player_2 → 8
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:   9%|▊         | 87/1000 [22:27<4:15:01, 16.76s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 5
Turn 6: player_2 → 8
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 6 7]



Training episodes:   9%|▉         | 88/1000 [22:41<4:01:05, 15.86s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 6 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [6 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
OXO
OXO
  X
⏱️ Duration: 13.76 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 5
Turn 6: player_2 → 8
Turn 7: player_1 → 0
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:   9%|▉         | 89/1000 [22:55<3:51:06, 15.22s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 6]
[DQAgent_O] Chosen action: 6
🏁 Game Over!
 OO
OXO
 XX
⏱️ Duration: 13.72 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 1
Turn 8: player_2 → 6
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:   9%|▉         | 90/1000 [23:08<3:43:49, 14.76s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 6]
[DQAgent_O] Chosen action: 6
🏁 Game Over!
 OO
OXO
 XX
⏱️ Duration: 13.67 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 1
Turn 8: player_2 → 6
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:   9%|▉         | 91/1000 [23:22<3:38:58, 14.45s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 6]
[DQAgent_O] Chosen action: 1
🏁 Game Over!
O O
OXO
 XX
⏱️ Duration: 13.74 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 0
Turn 8: player_2 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:   9%|▉         | 92/1000 [23:36<3:35:03, 14.21s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 6]
[DQAgent_O] Chosen action: 6
🏁 Game Over!
 OO
OXO
 XX
⏱️ Duration: 13.64 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 1
Turn 8: player_2 → 6
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:   9%|▉         | 93/1000 [23:49<3:32:31, 14.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 6
[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1]
[DQAgent_O] Chosen action: 1
🏁 Game Over!
  O
OXO
OXX
⏱️ Duration: 13.70 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 6
Turn 8: player_2 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:   9%|▉         | 94/1000 [24:03<3:30:42, 13.95s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 6]
[DQAgent_O] Chosen action: 1
🏁 Game Over!
O O
OXO
 XX
⏱️ Duration: 13.70 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 0
Turn 8: player_2 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:  10%|▉         | 95/1000 [24:17<3:29:03, 13.86s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 6]
[DQAgent_O] Chosen action: 1
🏁 Game Over!
O O
OXO
 XX
⏱️ Duration: 13.64 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 0
Turn 8: player_2 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:  10%|▉         | 96/1000 [24:30<3:28:05, 13.81s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 6]
[DQAgent_O] Chosen action: 1
🏁 Game Over!
O O
OXO
 XX
⏱️ Duration: 13.69 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 0
Turn 8: player_2 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:  10%|▉         | 97/1000 [24:44<3:27:01, 13.76s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 6
[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1]
[DQAgent_O] Chosen action: 1
🏁 Game Over!
  O
OXO
OXX
⏱️ Duration: 13.62 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 6
Turn 8: player_2 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:  10%|▉         | 98/1000 [24:58<3:25:50, 13.69s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 6]
[DQAgent_O] Chosen action: 1
🏁 Game Over!
O O
OXO
 XX
⏱️ Duration: 13.54 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 0
Turn 8: player_2 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:  10%|▉         | 99/1000 [25:11<3:25:36, 13.69s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 6
[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1]
[DQAgent_O] Chosen action: 1
🏁 Game Over!
  O
OXO
OXX
⏱️ Duration: 13.68 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 6
Turn 8: player_2 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 1 6]



Training episodes:  10%|█         | 100/1000 [25:25<3:25:26, 13.70s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 1 6]
[Mistral_X] Fallback random move: 6
[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1]
[DQAgent_O] Chosen action: 1
🏁 Game Over!
  O
OXO
OXX
⏱️ Duration: 13.70 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 7
Turn 7: player_1 → 6
Turn 8: player_2 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 7]



Training episodes:  10%|█         | 101/1000 [25:39<3:24:48, 13.67s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 OO
OXO
X X
⏱️ Duration: 13.60 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 1
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 7]



Training episodes:  10%|█         | 102/1000 [25:52<3:24:48, 13.68s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 OO
OXO
X X
⏱️ Duration: 13.71 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 1
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 7]



Training episodes:  10%|█         | 103/1000 [26:06<3:25:11, 13.73s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1]
[DQAgent_O] Chosen action: 0
🏁 Game Over!
  O
OXO
XOX
⏱️ Duration: 13.81 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 7]



Training episodes:  10%|█         | 104/1000 [26:20<3:25:15, 13.74s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 OO
OXO
X X
⏱️ Duration: 13.78 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 1
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 7]



Training episodes:  10%|█         | 105/1000 [26:34<3:25:32, 13.78s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1]
[DQAgent_O] Chosen action: 0
🏁 Game Over!
  O
OXO
XOX
⏱️ Duration: 13.85 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 7
Turn 8: player_2 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 7]



Training episodes:  11%|█         | 106/1000 [26:48<3:25:21, 13.78s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 OO
OXO
X X
⏱️ Duration: 13.78 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 1
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 3 6 7]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 6 7]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 0

Training episodes:  11%|█         | 107/1000 [27:05<3:39:21, 14.74s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 8
Turn 5: player_1 → 3
Turn 6: player_2 → 6
Turn 7: player_1 → 0
Turn 8: player_2 → 1
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [7]



Training episodes:  11%|█         | 108/1000 [27:22<3:49:56, 15.47s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 8
Turn 7: player_1 → 0
Turn 8: player_2 → 1
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]



Training episodes:  11%|█         | 109/1000 [27:35<3:41:42, 14.93s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1]
[DQAgent_O] Chosen action: 0
🏁 Game Over!
  O
OXO
XOX
⏱️ Duration: 13.67 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 8
Turn 7: player_1 → 7
Turn 8: player_2 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]



Training episodes:  11%|█         | 110/1000 [27:49<3:36:02, 14.56s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 OO
OXO
X X
⏱️ Duration: 13.71 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]



Training episodes:  11%|█         | 111/1000 [28:03<3:32:19, 14.33s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 OO
OXO
X X
⏱️ Duration: 13.78 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]



Training episodes:  11%|█         | 112/1000 [28:17<3:29:06, 14.13s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1]
[DQAgent_O] Chosen action: 0
🏁 Game Over!
  O
OXO
XOX
⏱️ Duration: 13.65 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 6
Turn 5: player_1 → 3
Turn 6: player_2 → 8
Turn 7: player_1 → 7
Turn 8: player_2 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  11%|█▏        | 113/1000 [28:30<3:26:46, 13.99s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
OOX
X  
⏱️ Duration: 13.64 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen 

Training episodes:  11%|█▏        | 114/1000 [28:47<3:40:26, 14.93s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
OOX
X O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0]



Training episodes:  12%|█▏        | 115/1000 [29:04<3:49:28, 15.56s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
OOX
XXO
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:  12%|█▏        | 116/1000 [29:21<3:56:14, 16.03s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
OOX
XXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [

Training episodes:  12%|█▏        | 117/1000 [29:39<4:00:23, 16.33s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
OOX
X O
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  12%|█▏        | 118/1000 [29:52<3:48:53, 15.57s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
OOX
X  
⏱️ Duration: 13.78 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen 

Training episodes:  12%|█▏        | 119/1000 [30:09<3:54:58, 16.00s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
OOX
XXO
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:  12%|█▏        | 120/1000 [30:26<3:59:13, 16.31s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
OOX
XXO
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  12%|█▏        | 121/1000 [30:40<3:47:40, 15.54s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
OOX
X  
⏱️ Duration: 13.74 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 3 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 7 8]



Training episodes:  12%|█▏        | 122/1000 [30:54<3:39:46, 15.02s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
OOX
X  
⏱️ Duration: 13.79 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 3
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 3 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 7 8]



Training episodes:  12%|█▏        | 123/1000 [31:08<3:33:57, 14.64s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
OOX
X  
⏱️ Duration: 13.74 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 3
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 3 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen 

Training episodes:  12%|█▏        | 124/1000 [31:25<3:44:46, 15.40s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
OOX
XXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 3
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 3 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [

Training episodes:  12%|█▎        | 125/1000 [31:42<3:52:12, 15.92s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
OOX
X O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 3
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 3 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 7 8]



Training episodes:  13%|█▎        | 126/1000 [31:56<3:42:21, 15.27s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
OOX
X  
⏱️ Duration: 13.72 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 3
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  13%|█▎        | 127/1000 [32:10<3:36:24, 14.87s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
OOX
X  
⏱️ Duration: 13.95 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  13%|█▎        | 128/1000 [32:23<3:30:50, 14.51s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
OOX
X  
⏱️ Duration: 13.64 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 6 7 8]
[DQAgent_O] Chosen action: 3
[Mistral_X] valid moves: [0 1 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  13%|█▎        | 129/1000 [32:37<3:27:09, 14.27s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
OOX
X  
⏱️ Duration: 13.70 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 4
Turn 3: player_1 → 5
Turn 4: player_2 → 3
Turn 5: player_1 → 6
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 6 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 6 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [6 7]
[DQAgent_O] Chosen 

Training episodes:  13%|█▎        | 130/1000 [32:54<3:39:14, 15.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 5
Turn 6: player_2 → 8
Turn 7: player_1 → 0
Turn 8: player_2 → 6
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 6 7]



Training episodes:  13%|█▎        | 131/1000 [33:08<3:32:50, 14.70s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 6 7]
[Mistral_X] Fallback random move: 6
[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 XO
OXO
O X
⏱️ Duration: 13.69 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 5
Turn 6: player_2 → 8
Turn 7: player_1 → 6
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 4 5 6 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 6 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 6 7]



Training episodes:  13%|█▎        | 132/1000 [33:22<3:28:15, 14.40s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 6 7]
[Mistral_X] Fallback random move: 6
[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 XO
OXO
O X
⏱️ Duration: 13.69 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 4
Turn 5: player_1 → 5
Turn 6: player_2 → 8
Turn 7: player_1 → 6
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0

Training episodes:  13%|█▎        | 133/1000 [33:39<3:39:28, 15.19s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0]



Training episodes:  13%|█▎        | 134/1000 [33:56<3:47:31, 15.76s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]



Training episodes:  14%|█▎        | 135/1000 [34:09<3:38:18, 15.14s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 OO
OXO
X X
⏱️ Duration: 13.69 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 0

Training episodes:  14%|█▎        | 136/1000 [34:27<3:46:58, 15.76s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 8
Turn 7: player_1 → 0
Turn 8: player_2 → 1
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 1 7]



Training episodes:  14%|█▎        | 137/1000 [34:40<3:37:50, 15.15s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 1 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 7]
[DQAgent_O] Chosen action: 7
🏁 Game Over!
 OO
OXO
X X
⏱️ Duration: 13.70 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 8
Turn 7: player_1 → 1
Turn 8: player_2 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 7]
[Mistral_X] Fallback random move: 7

Training episodes:  14%|█▍        | 138/1000 [34:57<3:46:08, 15.74s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
XXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] 

Training episodes:  14%|█▍        | 139/1000 [35:15<3:51:47, 16.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
XXO
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]



Training episodes:  14%|█▍        | 140/1000 [35:28<3:40:54, 15.41s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
X O
X O
⏱️ Duration: 13.68 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]



Training episodes:  14%|█▍        | 141/1000 [35:42<3:32:54, 14.87s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
X O
X O
⏱️ Duration: 13.60 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 6 7 8]

[Mistral_X] Chosen action: 6

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0 4 7]



Training episodes:  14%|█▍        | 142/1000 [35:55<3:27:25, 14.50s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
 OX
X O
X O
⏱️ Duration: 13.64 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 1
Turn 5: player_1 → 6
Turn 6: player_2 → 8
Turn 7: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback r

Training episodes:  14%|█▍        | 143/1000 [36:13<3:38:30, 15.30s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  14%|█▍        | 144/1000 [36:30<3:46:15, 15.86s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  14%|█▍        | 145/1000 [36:47<3:50:55, 16.21s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  15%|█▍        | 146/1000 [37:04<3:53:55, 16.43s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  15%|█▍        | 147/1000 [37:21<3:56:10, 16.61s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  15%|█▍        | 148/1000 [37:38<3:57:29, 16.72s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 16.98 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  15%|█▍        | 149/1000 [37:55<3:58:22, 16.81s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  15%|█▌        | 150/1000 [38:12<3:58:57, 16.87s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
XOX
XOO
OX 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  15%|█▌        | 151/1000 [38:29<3:59:47, 16.95s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  15%|█▌        | 152/1000 [38:46<4:00:41, 17.03s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]



Training episodes:  15%|█▌        | 153/1000 [39:00<3:46:51, 16.07s/it]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 8
🏁 Game Over!
 OO
OOX
XX 
⏱️ Duration: 13.82 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random mov

Training episodes:  15%|█▌        | 154/1000 [39:17<3:51:04, 16.39s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  16%|█▌        | 155/1000 [39:34<3:54:10, 16.63s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  16%|█▌        | 156/1000 [39:51<3:55:41, 16.76s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
XOX
XOO
OX 
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:  16%|█▌        | 157/1000 [40:08<3:56:48, 16.85s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [

Training episodes:  16%|█▌        | 158/1000 [40:26<3:57:34, 16.93s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  16%|█▌        | 159/1000 [40:43<3:59:15, 17.07s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.39 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0]



Training episodes:  16%|█▌        | 160/1000 [41:00<3:59:56, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.29 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:  16%|█▌        | 161/1000 [41:17<3:59:14, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [

Training episodes:  16%|█▌        | 162/1000 [41:34<3:58:33, 17.08s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0]



Training episodes:  16%|█▋        | 163/1000 [41:51<3:58:40, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  16%|█▋        | 164/1000 [42:05<3:43:45, 16.06s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.60 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen 

Training episodes:  16%|█▋        | 165/1000 [42:22<3:48:15, 16.40s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  17%|█▋        | 166/1000 [42:39<3:51:20, 16.64s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  17%|█▋        | 167/1000 [42:53<3:39:07, 15.78s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.77 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen 

Training episodes:  17%|█▋        | 168/1000 [43:10<3:44:19, 16.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [

Training episodes:  17%|█▋        | 169/1000 [43:27<3:47:57, 16.46s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  17%|█▋        | 170/1000 [43:41<3:36:26, 15.65s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.74 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  17%|█▋        | 171/1000 [43:55<3:28:22, 15.08s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.76 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  17%|█▋        | 172/1000 [44:09<3:22:18, 14.66s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.67 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen 

Training episodes:  17%|█▋        | 173/1000 [44:26<3:31:59, 15.38s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  17%|█▋        | 174/1000 [44:43<3:38:56, 15.90s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  18%|█▊        | 175/1000 [45:00<3:43:31, 16.26s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  18%|█▊        | 176/1000 [45:14<3:32:35, 15.48s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.66 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen 

Training episodes:  18%|█▊        | 177/1000 [45:31<3:38:43, 15.95s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [

Training episodes:  18%|█▊        | 178/1000 [45:48<3:42:52, 16.27s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  18%|█▊        | 179/1000 [46:01<3:31:54, 15.49s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.65 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen 

Training episodes:  18%|█▊        | 180/1000 [46:18<3:38:22, 15.98s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:  18%|█▊        | 181/1000 [46:36<3:43:04, 16.34s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  18%|█▊        | 182/1000 [46:49<3:32:23, 15.58s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.79 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  18%|█▊        | 183/1000 [47:03<3:24:49, 15.04s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.78 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen 

Training episodes:  18%|█▊        | 184/1000 [47:20<3:33:06, 15.67s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  18%|█▊        | 185/1000 [47:34<3:25:10, 15.10s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.78 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback r

Training episodes:  19%|█▊        | 186/1000 [47:51<3:33:35, 15.74s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
XOX
XOO
OX 
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  19%|█▊        | 187/1000 [48:09<3:38:59, 16.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  19%|█▉        | 188/1000 [48:26<3:43:45, 16.53s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.39 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  19%|█▉        | 189/1000 [48:43<3:47:02, 16.80s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.40 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  19%|█▉        | 190/1000 [49:00<3:47:59, 16.89s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  19%|█▉        | 191/1000 [49:18<3:49:06, 16.99s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  19%|█▉        | 192/1000 [49:35<3:49:12, 17.02s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  19%|█▉        | 193/1000 [49:52<3:48:51, 17.02s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  19%|█▉        | 194/1000 [50:09<3:49:02, 17.05s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  20%|█▉        | 195/1000 [50:26<3:49:02, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [

Training episodes:  20%|█▉        | 196/1000 [50:43<3:49:04, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  20%|█▉        | 197/1000 [51:00<3:49:01, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0]



Training episodes:  20%|█▉        | 198/1000 [51:17<3:48:58, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  20%|█▉        | 199/1000 [51:31<3:35:17, 16.13s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.77 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]



Training episodes:  20%|██        | 200/1000 [51:45<3:25:40, 15.43s/it]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
 OX
XOX
O  
⏱️ Duration: 13.78 sec
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen 

Training episodes:  20%|██        | 201/1000 [52:02<3:32:20, 15.95s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [0]



Training episodes:  20%|██        | 202/1000 [52:19<3:36:43, 16.30s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [

Training episodes:  20%|██        | 203/1000 [52:36<3:39:58, 16.56s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOX
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [

Training episodes:  20%|██        | 204/1000 [52:54<3:41:46, 16.72s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 3 4 5 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] Valid moves: [0 1 3 4 7 8]
[DQAgent_O] Chosen action: 4
[Mistral_X] valid moves: [0 1 3 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  20%|██        | 205/1000 [53:11<3:42:56, 16.83s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOX
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 6
Turn 3: player_1 → 5
Turn 4: player_2 → 4
Turn 5: player_1 → 3
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  21%|██        | 206/1000 [53:28<3:43:22, 16.88s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  21%|██        | 207/1000 [53:45<3:43:32, 16.91s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.98 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  21%|██        | 208/1000 [54:02<3:43:59, 16.97s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  21%|██        | 209/1000 [54:19<3:44:01, 16.99s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  21%|██        | 210/1000 [54:36<3:43:59, 17.01s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  21%|██        | 211/1000 [54:53<3:44:19, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  21%|██        | 212/1000 [55:10<3:44:44, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  21%|██▏       | 213/1000 [55:27<3:43:49, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.95 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  21%|██▏       | 214/1000 [55:45<3:44:35, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.32 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  22%|██▏       | 215/1000 [56:02<3:44:23, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  22%|██▏       | 216/1000 [56:19<3:43:55, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  22%|██▏       | 217/1000 [56:36<3:43:35, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  22%|██▏       | 218/1000 [56:53<3:43:56, 17.18s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.29 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  22%|██▏       | 219/1000 [57:10<3:43:47, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  22%|██▏       | 220/1000 [57:28<3:43:05, 17.16s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  22%|██▏       | 221/1000 [57:45<3:42:43, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  22%|██▏       | 222/1000 [58:02<3:42:11, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  22%|██▏       | 223/1000 [58:19<3:41:37, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  22%|██▏       | 224/1000 [58:36<3:41:33, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  22%|██▎       | 225/1000 [58:53<3:41:14, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  23%|██▎       | 226/1000 [59:10<3:40:28, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  23%|██▎       | 227/1000 [59:27<3:40:08, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  23%|██▎       | 228/1000 [59:44<3:40:08, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  23%|██▎       | 229/1000 [1:00:01<3:39:14, 17.06s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.94 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  23%|██▎       | 230/1000 [1:00:19<3:39:25, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  23%|██▎       | 231/1000 [1:00:36<3:39:42, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  23%|██▎       | 232/1000 [1:00:53<3:39:32, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  23%|██▎       | 233/1000 [1:01:10<3:38:34, 17.10s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [8]



Training episodes:  23%|██▎       | 234/1000 [1:01:27<3:38:05, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  24%|██▎       | 235/1000 [1:01:44<3:38:20, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [8]



Training episodes:  24%|██▎       | 236/1000 [1:02:01<3:38:13, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  24%|██▎       | 237/1000 [1:02:18<3:37:52, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  24%|██▍       | 238/1000 [1:02:36<3:37:51, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  24%|██▍       | 239/1000 [1:02:53<3:37:28, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  24%|██▍       | 240/1000 [1:03:10<3:37:32, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  24%|██▍       | 241/1000 [1:03:27<3:37:00, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  24%|██▍       | 242/1000 [1:03:44<3:36:41, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  24%|██▍       | 243/1000 [1:04:01<3:36:30, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  24%|██▍       | 244/1000 [1:04:19<3:36:18, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  24%|██▍       | 245/1000 [1:04:36<3:35:35, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOO
O X
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  25%|██▍       | 246/1000 [1:04:53<3:35:11, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  25%|██▍       | 247/1000 [1:05:10<3:34:30, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [8]



Training episodes:  25%|██▍       | 248/1000 [1:05:27<3:34:31, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  25%|██▍       | 249/1000 [1:05:44<3:34:27, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  25%|██▌       | 250/1000 [1:06:01<3:34:29, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  25%|██▌       | 251/1000 [1:06:18<3:33:41, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  25%|██▌       | 252/1000 [1:06:36<3:33:40, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOO
O X
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  25%|██▌       | 253/1000 [1:06:53<3:33:36, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  25%|██▌       | 254/1000 [1:07:10<3:33:11, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOO
O X
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  26%|██▌       | 255/1000 [1:07:27<3:32:51, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  26%|██▌       | 256/1000 [1:07:44<3:32:30, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  26%|██▌       | 257/1000 [1:08:01<3:32:10, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  26%|██▌       | 258/1000 [1:08:19<3:32:09, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  26%|██▌       | 259/1000 [1:08:36<3:31:46, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  26%|██▌       | 260/1000 [1:08:53<3:31:38, 17.16s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  26%|██▌       | 261/1000 [1:09:10<3:31:18, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  26%|██▌       | 262/1000 [1:09:27<3:30:59, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  26%|██▋       | 263/1000 [1:09:44<3:30:20, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 7
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  26%|██▋       | 264/1000 [1:10:01<3:30:06, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  26%|██▋       | 265/1000 [1:10:19<3:30:01, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  27%|██▋       | 266/1000 [1:10:36<3:29:53, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  27%|██▋       | 267/1000 [1:10:53<3:29:11, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  27%|██▋       | 268/1000 [1:11:10<3:29:04, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  27%|██▋       | 269/1000 [1:11:27<3:28:51, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  27%|██▋       | 270/1000 [1:11:44<3:28:20, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  27%|██▋       | 271/1000 [1:12:01<3:27:36, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  27%|██▋       | 272/1000 [1:12:18<3:26:43, 17.04s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.91 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  27%|██▋       | 273/1000 [1:12:35<3:26:21, 17.03s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  27%|██▋       | 274/1000 [1:12:52<3:26:34, 17.07s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  28%|██▊       | 275/1000 [1:13:10<3:26:59, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.26 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  28%|██▊       | 276/1000 [1:13:26<3:26:00, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.92 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  28%|██▊       | 277/1000 [1:13:44<3:26:00, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  28%|██▊       | 278/1000 [1:14:01<3:26:09, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  28%|██▊       | 279/1000 [1:14:18<3:25:34, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  28%|██▊       | 280/1000 [1:14:35<3:25:35, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  28%|██▊       | 281/1000 [1:14:52<3:25:57, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.31 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  28%|██▊       | 282/1000 [1:15:10<3:26:26, 17.25s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.39 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  28%|██▊       | 283/1000 [1:15:27<3:26:47, 17.30s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.42 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  28%|██▊       | 284/1000 [1:15:45<3:26:51, 17.33s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.39 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  28%|██▊       | 285/1000 [1:16:02<3:26:27, 17.33s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.29 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  29%|██▊       | 286/1000 [1:16:19<3:26:19, 17.34s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.36 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  29%|██▊       | 287/1000 [1:16:37<3:26:47, 17.40s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.54 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  29%|██▉       | 288/1000 [1:16:54<3:26:28, 17.40s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOO
O X
⏱️ Duration: 17.38 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  29%|██▉       | 289/1000 [1:17:12<3:26:23, 17.42s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.45 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  29%|██▉       | 290/1000 [1:17:29<3:25:47, 17.39s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.32 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  29%|██▉       | 291/1000 [1:17:46<3:23:59, 17.26s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  29%|██▉       | 292/1000 [1:18:03<3:23:24, 17.24s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  29%|██▉       | 293/1000 [1:18:20<3:22:57, 17.22s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  29%|██▉       | 294/1000 [1:18:38<3:22:36, 17.22s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  30%|██▉       | 295/1000 [1:18:55<3:21:39, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  30%|██▉       | 296/1000 [1:19:12<3:21:19, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  30%|██▉       | 297/1000 [1:19:29<3:21:07, 17.17s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  30%|██▉       | 298/1000 [1:19:46<3:20:35, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  30%|██▉       | 299/1000 [1:20:03<3:20:16, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  30%|███       | 300/1000 [1:20:20<3:20:01, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 8]

[Mistral_X] LLM chose invalid move: 5, not in [1 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  30%|███       | 301/1000 [1:20:37<3:19:47, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OOX
XXO
OX 
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 1
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  30%|███       | 302/1000 [1:20:55<3:19:13, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 8]

[Mistral_X] LLM chose invalid move: 5, not in [1 4 8

Training episodes:  30%|███       | 303/1000 [1:21:12<3:18:45, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 0
Turn 7: player_1 → 8
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 8]

[Mistral_X] LLM chose invalid move: 5, not in [1 4 8

Training episodes:  30%|███       | 304/1000 [1:21:29<3:18:45, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XOO
OX 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 4
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  30%|███       | 305/1000 [1:21:46<3:18:26, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  31%|███       | 306/1000 [1:22:03<3:17:58, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  31%|███       | 307/1000 [1:22:20<3:18:04, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 8]

[Mistral_X] LLM chose invalid move: 5, not in [1 4 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  31%|███       | 308/1000 [1:22:37<3:17:41, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XOO
OX 
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 4
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  31%|███       | 309/1000 [1:22:54<3:17:23, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  31%|███       | 310/1000 [1:23:12<3:17:20, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  31%|███       | 311/1000 [1:23:29<3:16:50, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  31%|███       | 312/1000 [1:23:46<3:16:21, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  31%|███▏      | 313/1000 [1:24:03<3:15:41, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  31%|███▏      | 314/1000 [1:24:20<3:15:34, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  32%|███▏      | 315/1000 [1:24:37<3:15:13, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  32%|███▏      | 316/1000 [1:24:54<3:15:01, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  32%|███▏      | 317/1000 [1:25:11<3:14:15, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  32%|███▏      | 318/1000 [1:25:28<3:14:24, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  32%|███▏      | 319/1000 [1:25:45<3:13:58, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 8]

[Mistral_X] LLM chose invalid move: 5, not in [1 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  32%|███▏      | 320/1000 [1:26:03<3:13:54, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OOX
XXO
OX 
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 1
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  32%|███▏      | 321/1000 [1:26:20<3:13:20, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  32%|███▏      | 322/1000 [1:26:37<3:13:25, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  32%|███▏      | 323/1000 [1:26:54<3:13:24, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  32%|███▏      | 324/1000 [1:27:11<3:13:20, 17.16s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  32%|███▎      | 325/1000 [1:27:28<3:12:48, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  33%|███▎      | 326/1000 [1:27:45<3:12:22, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  33%|███▎      | 327/1000 [1:28:03<3:12:08, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  33%|███▎      | 328/1000 [1:28:20<3:11:29, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  33%|███▎      | 329/1000 [1:28:37<3:11:31, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  33%|███▎      | 330/1000 [1:28:54<3:10:49, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  33%|███▎      | 331/1000 [1:29:11<3:10:52, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  33%|███▎      | 332/1000 [1:29:28<3:10:15, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  33%|███▎      | 333/1000 [1:29:45<3:10:05, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  33%|███▎      | 334/1000 [1:30:02<3:09:37, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  34%|███▎      | 335/1000 [1:30:19<3:09:30, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  34%|███▎      | 336/1000 [1:30:37<3:09:38, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOO
O X
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  34%|███▎      | 337/1000 [1:30:54<3:09:01, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  34%|███▍      | 338/1000 [1:31:11<3:08:46, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  34%|███▍      | 339/1000 [1:31:28<3:08:56, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  34%|███▍      | 340/1000 [1:31:45<3:08:07, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.98 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  34%|███▍      | 341/1000 [1:32:02<3:08:03, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  34%|███▍      | 342/1000 [1:32:19<3:07:32, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  34%|███▍      | 343/1000 [1:32:36<3:07:18, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  34%|███▍      | 344/1000 [1:32:53<3:07:11, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  34%|███▍      | 345/1000 [1:33:11<3:06:54, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  35%|███▍      | 346/1000 [1:33:27<3:06:07, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  35%|███▍      | 347/1000 [1:33:44<3:05:34, 17.05s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  35%|███▍      | 348/1000 [1:34:01<3:05:02, 17.03s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  35%|███▍      | 349/1000 [1:34:18<3:04:40, 17.02s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  35%|███▌      | 350/1000 [1:34:35<3:04:26, 17.02s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  35%|███▌      | 351/1000 [1:34:53<3:04:35, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  35%|███▌      | 352/1000 [1:35:10<3:04:33, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  35%|███▌      | 353/1000 [1:35:27<3:04:20, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  35%|███▌      | 354/1000 [1:35:44<3:04:05, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  36%|███▌      | 355/1000 [1:36:01<3:03:36, 17.08s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  36%|███▌      | 356/1000 [1:36:18<3:03:37, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  36%|███▌      | 357/1000 [1:36:35<3:02:57, 17.07s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 16.98 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  36%|███▌      | 358/1000 [1:36:52<3:02:41, 17.07s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  36%|███▌      | 359/1000 [1:37:09<3:02:31, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  36%|███▌      | 360/1000 [1:37:26<3:01:55, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 16.98 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  36%|███▌      | 361/1000 [1:37:43<3:01:36, 17.05s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  36%|███▌      | 362/1000 [1:38:01<3:01:40, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  36%|███▋      | 363/1000 [1:38:18<3:01:33, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  36%|███▋      | 364/1000 [1:38:35<3:01:31, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  36%|███▋      | 365/1000 [1:38:52<3:01:07, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  37%|███▋      | 366/1000 [1:39:09<3:01:10, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  37%|███▋      | 367/1000 [1:39:26<3:00:57, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  37%|███▋      | 368/1000 [1:39:44<3:00:43, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  37%|███▋      | 369/1000 [1:40:01<3:00:20, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  37%|███▋      | 370/1000 [1:40:18<2:59:52, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  37%|███▋      | 371/1000 [1:40:35<2:59:05, 17.08s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  37%|███▋      | 372/1000 [1:40:52<2:58:43, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  37%|███▋      | 373/1000 [1:41:09<2:58:28, 17.08s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  37%|███▋      | 374/1000 [1:41:26<2:58:57, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.31 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  38%|███▊      | 375/1000 [1:41:44<2:59:17, 17.21s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.34 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  38%|███▊      | 376/1000 [1:42:01<2:59:07, 17.22s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.24 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  38%|███▊      | 377/1000 [1:42:18<2:58:33, 17.20s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  38%|███▊      | 378/1000 [1:42:35<2:58:11, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  38%|███▊      | 379/1000 [1:42:52<2:57:49, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  38%|███▊      | 380/1000 [1:43:09<2:57:05, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  38%|███▊      | 381/1000 [1:43:26<2:56:50, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  38%|███▊      | 382/1000 [1:43:44<2:56:13, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  38%|███▊      | 383/1000 [1:44:01<2:55:50, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  38%|███▊      | 384/1000 [1:44:18<2:55:30, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  38%|███▊      | 385/1000 [1:44:35<2:54:53, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 16.98 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  39%|███▊      | 386/1000 [1:44:52<2:54:38, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  39%|███▊      | 387/1000 [1:45:09<2:54:33, 17.09s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  39%|███▉      | 388/1000 [1:45:26<2:54:17, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  39%|███▉      | 389/1000 [1:45:43<2:53:58, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  39%|███▉      | 390/1000 [1:46:00<2:53:33, 17.07s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  39%|███▉      | 391/1000 [1:46:17<2:53:32, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  39%|███▉      | 392/1000 [1:46:34<2:53:05, 17.08s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  39%|███▉      | 393/1000 [1:46:51<2:53:06, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  39%|███▉      | 394/1000 [1:47:09<2:53:04, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  40%|███▉      | 395/1000 [1:47:26<2:52:50, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  40%|███▉      | 396/1000 [1:47:43<2:52:48, 17.17s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  40%|███▉      | 397/1000 [1:48:00<2:52:32, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  40%|███▉      | 398/1000 [1:48:17<2:52:11, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  40%|███▉      | 399/1000 [1:48:34<2:51:31, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  40%|████      | 400/1000 [1:48:51<2:50:46, 17.08s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  40%|████      | 401/1000 [1:49:08<2:50:39, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  40%|████      | 402/1000 [1:49:26<2:50:23, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  40%|████      | 403/1000 [1:49:43<2:50:14, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  40%|████      | 404/1000 [1:50:00<2:49:47, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  40%|████      | 405/1000 [1:50:17<2:49:19, 17.08s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  41%|████      | 406/1000 [1:50:34<2:49:11, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  41%|████      | 407/1000 [1:50:51<2:48:48, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  41%|████      | 408/1000 [1:51:08<2:48:48, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  41%|████      | 409/1000 [1:51:25<2:48:42, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  41%|████      | 410/1000 [1:51:42<2:48:06, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  41%|████      | 411/1000 [1:52:00<2:48:05, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  41%|████      | 412/1000 [1:52:17<2:47:59, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  41%|████▏     | 413/1000 [1:52:34<2:47:40, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  41%|████▏     | 414/1000 [1:52:51<2:47:10, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  42%|████▏     | 415/1000 [1:53:08<2:46:51, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  42%|████▏     | 416/1000 [1:53:25<2:46:29, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  42%|████▏     | 417/1000 [1:53:42<2:46:16, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  42%|████▏     | 418/1000 [1:53:59<2:45:52, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  42%|████▏     | 419/1000 [1:54:16<2:45:43, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  42%|████▏     | 420/1000 [1:54:34<2:45:30, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  42%|████▏     | 421/1000 [1:54:51<2:45:10, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  42%|████▏     | 422/1000 [1:55:08<2:44:29, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  42%|████▏     | 423/1000 [1:55:25<2:43:59, 17.05s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  42%|████▏     | 424/1000 [1:55:42<2:44:03, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  42%|████▎     | 425/1000 [1:55:59<2:43:52, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  43%|████▎     | 426/1000 [1:56:16<2:43:48, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  43%|████▎     | 427/1000 [1:56:33<2:43:18, 17.10s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  43%|████▎     | 428/1000 [1:56:50<2:43:21, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  43%|████▎     | 429/1000 [1:57:07<2:42:36, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  43%|████▎     | 430/1000 [1:57:24<2:42:00, 17.05s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  43%|████▎     | 431/1000 [1:57:41<2:41:30, 17.03s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  43%|████▎     | 432/1000 [1:57:59<2:41:32, 17.06s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  43%|████▎     | 433/1000 [1:58:16<2:41:33, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  43%|████▎     | 434/1000 [1:58:33<2:41:26, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  44%|████▎     | 435/1000 [1:58:50<2:41:16, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  44%|████▎     | 436/1000 [1:59:07<2:41:06, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  44%|████▎     | 437/1000 [1:59:24<2:40:28, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  44%|████▍     | 438/1000 [1:59:41<2:40:05, 17.09s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  44%|████▍     | 439/1000 [1:59:58<2:40:01, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  44%|████▍     | 440/1000 [2:00:16<2:39:57, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  44%|████▍     | 441/1000 [2:00:33<2:40:20, 17.21s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.37 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  44%|████▍     | 442/1000 [2:00:50<2:40:39, 17.28s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.42 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  44%|████▍     | 443/1000 [2:01:08<2:40:56, 17.34s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.47 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  44%|████▍     | 444/1000 [2:01:26<2:41:33, 17.43s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.65 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  44%|████▍     | 445/1000 [2:01:43<2:41:08, 17.42s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.38 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  45%|████▍     | 446/1000 [2:02:00<2:40:46, 17.41s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.38 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  45%|████▍     | 447/1000 [2:02:18<2:40:33, 17.42s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.43 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  45%|████▍     | 448/1000 [2:02:35<2:40:08, 17.41s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.37 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  45%|████▍     | 449/1000 [2:02:52<2:39:09, 17.33s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  45%|████▌     | 450/1000 [2:03:10<2:38:36, 17.30s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  45%|████▌     | 451/1000 [2:03:27<2:37:53, 17.26s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  45%|████▌     | 452/1000 [2:03:44<2:37:22, 17.23s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  45%|████▌     | 453/1000 [2:04:01<2:36:39, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  45%|████▌     | 454/1000 [2:04:18<2:36:16, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  46%|████▌     | 455/1000 [2:04:35<2:35:57, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  46%|████▌     | 456/1000 [2:04:52<2:35:17, 17.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  46%|████▌     | 457/1000 [2:05:09<2:35:13, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  46%|████▌     | 458/1000 [2:05:27<2:34:55, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  46%|████▌     | 459/1000 [2:05:44<2:34:13, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  46%|████▌     | 460/1000 [2:06:01<2:33:46, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  46%|████▌     | 461/1000 [2:06:18<2:33:48, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  46%|████▌     | 462/1000 [2:06:35<2:34:04, 17.18s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.31 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  46%|████▋     | 463/1000 [2:06:53<2:34:37, 17.28s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.48 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  46%|████▋     | 464/1000 [2:07:10<2:34:53, 17.34s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.47 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  46%|████▋     | 465/1000 [2:07:28<2:34:59, 17.38s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.47 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  47%|████▋     | 466/1000 [2:07:45<2:34:58, 17.41s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.48 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  47%|████▋     | 467/1000 [2:08:03<2:34:36, 17.40s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.37 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  47%|████▋     | 468/1000 [2:08:20<2:34:07, 17.38s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.32 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  47%|████▋     | 469/1000 [2:08:37<2:33:47, 17.38s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.36 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  47%|████▋     | 470/1000 [2:08:55<2:33:51, 17.42s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.50 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  47%|████▋     | 471/1000 [2:09:12<2:32:57, 17.35s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  47%|████▋     | 472/1000 [2:09:29<2:31:59, 17.27s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  47%|████▋     | 473/1000 [2:09:46<2:32:14, 17.33s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.47 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  47%|████▋     | 474/1000 [2:10:04<2:31:59, 17.34s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.34 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  48%|████▊     | 475/1000 [2:10:21<2:31:49, 17.35s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.37 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  48%|████▊     | 476/1000 [2:10:39<2:31:37, 17.36s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.38 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  48%|████▊     | 477/1000 [2:10:56<2:31:27, 17.38s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.39 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  48%|████▊     | 478/1000 [2:11:13<2:31:13, 17.38s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.39 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  48%|████▊     | 479/1000 [2:11:31<2:30:58, 17.39s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.39 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  48%|████▊     | 480/1000 [2:11:48<2:31:00, 17.42s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.50 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  48%|████▊     | 481/1000 [2:12:06<2:30:48, 17.43s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.45 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  48%|████▊     | 482/1000 [2:12:23<2:30:24, 17.42s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.38 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  48%|████▊     | 483/1000 [2:12:41<2:30:14, 17.44s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.46 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  48%|████▊     | 484/1000 [2:12:58<2:29:06, 17.34s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  48%|████▊     | 485/1000 [2:13:15<2:28:28, 17.30s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  49%|████▊     | 486/1000 [2:13:32<2:28:22, 17.32s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.36 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  49%|████▊     | 487/1000 [2:13:50<2:28:43, 17.39s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.56 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  49%|████▉     | 488/1000 [2:14:07<2:28:28, 17.40s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.40 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  49%|████▉     | 489/1000 [2:14:25<2:28:13, 17.40s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.41 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  49%|████▉     | 490/1000 [2:14:42<2:27:58, 17.41s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.41 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  49%|████▉     | 491/1000 [2:15:00<2:27:35, 17.40s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.36 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  49%|████▉     | 492/1000 [2:15:17<2:27:08, 17.38s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.33 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  49%|████▉     | 493/1000 [2:15:34<2:26:06, 17.29s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  49%|████▉     | 494/1000 [2:15:51<2:25:23, 17.24s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  50%|████▉     | 495/1000 [2:16:08<2:24:34, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  50%|████▉     | 496/1000 [2:16:25<2:24:13, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  50%|████▉     | 497/1000 [2:16:42<2:23:28, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  50%|████▉     | 498/1000 [2:16:59<2:22:55, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  50%|████▉     | 499/1000 [2:17:16<2:22:27, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  50%|█████     | 500/1000 [2:17:33<2:22:03, 17.05s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  50%|█████     | 501/1000 [2:17:50<2:21:52, 17.06s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  50%|█████     | 502/1000 [2:18:07<2:21:40, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  50%|█████     | 503/1000 [2:18:25<2:21:44, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  50%|█████     | 504/1000 [2:18:42<2:21:31, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  50%|█████     | 505/1000 [2:18:59<2:21:05, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  51%|█████     | 506/1000 [2:19:16<2:20:59, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  51%|█████     | 507/1000 [2:19:33<2:20:52, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  51%|█████     | 508/1000 [2:19:50<2:20:31, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  51%|█████     | 509/1000 [2:20:08<2:20:25, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  51%|█████     | 510/1000 [2:20:25<2:20:13, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  51%|█████     | 511/1000 [2:20:42<2:20:00, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  51%|█████     | 512/1000 [2:20:59<2:19:42, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  51%|█████▏    | 513/1000 [2:21:16<2:19:08, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  51%|█████▏    | 514/1000 [2:21:33<2:18:50, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  52%|█████▏    | 515/1000 [2:21:50<2:18:09, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  52%|█████▏    | 516/1000 [2:22:07<2:17:57, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  52%|█████▏    | 517/1000 [2:22:25<2:18:01, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.24 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  52%|█████▏    | 518/1000 [2:22:42<2:17:43, 17.14s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  52%|█████▏    | 519/1000 [2:22:59<2:17:47, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.28 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  52%|█████▏    | 520/1000 [2:23:16<2:17:27, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  52%|█████▏    | 521/1000 [2:23:33<2:16:53, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  52%|█████▏    | 522/1000 [2:23:51<2:16:54, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.27 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  52%|█████▏    | 523/1000 [2:24:08<2:16:27, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  52%|█████▏    | 524/1000 [2:24:25<2:16:03, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  52%|█████▎    | 525/1000 [2:24:42<2:15:54, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  53%|█████▎    | 526/1000 [2:24:59<2:15:23, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  53%|█████▎    | 527/1000 [2:25:16<2:15:05, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  53%|█████▎    | 528/1000 [2:25:33<2:14:49, 17.14s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  53%|█████▎    | 529/1000 [2:25:50<2:14:22, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  53%|█████▎    | 530/1000 [2:26:08<2:13:56, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  53%|█████▎    | 531/1000 [2:26:25<2:13:51, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  53%|█████▎    | 532/1000 [2:26:42<2:13:49, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  53%|█████▎    | 533/1000 [2:26:59<2:13:42, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  53%|█████▎    | 534/1000 [2:27:16<2:13:28, 17.19s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  54%|█████▎    | 535/1000 [2:27:34<2:13:05, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  54%|█████▎    | 536/1000 [2:27:51<2:12:47, 17.17s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  54%|█████▎    | 537/1000 [2:28:08<2:12:22, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  54%|█████▍    | 538/1000 [2:28:25<2:12:19, 17.18s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.24 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  54%|█████▍    | 539/1000 [2:28:42<2:11:44, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  54%|█████▍    | 540/1000 [2:28:59<2:11:18, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  54%|█████▍    | 541/1000 [2:29:16<2:11:05, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  54%|█████▍    | 542/1000 [2:29:33<2:10:41, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  54%|█████▍    | 543/1000 [2:29:50<2:10:15, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  54%|█████▍    | 544/1000 [2:30:08<2:09:54, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  55%|█████▍    | 545/1000 [2:30:25<2:09:25, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  55%|█████▍    | 546/1000 [2:30:42<2:09:26, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  55%|█████▍    | 547/1000 [2:30:59<2:09:20, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  55%|█████▍    | 548/1000 [2:31:16<2:09:09, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  55%|█████▍    | 549/1000 [2:31:33<2:08:42, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  55%|█████▌    | 550/1000 [2:31:50<2:08:30, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  55%|█████▌    | 551/1000 [2:32:07<2:08:09, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  55%|█████▌    | 552/1000 [2:32:25<2:07:59, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  55%|█████▌    | 553/1000 [2:32:42<2:07:40, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  55%|█████▌    | 554/1000 [2:32:59<2:07:26, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  56%|█████▌    | 555/1000 [2:33:16<2:06:48, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.98 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  56%|█████▌    | 556/1000 [2:33:33<2:06:24, 17.08s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  56%|█████▌    | 557/1000 [2:33:50<2:06:09, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  56%|█████▌    | 558/1000 [2:34:07<2:05:52, 17.09s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  56%|█████▌    | 559/1000 [2:34:24<2:05:46, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  56%|█████▌    | 560/1000 [2:34:41<2:05:35, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  56%|█████▌    | 561/1000 [2:34:59<2:05:21, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  56%|█████▌    | 562/1000 [2:35:16<2:05:14, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  56%|█████▋    | 563/1000 [2:35:33<2:05:03, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  56%|█████▋    | 564/1000 [2:35:50<2:04:31, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  56%|█████▋    | 565/1000 [2:36:07<2:04:06, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  57%|█████▋    | 566/1000 [2:36:24<2:03:47, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  57%|█████▋    | 567/1000 [2:36:41<2:03:22, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  57%|█████▋    | 568/1000 [2:36:58<2:03:13, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  57%|█████▋    | 569/1000 [2:37:15<2:02:35, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.95 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  57%|█████▋    | 570/1000 [2:37:32<2:02:15, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  57%|█████▋    | 571/1000 [2:37:49<2:01:47, 17.03s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  57%|█████▋    | 572/1000 [2:38:07<2:01:41, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  57%|█████▋    | 573/1000 [2:38:24<2:01:31, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  57%|█████▋    | 574/1000 [2:38:41<2:01:24, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  57%|█████▊    | 575/1000 [2:38:58<2:01:15, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  58%|█████▊    | 576/1000 [2:39:15<2:00:49, 17.10s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  58%|█████▊    | 577/1000 [2:39:32<2:00:32, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  58%|█████▊    | 578/1000 [2:39:49<2:00:26, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  58%|█████▊    | 579/1000 [2:40:07<2:00:15, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  58%|█████▊    | 580/1000 [2:40:24<2:00:07, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  58%|█████▊    | 581/1000 [2:40:41<1:59:35, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  58%|█████▊    | 582/1000 [2:40:58<1:59:05, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  58%|█████▊    | 583/1000 [2:41:15<1:58:51, 17.10s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  58%|█████▊    | 584/1000 [2:41:32<1:58:28, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  58%|█████▊    | 585/1000 [2:41:49<1:58:05, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  59%|█████▊    | 586/1000 [2:42:06<1:57:52, 17.08s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  59%|█████▊    | 587/1000 [2:42:23<1:57:31, 17.08s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  59%|█████▉    | 588/1000 [2:42:40<1:57:19, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  59%|█████▉    | 589/1000 [2:42:57<1:57:06, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  59%|█████▉    | 590/1000 [2:43:15<1:57:07, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  59%|█████▉    | 591/1000 [2:43:32<1:57:03, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.24 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  59%|█████▉    | 592/1000 [2:43:49<1:56:32, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  59%|█████▉    | 593/1000 [2:44:06<1:56:10, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  59%|█████▉    | 594/1000 [2:44:23<1:55:59, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  60%|█████▉    | 595/1000 [2:44:40<1:55:46, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  60%|█████▉    | 596/1000 [2:44:57<1:55:14, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  60%|█████▉    | 597/1000 [2:45:14<1:54:47, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  60%|█████▉    | 598/1000 [2:45:31<1:54:22, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  60%|█████▉    | 599/1000 [2:45:48<1:53:50, 17.03s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 16.95 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  60%|██████    | 600/1000 [2:46:06<1:53:40, 17.05s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  60%|██████    | 601/1000 [2:46:23<1:53:58, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.33 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  60%|██████    | 602/1000 [2:46:40<1:53:51, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  60%|██████    | 603/1000 [2:46:57<1:53:39, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  60%|██████    | 604/1000 [2:47:14<1:53:21, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  60%|██████    | 605/1000 [2:47:32<1:53:02, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  61%|██████    | 606/1000 [2:47:49<1:52:36, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  61%|██████    | 607/1000 [2:48:06<1:52:19, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  61%|██████    | 608/1000 [2:48:23<1:51:57, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  61%|██████    | 609/1000 [2:48:40<1:51:45, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  61%|██████    | 610/1000 [2:48:57<1:51:16, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  61%|██████    | 611/1000 [2:49:14<1:50:46, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  61%|██████    | 612/1000 [2:49:31<1:50:40, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  61%|██████▏   | 613/1000 [2:49:49<1:50:25, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  61%|██████▏   | 614/1000 [2:50:06<1:50:00, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  62%|██████▏   | 615/1000 [2:50:23<1:49:32, 17.07s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  62%|██████▏   | 616/1000 [2:50:40<1:49:15, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  62%|██████▏   | 617/1000 [2:50:57<1:49:03, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  62%|██████▏   | 618/1000 [2:51:14<1:48:54, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  62%|██████▏   | 619/1000 [2:51:31<1:48:56, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.26 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  62%|██████▏   | 620/1000 [2:51:48<1:48:34, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  62%|██████▏   | 621/1000 [2:52:05<1:48:12, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  62%|██████▏   | 622/1000 [2:52:23<1:47:59, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  62%|██████▏   | 623/1000 [2:52:40<1:47:47, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  62%|██████▏   | 624/1000 [2:52:57<1:47:34, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  62%|██████▎   | 625/1000 [2:53:14<1:47:14, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  63%|██████▎   | 626/1000 [2:53:31<1:46:44, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  63%|██████▎   | 627/1000 [2:53:48<1:46:47, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.30 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  63%|██████▎   | 628/1000 [2:54:06<1:46:49, 17.23s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.34 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  63%|██████▎   | 629/1000 [2:54:23<1:46:51, 17.28s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.39 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  63%|██████▎   | 630/1000 [2:54:41<1:46:48, 17.32s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.40 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  63%|██████▎   | 631/1000 [2:54:58<1:46:42, 17.35s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.41 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  63%|██████▎   | 632/1000 [2:55:15<1:46:24, 17.35s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.34 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  63%|██████▎   | 633/1000 [2:55:33<1:46:10, 17.36s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.37 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  63%|██████▎   | 634/1000 [2:55:50<1:45:57, 17.37s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.38 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  64%|██████▎   | 635/1000 [2:56:08<1:45:49, 17.40s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.44 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  64%|██████▎   | 636/1000 [2:56:25<1:45:15, 17.35s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  64%|██████▎   | 637/1000 [2:56:42<1:44:38, 17.30s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  64%|██████▍   | 638/1000 [2:56:59<1:43:54, 17.22s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  64%|██████▍   | 639/1000 [2:57:16<1:43:28, 17.20s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  64%|██████▍   | 640/1000 [2:57:33<1:42:57, 17.16s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  64%|██████▍   | 641/1000 [2:57:50<1:42:36, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  64%|██████▍   | 642/1000 [2:58:07<1:42:06, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  64%|██████▍   | 643/1000 [2:58:25<1:41:56, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  64%|██████▍   | 644/1000 [2:58:42<1:41:43, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  64%|██████▍   | 645/1000 [2:58:59<1:41:32, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  65%|██████▍   | 646/1000 [2:59:16<1:41:15, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  65%|██████▍   | 647/1000 [2:59:33<1:40:49, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  65%|██████▍   | 648/1000 [2:59:50<1:40:31, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  65%|██████▍   | 649/1000 [3:00:07<1:40:01, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  65%|██████▌   | 650/1000 [3:00:24<1:39:37, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOO
O X
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  65%|██████▌   | 651/1000 [3:00:41<1:39:15, 17.06s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  65%|██████▌   | 652/1000 [3:00:59<1:39:11, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  65%|██████▌   | 653/1000 [3:01:16<1:38:59, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  65%|██████▌   | 654/1000 [3:01:33<1:38:45, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  66%|██████▌   | 655/1000 [3:01:50<1:38:40, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  66%|██████▌   | 656/1000 [3:02:07<1:38:06, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  66%|██████▌   | 657/1000 [3:02:24<1:37:48, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  66%|██████▌   | 658/1000 [3:02:41<1:37:25, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  66%|██████▌   | 659/1000 [3:02:58<1:37:09, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  66%|██████▌   | 660/1000 [3:03:16<1:36:58, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  66%|██████▌   | 661/1000 [3:03:33<1:36:31, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  66%|██████▌   | 662/1000 [3:03:50<1:36:22, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  66%|██████▋   | 663/1000 [3:04:07<1:36:09, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  66%|██████▋   | 664/1000 [3:04:24<1:36:03, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  66%|██████▋   | 665/1000 [3:04:41<1:35:43, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  67%|██████▋   | 666/1000 [3:04:58<1:35:14, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  67%|██████▋   | 667/1000 [3:05:15<1:35:01, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  67%|██████▋   | 668/1000 [3:05:32<1:34:35, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  67%|██████▋   | 669/1000 [3:05:49<1:34:08, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.98 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  67%|██████▋   | 670/1000 [3:06:07<1:34:05, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  67%|██████▋   | 671/1000 [3:06:24<1:33:58, 17.14s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  67%|██████▋   | 672/1000 [3:06:41<1:33:45, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  67%|██████▋   | 673/1000 [3:06:58<1:33:31, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  67%|██████▋   | 674/1000 [3:07:15<1:33:04, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  68%|██████▊   | 675/1000 [3:07:32<1:32:48, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  68%|██████▊   | 676/1000 [3:07:50<1:32:34, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  68%|██████▊   | 677/1000 [3:08:07<1:32:07, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  68%|██████▊   | 678/1000 [3:08:24<1:31:53, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  68%|██████▊   | 679/1000 [3:08:41<1:31:41, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  68%|██████▊   | 680/1000 [3:08:58<1:31:26, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  68%|██████▊   | 681/1000 [3:09:15<1:31:01, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  68%|██████▊   | 682/1000 [3:09:32<1:30:33, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  68%|██████▊   | 683/1000 [3:09:49<1:30:10, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  68%|██████▊   | 684/1000 [3:10:06<1:29:43, 17.04s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  68%|██████▊   | 685/1000 [3:10:23<1:29:26, 17.04s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  69%|██████▊   | 686/1000 [3:10:40<1:29:20, 17.07s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  69%|██████▊   | 687/1000 [3:10:58<1:29:09, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  69%|██████▉   | 688/1000 [3:11:15<1:29:02, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  69%|██████▉   | 689/1000 [3:11:32<1:28:52, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  69%|██████▉   | 690/1000 [3:11:49<1:28:37, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  69%|██████▉   | 691/1000 [3:12:06<1:28:25, 17.17s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  69%|██████▉   | 692/1000 [3:12:23<1:27:57, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  69%|██████▉   | 693/1000 [3:12:40<1:27:29, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  69%|██████▉   | 694/1000 [3:12:57<1:27:07, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  70%|██████▉   | 695/1000 [3:13:15<1:26:58, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  70%|██████▉   | 696/1000 [3:13:32<1:26:27, 17.06s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.94 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  70%|██████▉   | 697/1000 [3:13:49<1:26:20, 17.10s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  70%|██████▉   | 698/1000 [3:14:06<1:26:03, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  70%|██████▉   | 699/1000 [3:14:23<1:25:49, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  70%|███████   | 700/1000 [3:14:40<1:25:40, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  70%|███████   | 701/1000 [3:14:57<1:25:16, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  70%|███████   | 702/1000 [3:15:14<1:25:00, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  70%|███████   | 703/1000 [3:15:32<1:24:48, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  70%|███████   | 704/1000 [3:15:49<1:24:32, 17.14s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  70%|███████   | 705/1000 [3:16:06<1:24:22, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  71%|███████   | 706/1000 [3:16:23<1:24:10, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  71%|███████   | 707/1000 [3:16:40<1:23:39, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  71%|███████   | 708/1000 [3:16:57<1:23:18, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [0 4 7

Training episodes:  71%|███████   | 709/1000 [3:17:14<1:23:05, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XOO
O X
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 4
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  71%|███████   | 710/1000 [3:17:31<1:22:41, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  71%|███████   | 711/1000 [3:17:49<1:22:22, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  71%|███████   | 712/1000 [3:18:05<1:21:52, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 16.94 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  71%|███████▏  | 713/1000 [3:18:23<1:21:50, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  71%|███████▏  | 714/1000 [3:18:40<1:21:37, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  72%|███████▏  | 715/1000 [3:18:57<1:21:25, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  72%|███████▏  | 716/1000 [3:19:14<1:20:59, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  72%|███████▏  | 717/1000 [3:19:31<1:20:47, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  72%|███████▏  | 718/1000 [3:19:48<1:20:35, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  72%|███████▏  | 719/1000 [3:20:06<1:20:20, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  72%|███████▏  | 720/1000 [3:20:23<1:20:04, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  72%|███████▏  | 721/1000 [3:20:40<1:19:37, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  72%|███████▏  | 722/1000 [3:20:57<1:19:23, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  72%|███████▏  | 723/1000 [3:21:14<1:19:05, 17.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  72%|███████▏  | 724/1000 [3:21:31<1:18:59, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.25 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  72%|███████▎  | 725/1000 [3:21:48<1:18:32, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  73%|███████▎  | 726/1000 [3:22:06<1:18:20, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  73%|███████▎  | 727/1000 [3:22:23<1:17:51, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  73%|███████▎  | 728/1000 [3:22:40<1:17:35, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  73%|███████▎  | 729/1000 [3:22:57<1:17:06, 17.07s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  73%|███████▎  | 730/1000 [3:23:14<1:16:46, 17.06s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  73%|███████▎  | 731/1000 [3:23:31<1:16:34, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  73%|███████▎  | 732/1000 [3:23:48<1:16:15, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  73%|███████▎  | 733/1000 [3:24:05<1:15:52, 17.05s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  73%|███████▎  | 734/1000 [3:24:22<1:15:48, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  74%|███████▎  | 735/1000 [3:24:39<1:15:29, 17.09s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  74%|███████▎  | 736/1000 [3:24:56<1:15:12, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  74%|███████▎  | 737/1000 [3:25:13<1:14:50, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  74%|███████▍  | 738/1000 [3:25:30<1:14:30, 17.06s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  74%|███████▍  | 739/1000 [3:25:48<1:14:23, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  74%|███████▍  | 740/1000 [3:26:05<1:14:02, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  74%|███████▍  | 741/1000 [3:26:22<1:13:57, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  74%|███████▍  | 742/1000 [3:26:39<1:13:41, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  74%|███████▍  | 743/1000 [3:26:56<1:13:23, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  74%|███████▍  | 744/1000 [3:27:13<1:13:07, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  74%|███████▍  | 745/1000 [3:27:31<1:12:55, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  75%|███████▍  | 746/1000 [3:27:48<1:12:36, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  75%|███████▍  | 747/1000 [3:28:05<1:12:23, 17.17s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  75%|███████▍  | 748/1000 [3:28:22<1:12:07, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  75%|███████▍  | 749/1000 [3:28:39<1:11:54, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  75%|███████▌  | 750/1000 [3:28:56<1:11:24, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  75%|███████▌  | 751/1000 [3:29:13<1:11:08, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  75%|███████▌  | 752/1000 [3:29:31<1:10:47, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  75%|███████▌  | 753/1000 [3:29:48<1:10:28, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  75%|███████▌  | 754/1000 [3:30:05<1:10:14, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  76%|███████▌  | 755/1000 [3:30:22<1:10:03, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  76%|███████▌  | 756/1000 [3:30:39<1:09:48, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  76%|███████▌  | 757/1000 [3:30:56<1:09:32, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  76%|███████▌  | 758/1000 [3:31:13<1:09:08, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  76%|███████▌  | 759/1000 [3:31:31<1:08:55, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  76%|███████▌  | 760/1000 [3:31:48<1:08:36, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  76%|███████▌  | 761/1000 [3:32:05<1:08:21, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  76%|███████▌  | 762/1000 [3:32:22<1:08:08, 17.18s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  76%|███████▋  | 763/1000 [3:32:39<1:07:55, 17.20s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.23 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  76%|███████▋  | 764/1000 [3:32:56<1:07:22, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  76%|███████▋  | 765/1000 [3:33:13<1:06:54, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  77%|███████▋  | 766/1000 [3:33:30<1:06:33, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  77%|███████▋  | 767/1000 [3:33:48<1:06:25, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  77%|███████▋  | 768/1000 [3:34:05<1:06:08, 17.10s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  77%|███████▋  | 769/1000 [3:34:22<1:05:50, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  77%|███████▋  | 770/1000 [3:34:39<1:05:36, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  77%|███████▋  | 771/1000 [3:34:56<1:05:13, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  77%|███████▋  | 772/1000 [3:35:13<1:05:00, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  77%|███████▋  | 773/1000 [3:35:30<1:04:51, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  77%|███████▋  | 774/1000 [3:35:47<1:04:25, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  78%|███████▊  | 775/1000 [3:36:05<1:04:12, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  78%|███████▊  | 776/1000 [3:36:22<1:03:56, 17.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  78%|███████▊  | 777/1000 [3:36:39<1:03:36, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  78%|███████▊  | 778/1000 [3:36:56<1:03:17, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  78%|███████▊  | 779/1000 [3:37:13<1:02:59, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  78%|███████▊  | 780/1000 [3:37:30<1:02:38, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  78%|███████▊  | 781/1000 [3:37:47<1:02:21, 17.08s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  78%|███████▊  | 782/1000 [3:38:04<1:02:02, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  78%|███████▊  | 783/1000 [3:38:21<1:01:52, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  78%|███████▊  | 784/1000 [3:38:38<1:01:39, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  78%|███████▊  | 785/1000 [3:38:56<1:01:27, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  79%|███████▊  | 786/1000 [3:39:13<1:01:14, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  79%|███████▊  | 787/1000 [3:39:30<1:00:53, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  79%|███████▉  | 788/1000 [3:39:47<1:00:36, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  79%|███████▉  | 789/1000 [3:40:04<1:00:17, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  79%|███████▉  | 790/1000 [3:40:21<1:00:04, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  79%|███████▉  | 791/1000 [3:40:39<59:50, 17.18s/it]  

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  79%|███████▉  | 792/1000 [3:40:56<59:28, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  79%|███████▉  | 793/1000 [3:41:13<59:02, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  79%|███████▉  | 794/1000 [3:41:30<58:38, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  80%|███████▉  | 795/1000 [3:41:47<58:17, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  80%|███████▉  | 796/1000 [3:42:04<57:54, 17.03s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  80%|███████▉  | 797/1000 [3:42:21<57:37, 17.03s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  80%|███████▉  | 798/1000 [3:42:38<57:27, 17.07s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  80%|███████▉  | 799/1000 [3:42:55<57:07, 17.05s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  80%|████████  | 800/1000 [3:43:12<56:58, 17.09s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  80%|████████  | 801/1000 [3:43:29<56:46, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  80%|████████  | 802/1000 [3:43:47<56:33, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  80%|████████  | 803/1000 [3:44:04<56:10, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  80%|████████  | 804/1000 [3:44:21<55:56, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  80%|████████  | 805/1000 [3:44:38<55:41, 17.14s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  81%|████████  | 806/1000 [3:44:55<55:31, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.24 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  81%|████████  | 807/1000 [3:45:12<55:10, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  81%|████████  | 808/1000 [3:45:29<54:48, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  81%|████████  | 809/1000 [3:45:46<54:30, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  81%|████████  | 810/1000 [3:46:04<54:10, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  81%|████████  | 811/1000 [3:46:21<53:48, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  81%|████████  | 812/1000 [3:46:38<53:36, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  81%|████████▏ | 813/1000 [3:46:55<53:20, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  81%|████████▏ | 814/1000 [3:47:12<53:02, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  82%|████████▏ | 815/1000 [3:47:29<52:44, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XOO
OXX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 4
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  82%|████████▏ | 816/1000 [3:47:46<52:25, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  82%|████████▏ | 817/1000 [3:48:03<52:12, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  82%|████████▏ | 818/1000 [3:48:20<51:59, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  82%|████████▏ | 819/1000 [3:48:38<51:44, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  82%|████████▏ | 820/1000 [3:48:55<51:26, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  82%|████████▏ | 821/1000 [3:49:12<51:03, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  82%|████████▏ | 822/1000 [3:49:29<50:42, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  82%|████████▏ | 823/1000 [3:49:46<50:37, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.31 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  82%|████████▏ | 824/1000 [3:50:04<50:39, 17.27s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.51 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  82%|████████▎ | 825/1000 [3:50:21<50:30, 17.32s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.42 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  83%|████████▎ | 826/1000 [3:50:38<50:13, 17.32s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.31 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  83%|████████▎ | 827/1000 [3:50:56<49:48, 17.27s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  83%|████████▎ | 828/1000 [3:51:13<49:21, 17.22s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  83%|████████▎ | 829/1000 [3:51:30<48:56, 17.17s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  83%|████████▎ | 830/1000 [3:51:47<48:38, 17.17s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  83%|████████▎ | 831/1000 [3:52:04<48:18, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  83%|████████▎ | 832/1000 [3:52:21<48:02, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  83%|████████▎ | 833/1000 [3:52:38<47:45, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  83%|████████▎ | 834/1000 [3:52:56<47:26, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  84%|████████▎ | 835/1000 [3:53:13<47:04, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  84%|████████▎ | 836/1000 [3:53:30<46:47, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  84%|████████▎ | 837/1000 [3:53:47<46:29, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  84%|████████▍ | 838/1000 [3:54:04<46:13, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  84%|████████▍ | 839/1000 [3:54:21<45:53, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  84%|████████▍ | 840/1000 [3:54:38<45:41, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  84%|████████▍ | 841/1000 [3:54:55<45:27, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  84%|████████▍ | 842/1000 [3:55:13<45:11, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  84%|████████▍ | 843/1000 [3:55:30<44:56, 17.18s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  84%|████████▍ | 844/1000 [3:55:47<44:37, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  84%|████████▍ | 845/1000 [3:56:04<44:13, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  85%|████████▍ | 846/1000 [3:56:21<43:52, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  85%|████████▍ | 847/1000 [3:56:38<43:41, 17.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  85%|████████▍ | 848/1000 [3:56:55<43:23, 17.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  85%|████████▍ | 849/1000 [3:57:12<43:01, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  85%|████████▌ | 850/1000 [3:57:29<42:40, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  85%|████████▌ | 851/1000 [3:57:47<42:29, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  85%|████████▌ | 852/1000 [3:58:04<42:06, 17.07s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  85%|████████▌ | 853/1000 [3:58:21<41:51, 17.09s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  85%|████████▌ | 854/1000 [3:58:38<41:33, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  86%|████████▌ | 855/1000 [3:58:55<41:20, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  86%|████████▌ | 856/1000 [3:59:12<41:06, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  86%|████████▌ | 857/1000 [3:59:29<40:50, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  86%|████████▌ | 858/1000 [3:59:46<40:33, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  86%|████████▌ | 859/1000 [4:00:03<40:14, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  86%|████████▌ | 860/1000 [4:00:20<39:53, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  86%|████████▌ | 861/1000 [4:00:38<39:42, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.22 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  86%|████████▌ | 862/1000 [4:00:55<39:22, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  86%|████████▋ | 863/1000 [4:01:12<39:03, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  86%|████████▋ | 864/1000 [4:01:29<38:47, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  86%|████████▋ | 865/1000 [4:01:46<38:25, 17.08s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  87%|████████▋ | 866/1000 [4:02:03<38:07, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  87%|████████▋ | 867/1000 [4:02:20<37:49, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  87%|████████▋ | 868/1000 [4:02:37<37:36, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  87%|████████▋ | 869/1000 [4:02:54<37:21, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  87%|████████▋ | 870/1000 [4:03:12<37:11, 17.16s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.29 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  87%|████████▋ | 871/1000 [4:03:29<36:53, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  87%|████████▋ | 872/1000 [4:03:46<36:34, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  87%|████████▋ | 873/1000 [4:04:03<36:14, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  87%|████████▋ | 874/1000 [4:04:20<35:56, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  88%|████████▊ | 875/1000 [4:04:37<35:38, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  88%|████████▊ | 876/1000 [4:04:54<35:21, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  88%|████████▊ | 877/1000 [4:05:11<35:02, 17.10s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  88%|████████▊ | 878/1000 [4:05:28<34:42, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  88%|████████▊ | 879/1000 [4:05:46<34:27, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  88%|████████▊ | 880/1000 [4:06:03<34:14, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  88%|████████▊ | 881/1000 [4:06:20<33:57, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  88%|████████▊ | 882/1000 [4:06:37<33:47, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.33 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  88%|████████▊ | 883/1000 [4:06:54<33:31, 17.19s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  88%|████████▊ | 884/1000 [4:07:12<33:14, 17.20s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  88%|████████▊ | 885/1000 [4:07:29<32:56, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  89%|████████▊ | 886/1000 [4:07:46<32:39, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  89%|████████▊ | 887/1000 [4:08:03<32:14, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  89%|████████▉ | 888/1000 [4:08:20<31:56, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  89%|████████▉ | 889/1000 [4:08:37<31:42, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  89%|████████▉ | 890/1000 [4:08:54<31:23, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  89%|████████▉ | 891/1000 [4:09:12<31:13, 17.19s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.34 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  89%|████████▉ | 892/1000 [4:09:29<31:01, 17.24s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.34 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  89%|████████▉ | 893/1000 [4:09:46<30:38, 17.19s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  89%|████████▉ | 894/1000 [4:10:03<30:14, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 16.95 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  90%|████████▉ | 895/1000 [4:10:20<29:56, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  90%|████████▉ | 896/1000 [4:10:37<29:39, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  90%|████████▉ | 897/1000 [4:10:54<29:19, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  90%|████████▉ | 898/1000 [4:11:11<29:00, 17.06s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  90%|████████▉ | 899/1000 [4:11:28<28:46, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  90%|█████████ | 900/1000 [4:11:46<28:32, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  90%|█████████ | 901/1000 [4:12:03<28:14, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  90%|█████████ | 902/1000 [4:12:20<27:57, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  90%|█████████ | 903/1000 [4:12:37<27:42, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  90%|█████████ | 904/1000 [4:12:54<27:26, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  90%|█████████ | 905/1000 [4:13:11<27:04, 17.10s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  91%|█████████ | 906/1000 [4:13:28<26:45, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  91%|█████████ | 907/1000 [4:13:45<26:24, 17.04s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  91%|█████████ | 908/1000 [4:14:02<26:06, 17.03s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 16.98 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  91%|█████████ | 909/1000 [4:14:19<25:50, 17.04s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  91%|█████████ | 910/1000 [4:14:36<25:36, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  91%|█████████ | 911/1000 [4:14:53<25:19, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  91%|█████████ | 912/1000 [4:15:11<25:03, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  91%|█████████▏| 913/1000 [4:15:28<24:49, 17.12s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  91%|█████████▏| 914/1000 [4:15:45<24:33, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  92%|█████████▏| 915/1000 [4:16:02<24:13, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  92%|█████████▏| 916/1000 [4:16:19<23:58, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  92%|█████████▏| 917/1000 [4:16:36<23:46, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.32 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  92%|█████████▏| 918/1000 [4:16:54<23:27, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  92%|█████████▏| 919/1000 [4:17:11<23:09, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  92%|█████████▏| 920/1000 [4:17:28<22:47, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  92%|█████████▏| 921/1000 [4:17:45<22:28, 17.07s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  92%|█████████▏| 922/1000 [4:18:02<22:11, 17.06s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  92%|█████████▏| 923/1000 [4:18:19<21:56, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  92%|█████████▏| 924/1000 [4:18:36<21:39, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  92%|█████████▎| 925/1000 [4:18:53<21:22, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  93%|█████████▎| 926/1000 [4:19:10<21:07, 17.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.19 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  93%|█████████▎| 927/1000 [4:19:27<20:50, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  93%|█████████▎| 928/1000 [4:19:45<20:31, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  93%|█████████▎| 929/1000 [4:20:02<20:14, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  93%|█████████▎| 930/1000 [4:20:19<19:57, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  93%|█████████▎| 931/1000 [4:20:36<19:42, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  93%|█████████▎| 932/1000 [4:20:53<19:23, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  93%|█████████▎| 933/1000 [4:21:10<19:07, 17.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  93%|█████████▎| 934/1000 [4:21:27<18:49, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.06 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  94%|█████████▎| 935/1000 [4:21:44<18:34, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  94%|█████████▎| 936/1000 [4:22:02<18:17, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  94%|█████████▎| 937/1000 [4:22:19<18:00, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  94%|█████████▍| 938/1000 [4:22:36<17:42, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  94%|█████████▍| 939/1000 [4:22:53<17:25, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  94%|█████████▍| 940/1000 [4:23:10<17:10, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.24 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  94%|█████████▍| 941/1000 [4:23:27<16:52, 17.17s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  94%|█████████▍| 942/1000 [4:23:44<16:32, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 16.95 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  94%|█████████▍| 943/1000 [4:24:01<16:14, 17.10s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  94%|█████████▍| 944/1000 [4:24:19<15:58, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  94%|█████████▍| 945/1000 [4:24:36<15:43, 17.15s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  95%|█████████▍| 946/1000 [4:24:53<15:26, 17.15s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  95%|█████████▍| 947/1000 [4:25:10<15:09, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  95%|█████████▍| 948/1000 [4:25:27<14:50, 17.12s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  95%|█████████▍| 949/1000 [4:25:44<14:31, 17.08s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 16.97 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  95%|█████████▌| 950/1000 [4:26:01<14:12, 17.05s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 16.96 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  95%|█████████▌| 951/1000 [4:26:18<13:57, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  95%|█████████▌| 952/1000 [4:26:36<13:42, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.20 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  95%|█████████▌| 953/1000 [4:26:53<13:25, 17.14s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  95%|█████████▌| 954/1000 [4:27:10<13:09, 17.16s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.18 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  96%|█████████▌| 955/1000 [4:27:27<12:52, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  96%|█████████▌| 956/1000 [4:27:44<12:33, 17.13s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  96%|█████████▌| 957/1000 [4:28:01<12:18, 17.18s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.31 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  96%|█████████▌| 958/1000 [4:28:19<12:03, 17.23s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.34 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  96%|█████████▌| 959/1000 [4:28:36<11:48, 17.28s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.39 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  96%|█████████▌| 960/1000 [4:28:54<11:33, 17.33s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.43 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  96%|█████████▌| 961/1000 [4:29:11<11:15, 17.33s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.31 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  96%|█████████▌| 962/1000 [4:29:28<10:58, 17.34s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.36 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  96%|█████████▋| 963/1000 [4:29:46<10:42, 17.36s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.40 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  96%|█████████▋| 964/1000 [4:30:03<10:22, 17.30s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  96%|█████████▋| 965/1000 [4:30:20<10:04, 17.26s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  97%|█████████▋| 966/1000 [4:30:37<09:45, 17.22s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  97%|█████████▋| 967/1000 [4:30:54<09:27, 17.19s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  97%|█████████▋| 968/1000 [4:31:11<09:09, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

🏁 Game Over!
OOX
XXO
O X
⏱️ Duration: 17.16 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 0
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes:  97%|█████████▋| 969/1000 [4:31:29<08:52, 17.17s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.14 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  97%|█████████▋| 970/1000 [4:31:46<08:33, 17.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid 

Training episodes:  97%|█████████▋| 971/1000 [4:32:03<08:16, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [4]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X O
OXX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 7
Turn 8: player_2 → 1
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  97%|█████████▋| 972/1000 [4:32:20<07:59, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.10 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  97%|█████████▋| 973/1000 [4:32:37<07:43, 17.16s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.25 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes:  97%|█████████▋| 974/1000 [4:32:54<07:25, 17.13s/it]

[Mistral_X] LLM chose invalid move: 2, not in [1]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

🏁 Game Over!
O X
XXO
OOX
⏱️ Duration: 17.05 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 1
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  98%|█████████▊| 975/1000 [4:33:11<07:08, 17.15s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  98%|█████████▊| 976/1000 [4:33:28<06:50, 17.11s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  98%|█████████▊| 977/1000 [4:33:46<06:33, 17.11s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  98%|█████████▊| 978/1000 [4:34:03<06:15, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.04 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  98%|█████████▊| 979/1000 [4:34:20<05:59, 17.11s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  98%|█████████▊| 980/1000 [4:34:37<05:42, 17.13s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.17 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  98%|█████████▊| 981/1000 [4:34:54<05:25, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.08 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  98%|█████████▊| 982/1000 [4:35:11<05:08, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.12 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  98%|█████████▊| 983/1000 [4:35:28<04:50, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.03 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  98%|█████████▊| 984/1000 [4:35:45<04:33, 17.12s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.15 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  98%|█████████▊| 985/1000 [4:36:02<04:16, 17.08s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  99%|█████████▊| 986/1000 [4:36:19<03:58, 17.06s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.01 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 8
[Mistral_X] valid moves: [7]



Training episodes:  99%|█████████▊| 987/1000 [4:36:37<03:42, 17.09s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.13 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid 

Training episodes:  99%|█████████▉| 988/1000 [4:36:54<03:25, 17.09s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.09 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes:  99%|█████████▉| 989/1000 [4:37:11<03:08, 17.10s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.11 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid 

Training episodes:  99%|█████████▉| 990/1000 [4:37:28<02:50, 17.09s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.07 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes:  99%|█████████▉| 991/1000 [4:37:45<02:34, 17.14s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.24 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes:  99%|█████████▉| 992/1000 [4:38:02<02:16, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.00 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid moves: [1 4 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes:  99%|█████████▉| 993/1000 [4:38:19<01:59, 17.14s/it]

[Mistral_X] Chosen action: 7

🏁 Game Over!
XOX
XXO
O O
⏱️ Duration: 17.21 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 0
Turn 6: player_2 → 1
Turn 7: player_1 → 4
Turn 8: player_2 → 8
Turn 9: player_1 → 7
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid 

Training episodes:  99%|█████████▉| 994/1000 [4:38:36<01:42, 17.10s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 16.99 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [0 7 8

Training episodes: 100%|█████████▉| 995/1000 [4:38:53<01:25, 17.08s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XXO
OXO
⏱️ Duration: 17.02 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 4
Turn 6: player_2 → 1
Turn 7: player_1 → 7
Turn 8: player_2 → 8
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid move

Training episodes: 100%|█████████▉| 996/1000 [4:39:11<01:08, 17.19s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 17.44 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8

Training episodes: 100%|█████████▉| 997/1000 [4:39:28<00:51, 17.26s/it]

[Mistral_X] LLM chose invalid move: 2, not in [0]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

🏁 Game Over!
 OX
XOO
OXX
⏱️ Duration: 17.41 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 8
Turn 8: player_2 → 4
Turn 9: player_1 → 0
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

[DQAgent_O] Valid moves: [0 1 4 7]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 4 7]

[Mistral_X] LLM chose invalid move: 3, not in [1 4 7

Training episodes: 100%|█████████▉| 998/1000 [4:39:46<00:34, 17.34s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OXX
X O
OOX
⏱️ Duration: 17.51 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 8
Turn 6: player_2 → 0
Turn 7: player_1 → 1
Turn 8: player_2 → 7
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [0 1 4 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [0 4 8]

[Mistral_X] LLM chose invalid move: 2, not in [0 4 8]
[Mistral_X] Fallback random move: 0
[Mistral_X] Chosen action: 0

[DQAgent_O] Valid 

Training episodes: 100%|█████████▉| 999/1000 [4:40:03<00:17, 17.37s/it]

[Mistral_X] Chosen action: 4

🏁 Game Over!
XOX
X O
OXO
⏱️ Duration: 17.43 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 7
Turn 6: player_2 → 1
Turn 7: player_1 → 0
Turn 8: player_2 → 8
Turn 9: player_1 → 4
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 1
[Mistral_X] Chosen action: 1

[DQAgent_O] Valid moves: [0 4 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [7 8]
[DQAgent_O] Chosen action: 7
[Mistral_X] valid moves: [8]



Training episodes: 100%|██████████| 1000/1000 [4:40:25<00:00, 16.83s/it]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OXX
XXO
OO 
⏱️ Duration: 21.92 sec
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: player_1 → 3
Turn 4: player_2 → 6
Turn 5: player_1 → 1
Turn 6: player_2 → 0
Turn 7: player_1 → 4
Turn 8: player_2 → 7
Turn 9: player_1 → 8





AttributeError: 'DQAgent' object has no attribute 'save_model'

In [24]:
# Launch the 1000-episode run and pass "dq_model.pth" as the path for the saved model;
# the evaluation section further down loads a file with this same name.
# NOTE(review): run_multiple_episodes is defined in an earlier cell — confirm there how
# save_model_path is used. A previously logged 1000-episode run against the LLM opponent
# took several hours, so expect this cell to be slow.
if __name__ == "__main__":
    run_multiple_episodes(n_episodes=1000, save_model_path="dq_model.pth")


=== 🎮 Episode 1 ===
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] (Exploring) Chosen action: 1
[Mistral_X] valid moves: [0 3 4 5 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] (Exploring) Chosen action: 0
[Mistral_X] valid moves: [4 5 6 7 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] (Exploring) Chosen action: 6
[Mistral_X] valid moves: [4 7 8]

[Mistral_X] Chosen action: 4

🏁 Game Over!
OOX
X X
O  
Rewards: {'player_1': 1, 'player_2': -1}
🏆 Winner: Mistral_X
Turn 1: player_1 → 2
Turn 2: player_2 → 1
Turn 3: player_1 → 3
Turn 4: player_2 → 0
Turn 5: player_1 → 5
Turn 6: player_2 → 6
Turn 7: player_1 → 4

=== 🎮 Episode 2 ===
[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] (Exploring) Chosen action: 7
[Mistral_X] valid moves: [0 1 3 4 5 6 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] (Exploring) Chosen action: 1
[Mistral_X] valid moves: [0 4 5 6 8]

[Mistral_X] Chosen action: 5

[DQAgent_O] (Exploring) 

##### Step-by-Step: Evaluate a Trained DQAgent

Load the trained model

In [25]:
# Re-create the DQ agent for evaluation and restore its saved weights.
# state_size/action_size of 9 match the QNetwork defaults (9 inputs, 9 outputs).
agent_O = DQAgent(
    name="DQAgent_O",
    state_size=9,
    action_size=9,
    epsilon=0.0  # Turn off exploration: choose_action explores only when rand() < epsilon
)
# Must be the same file name that was passed as save_model_path in the training cell.
# NOTE(review): load_model is defined in an earlier cell; the recorded output suggests it
# calls torch.load(path) without weights_only — confirm and consider setting it explicitly.
agent_O.load_model("dq_model.pth")  # Path to your saved model

Model loaded from dq_model.pth


  self.model.load_state_dict(torch.load(path))


Create the SLM (small language model) agent

In [26]:
# Build the language-model opponent that plays as "Mistral_X".
# NOTE(review): api_url points at a chat-completions endpoint on localhost:1234 —
# presumably a locally hosted, OpenAI-compatible server that must be running before
# the evaluation cell is executed; confirm against the LLM_Agent definition.
agent_X = LLM_Agent(
    name="Mistral_X",
    model="Mistral-Nemo-Instruct-2407-GGUF",
    api_url="http://localhost:1234/v1/chat/completions"
)

In [27]:
def evaluate_agent(agent_llm, agent_dq, n_games=50):
    """Play ``n_games`` games between two agents, print the tally and return it.

    Each game is delegated to ``run_game_with_agents(agent_llm, agent_dq,
    render=False)``, which is expected to return a mapping with the keys
    ``"player_1"`` and ``"player_2"``. A reward of exactly ``1`` counts as a
    win for that player; any other outcome is counted as a draw.

    Args:
        agent_llm: Agent passed first to ``run_game_with_agents``; its wins
            are read from the ``"player_1"`` reward and reported as "LLM".
        agent_dq: Agent passed second to ``run_game_with_agents``; its wins
            are read from the ``"player_2"`` reward and reported as "DQAgent".
        n_games: Number of games to play. ``0`` yields an all-zero tally.

    Returns:
        dict: ``{"player_1": <wins>, "player_2": <wins>, "draws": <draws>}``
        so callers can use the numbers instead of parsing the printed output.
    """
    wins = {"player_1": 0, "player_2": 0}
    draws = 0

    # The game index itself is never needed, only the number of repetitions.
    for _ in range(n_games):
        rewards = run_game_with_agents(agent_llm, agent_dq, render=False)

        if rewards["player_1"] == 1:
            wins["player_1"] += 1
        elif rewards["player_2"] == 1:
            wins["player_2"] += 1
        else:
            # Neither side received the winning reward.
            draws += 1

    print("\n=== Evaluation Results ===")
    print(f"🏆 Wins (LLM): {wins['player_1']}")
    print(f"🤖 Wins (DQAgent): {wins['player_2']}")
    print(f"🤝 Draws: {draws}")

    return {"player_1": wins["player_1"], "player_2": wins["player_2"], "draws": draws}


In [28]:
# Evaluate the loaded DQ agent (agent_O) against the LLM agent (agent_X) over 50 games.
# evaluate_agent passes render=False to the game runner; the move-by-move lines in the
# recorded output are still printed, presumably by the agents themselves.
if __name__ == "__main__":
    evaluate_agent(agent_X, agent_O, n_games=50)

[Mistral_X] valid moves: [0 1 2 3 4 5 6 7 8]

[Mistral_X] Chosen action: 2

[DQAgent_O] Valid moves: [0 1 3 4 5 6 7 8]
[DQAgent_O] Chosen action: 5
[Mistral_X] valid moves: [0 1 3 4 6 7 8]

[Mistral_X] Chosen action: 3

[DQAgent_O] Valid moves: [0 1 4 6 7 8]
[DQAgent_O] Chosen action: 6
[Mistral_X] valid moves: [0 1 4 7 8]

[Mistral_X] LLM chose invalid move: 5, not in [0 1 4 7 8]
[Mistral_X] Fallback random move: 4
[Mistral_X] Chosen action: 4

[DQAgent_O] Valid moves: [0 1 7 8]
[DQAgent_O] Chosen action: 0
[Mistral_X] valid moves: [1 7 8]

[Mistral_X] LLM chose invalid move: 3, not in [1 7 8]
[Mistral_X] Fallback random move: 7
[Mistral_X] Chosen action: 7

[DQAgent_O] Valid moves: [1 8]
[DQAgent_O] Chosen action: 1
[Mistral_X] valid moves: [8]

[Mistral_X] LLM chose invalid move: 2, not in [8]
[Mistral_X] Fallback random move: 8
[Mistral_X] Chosen action: 8

🏁 Game Over!
OOX
XXO
OX 
Rewards: {'player_1': 0, 'player_2': 0}
🤝 Draw
Turn 1: player_1 → 2
Turn 2: player_2 → 5
Turn 3: play