In [1]:
# ==========================================================
#               LOCAL SETUP CELL
#  This cell configures the environment for local execution.
# ==========================================================

import torch
import os

# 1. Define the device
#    Locally, we will use the CPU.
print("Setting device to CPU for local execution.")
device = torch.device("cpu")

# 2. Set the working directory to the project root.
#    Only step up one level when the kernel was started inside the
#    'notebooks' subfolder. An unconditional os.chdir('..') is not idempotent:
#    every re-run of this cell would climb one directory higher.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')
    print(f"Changed working directory to: {os.getcwd()}")
else:
    print(f"Already in project root directory: {os.getcwd()}")


# 3. Create the 'models' directory if it doesn't exist
#    Resolved against the current working directory (the project root after
#    step 2) instead of a machine-specific absolute path, so it is the same
#    folder the trainer writes its checkpoints to ('models/<run_name>').
models_dir = os.path.join(os.getcwd(), 'models')
if os.path.isdir(models_dir):
    print(f"Directory '{models_dir}' already exists.")
else:
    print(f"Directory '{models_dir}' not found. Creating it...")
    # exist_ok guards against the directory appearing between check and create.
    os.makedirs(models_dir, exist_ok=True)

Setting device to CPU for local execution.
Changed working directory to: D:\mendikot_ai_project
Directory 'D:/mendikot_ai_project/models' already exists.


In [None]:
# ====================================================================
#              FINAL ALL-IN-ONE PRO AGENT TRAINING SCRIPT
# ====================================================================

# ------------------- Part 1: IMPORTS -------------------
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque
from itertools import product
import math
import os
from datetime import datetime
from tqdm import tqdm
import torch.nn.functional as F


# ------------------- Part 2: HYPERPARAMETERS -------------------
NUM_EPISODES = 50_000        # number of self-play episodes run by MendikotTrainer.train
REPLAY_BUFFER_SIZE = 10_000  # capacity (maxlen) of the ReplayBuffer deque
BATCH_SIZE = 128             # samples drawn per MendikotTrainer.learn call
LEARNING_RATE = 1e-3         # Adam learning rate
SAVE_EVERY = 1_000           # a checkpoint is written every SAVE_EVERY episodes
# CRITICAL: Updated state size for the Pro Agent.
# 48 (own hand) + 192 (card owner one-hots) + 16 (void flags)
# + 4 (trump) + 4 (lead suit) + 4 (player info) + 4 (scores) = 272
STATE_SIZE = 272
ACTION_SIZE = 12             # one policy logit per hand slot (12 cards per player)

# ------------------- Part 3: HELPER CLASSES & FUNCTIONS -------------------
class ReplayBuffer:
    """Bounded FIFO store of (state, policy, reward) training samples."""

    def __init__(self, capacity):
        # A deque with maxlen drops its oldest entry once `capacity` is reached.
        self.buffer = deque(maxlen=capacity)

    def push(self, state, policy, reward):
        """Append one training sample to the buffer."""
        entry = (state, policy, reward)
        self.buffer.append(entry)

    def sample(self, batch_size):
        """Return a list of `batch_size` distinct entries chosen at random.

        Raises ValueError (from random.sample) when fewer than `batch_size`
        entries are stored.
        """
        return random.sample(self.buffer, k=batch_size)

    def __len__(self):
        """Number of samples currently stored."""
        return len(self.buffer)

# Card universe: 12 ranks x 4 suits = 48 cards.
# IDs are rank-major: card_id = rank_index * 4 + suit_index, so within one suit
# a larger ID always means a higher rank.
RANKS_48 = [str(pip) for pip in range(3, 11)] + ['J', 'Q', 'K', 'A']
SUITS = ['H', 'D', 'C', 'S']
CARD_TO_ID = {
    rank + suit: rank_pos * len(SUITS) + suit_pos
    for rank_pos, rank in enumerate(RANKS_48)
    for suit_pos, suit in enumerate(SUITS)
}
ID_TO_CARD = dict(zip(CARD_TO_ID.values(), CARD_TO_ID.keys()))


def get_deck_48():
    """Return a fresh, ordered list of all 48 card IDs (0..47)."""
    return [card_id for card_id in range(48)]


def get_rank_suit_from_id(card_id):
    """Split a card ID into its (rank, suit) strings.

    Raises:
        ValueError: if card_id is outside 0..47.
    """
    if not (0 <= card_id < 48):
        raise ValueError("Invalid card ID")
    rank_idx, suit_idx = divmod(card_id, 4)
    return RANKS_48[rank_idx], SUITS[suit_idx]


# ------------------- Part 4: MAIN GAME AND AI CLASSES -------------------
class GameState:
    """Mutable state of one four-player Mendikot deal played with 48 cards.

    Seats are numbered 0..3; seats with the same parity form a team
    (team = seat % 2). The trump suit is not fixed up front: it is set by the
    first player who cannot follow the led suit, using the card they play.
    """

    def __init__(self, num_players=4, dealer_index=0):
        """Shuffle and deal a new game.

        Args:
            num_players: must be 4.
            dealer_index: seat of the dealer; the next seat leads first.

        Raises:
            ValueError: if num_players is not 4.
        """
        self.num_players = num_players
        if num_players == 4:
            self.cards_per_player = 12
        else:
            raise ValueError("Only 4 players are supported by this version.")

        self.dealer_index = dealer_index
        self.current_player_index = (dealer_index + 1) % num_players
        self.deck = get_deck_48()
        self.hands = {i: [] for i in range(num_players)}
        self.trick_cards = []          # (player, card_id) pairs of the trick in progress
        self.trick_suits_led = set()   # every suit that has opened a trick so far
        self.trump_suit = None
        self.trump_declarer_team = None
        self.mendis_captured = {0: 0, 1: 0}   # tens captured, per team
        self.tricks_captured = {0: 0, 1: 0}   # tricks won, per team
        self.mendi_suit_ids = set()            # card IDs of the four tens
        self.current_trick_lead_player = (dealer_index + 1) % num_players
        self.current_trick_lead_suit = None
        # 0 = card not played yet, otherwise (seat that played it) + 1.
        self.card_owner_history = np.zeros(48, dtype=int)
        # Suits each seat has shown to be out of (failed to follow).
        self.void_suits = {p: set() for p in range(num_players)}
        self._initialize_mendi_ids()
        self.deal()

    def _initialize_mendi_ids(self):
        """Record the card IDs of the four tens (the 'mendis')."""
        for suit_str in SUITS:
            self.mendi_suit_ids.add(CARD_TO_ID[f"10{suit_str}"])

    def deal(self):
        """Shuffle the deck in place and deal it round-robin; hands end up sorted."""
        random.shuffle(self.deck)
        for i, card_id in enumerate(self.deck):
            self.hands[i % self.num_players].append(card_id)
        for hand in self.hands.values():
            hand.sort()

    def get_team(self, player_idx):
        """Team (0 or 1) of the given seat."""
        return player_idx % 2

    def get_legal_moves(self):
        """Cards the current player may play.

        Leading: while fewer than four tricks are complete and not every suit
        has been led, the leader must open a suit that has not been led yet
        (when holding one). Following: the led suit must be followed when
        possible; otherwise any card may be played.
        """
        player_hand = self.hands[self.current_player_index]
        if not player_hand:
            return []
        if not self.trick_cards:
            if sum(self.tricks_captured.values()) < 4 and len(self.trick_suits_led) < 4:
                possible_leads = [
                    c for c in player_hand
                    if get_rank_suit_from_id(c)[1] not in self.trick_suits_led
                ]
                return possible_leads if possible_leads else player_hand
            return player_hand
        lead_suit = self.current_trick_lead_suit
        cards_of_lead_suit = [c for c in player_hand if get_rank_suit_from_id(c)[1] == lead_suit]
        return cards_of_lead_suit if cards_of_lead_suit else player_hand

    def play_card(self, player_idx, card_id):
        """Play `card_id` for `player_idx` and advance the game.

        Only turn order and card ownership are validated here; legality with
        respect to get_legal_moves is not checked.

        Raises:
            ValueError: if it is not the player's turn or they do not hold the card.
        """
        if player_idx != self.current_player_index:
            raise ValueError(f"Not Player {player_idx}'s turn.")
        if card_id not in self.hands[player_idx]:
            raise ValueError(f"Player {player_idx} does not have card {card_id}.")

        # Trump is declared by the first player unable to follow the led suit.
        if self.trump_suit is None:
            if self.current_trick_lead_suit is not None:
                _, played_suit = get_rank_suit_from_id(card_id)
                can_follow = any(
                    get_rank_suit_from_id(c)[1] == self.current_trick_lead_suit
                    for c in self.hands[player_idx]
                )
                if not can_follow and played_suit != self.current_trick_lead_suit:
                    self.set_trump(card_id, player_idx)

        self.card_owner_history[card_id] = player_idx + 1
        if self.current_trick_lead_suit is not None:
            _, played_suit = get_rank_suit_from_id(card_id)
            if played_suit != self.current_trick_lead_suit:
                # Not following suit reveals a void in the led suit.
                self.void_suits[player_idx].add(self.current_trick_lead_suit)

        self.hands[player_idx].remove(card_id)
        self.trick_cards.append((player_idx, card_id))

        if len(self.trick_cards) == 1:
            self.current_trick_lead_player = player_idx
            _, lead_suit = get_rank_suit_from_id(card_id)
            self.current_trick_lead_suit = lead_suit
            self.trick_suits_led.add(lead_suit)

        if len(self.trick_cards) == self.num_players:
            self._resolve_trick()
        else:
            self.current_player_index = (self.current_player_index + 1) % self.num_players

    def _resolve_trick(self):
        """Determine the trick winner, credit the trick and reset the trick state.

        The highest trump wins; without any trump the highest card of the led
        suit wins. Comparing raw IDs is valid because only cards of one suit
        are compared with each other.
        """
        lead_suit = self.current_trick_lead_suit
        winning_player, lead_card = self.trick_cards[0]
        highest_trump, highest_lead_suit = -1, lead_card
        for p_id, c_id in self.trick_cards:
            _, suit = get_rank_suit_from_id(c_id)
            if suit == self.trump_suit and c_id > highest_trump:
                highest_trump, winning_player = c_id, p_id
        if highest_trump == -1:
            for p_id, c_id in self.trick_cards:
                _, suit = get_rank_suit_from_id(c_id)
                if suit == lead_suit and c_id > highest_lead_suit:
                    highest_lead_suit, winning_player = c_id, p_id
        self.win_trick(winning_player)
        self.trick_cards.clear()
        self.current_trick_lead_suit = None

    def win_trick(self, winner_idx):
        """Credit the current trick (and any tens in it) to the winner's team.

        The winner becomes the next player to act.
        """
        winner_team = self.get_team(winner_idx)
        self.tricks_captured[winner_team] += 1
        for _, c_id in self.trick_cards:
            if c_id in self.mendi_suit_ids:
                self.mendis_captured[winner_team] += 1
        self.current_player_index = winner_idx

    def is_game_over(self):
        """True once all tricks of the deal have been captured."""
        return sum(self.tricks_captured.values()) >= self.cards_per_player

    def set_trump(self, trump_card_id, declarer_player_id):
        """Fix the trump suit to the suit of `trump_card_id` and record the declaring team."""
        self.trump_suit = get_rank_suit_from_id(trump_card_id)[1]
        self.trump_declarer_team = self.get_team(declarer_player_id)

    def get_final_rewards(self):
        """Zero-sum rewards as {team: reward}, from the current capture counts.

        Ladder (team 0's view, team 1 gets the negation): all tricks 1000,
        four tens 600, three tens 300, two tens each 150 decided by trick
        count, and on a trick tie the trump-declaring team loses. 0 otherwise.
        """
        m0, m1 = self.mendis_captured[0], self.mendis_captured[1]
        t0, t1 = self.tricks_captured[0], self.tricks_captured[1]
        r = 0
        if t0 == self.cards_per_player:
            r = 1000
        elif t1 == self.cards_per_player:
            r = -1000
        elif m0 == 4:
            r = 600
        elif m1 == 4:
            r = -600
        elif m0 == 3:
            r = 300
        elif m1 == 3:
            r = -300
        elif m0 == 2:
            if t0 > t1:
                r = 150
            elif t1 > t0:
                r = -150
            else:
                if self.trump_declarer_team == 0:
                    r = -150
                elif self.trump_declarer_team == 1:
                    r = 150
        return {0: r, 1: -r}

    def clone(self):
        """Return an independent copy of this state.

        The copy is assembled without running __init__: the constructor would
        shuffle and deal a complete deck only for the result to be overwritten,
        which is wasted work for a method called once per MCTS simulation and
        would also advance the global `random` stream on every clone.
        """
        new = GameState.__new__(GameState)
        new.num_players = self.num_players
        new.cards_per_player = self.cards_per_player
        new.dealer_index = self.dealer_index
        new.current_player_index = self.current_player_index
        new.deck = self.deck[:]
        new.hands = {p: h[:] for p, h in self.hands.items()}
        new.trick_cards = self.trick_cards[:]
        new.trick_suits_led = self.trick_suits_led.copy()
        new.trump_suit = self.trump_suit
        new.trump_declarer_team = self.trump_declarer_team
        new.mendis_captured = self.mendis_captured.copy()
        new.tricks_captured = self.tricks_captured.copy()
        new.mendi_suit_ids = set(self.mendi_suit_ids)
        new.current_trick_lead_player = self.current_trick_lead_player
        new.current_trick_lead_suit = self.current_trick_lead_suit
        new.card_owner_history = self.card_owner_history.copy()
        new.void_suits = {p: s.copy() for p, s in self.void_suits.items()}
        return new

class MCTSNode:
    """A search-tree node holding a visit count, a running mean value and a prior."""

    def __init__(self, parent=None, prior_p=1.0):
        self.parent = parent
        self.children = {}     # action -> MCTSNode
        self.n_visits = 0
        self.q_value = 0.0     # running mean of the values passed to update()
        self.prior_p = prior_p

    def expand(self, action_priors):
        """Create a child for every action in `action_priors` that has none yet."""
        unseen = {a: p for a, p in action_priors.items() if a not in self.children}
        for action, prob in unseen.items():
            self.children[action] = MCTSNode(parent=self, prior_p=prob)

    def select(self, c_puct):
        """Return the (action, child) pair with the highest UCB score."""
        def score(entry):
            _action, child = entry
            return child.get_ucb_score(c_puct)

        return max(self.children.items(), key=score)

    def update(self, leaf_value):
        """Count one visit and fold `leaf_value` into the running mean."""
        self.n_visits += 1
        self.q_value += (leaf_value - self.q_value) / self.n_visits

    def get_ucb_score(self, c_puct):
        """Mean value plus the prior-weighted exploration bonus (requires a parent)."""
        u_value = c_puct * self.prior_p * math.sqrt(self.parent.n_visits) / (1 + self.n_visits)
        return self.q_value + u_value

    def is_leaf(self):
        """True when the node has not been expanded."""
        return not self.children

class MCTS:
    """PUCT tree search guided by a policy/value network.

    The network's policy output is indexed by hand slot (position of a card in
    the acting player's hand); priors are a softmax restricted to legal cards.

    Args:
        model: callable returning (policy_logits, value) for a state tensor.
        c_puct: exploration constant of the UCB score.
        n_simulations: playouts per get_move_probs call.
        reward_scale: divisor applied to terminal game rewards so that they
            share the value head's tanh range. The default matches the largest
            reward magnitude returned by GameState.get_final_rewards (1000).
    """

    def __init__(self, model, c_puct=1.0, n_simulations=100, reward_scale=1000.0):
        self.model, self.c_puct, self.n_simulations = model, c_puct, n_simulations
        self.reward_scale = reward_scale
        self.state_to_tensor_func = None

    def _terminal_value(self, state, player_id):
        """Final reward of `player_id`'s team, rescaled by `reward_scale`."""
        team = state.get_team(player_id)
        return state.get_final_rewards()[team] / self.reward_scale

    def _legal_priors(self, state, player_id, legal_moves):
        """Evaluate the network; return (priors aligned with legal_moves, value)."""
        state_tensor = self.state_to_tensor_func(state, player_id)
        with torch.no_grad():
            policy_logits, value_tensor = self.model(state_tensor)
        hand_map = {card_id: i for i, card_id in enumerate(state.hands[player_id])}
        legal_hand_indices = [hand_map[card_id] for card_id in legal_moves]
        legal_logits = policy_logits[0, legal_hand_indices]
        priors = F.softmax(legal_logits, dim=0).cpu().numpy()
        return priors, value_tensor.item()

    def _playout(self, state, node):
        """Run one simulation from `node`, mutating `state` along the way.

        Descends by UCB to a leaf, evaluates it (network or terminal reward)
        and backs the value up along the visited path.
        """
        # Each visited node is paired with the team that CHOSE the move leading
        # into it; that is the team whose viewpoint its q_value must reflect,
        # because the parent ranks children by q_value. Teams do not simply
        # alternate (the trick winner leads next), so the team is recorded
        # explicitly. The start node is tagged with the team to move.
        path = [(node, state.get_team(state.current_player_index))]
        while not node.is_leaf():
            mover_team = state.get_team(state.current_player_index)
            action, node = node.select(self.c_puct)
            state.play_card(state.current_player_index, action)
            path.append((node, mover_team))

        leaf_player = state.current_player_index
        leaf_team = state.get_team(leaf_player)
        if state.is_game_over():
            leaf_value = self._terminal_value(state, leaf_player)
        else:
            legal_moves = state.get_legal_moves()
            if not legal_moves:
                leaf_value = self._terminal_value(state, leaf_player)
            else:
                priors, leaf_value = self._legal_priors(state, leaf_player, legal_moves)
                node.expand({move: prob for move, prob in zip(legal_moves, priors)})

        # leaf_value is measured for leaf_team; the game is zero-sum between
        # the two teams, so the other team sees the negated value.
        for visited, mover_team in path:
            visited.update(leaf_value if mover_team == leaf_team else -leaf_value)

    def get_move_probs(self, state, state_to_tensor_func, temp=1e-3):
        """Search from `state` and return (actions, probabilities).

        Probabilities follow visit_count ** (1 / temp), normalised: a small
        `temp` approaches greedy play, `temp=1.0` is proportional to visits.
        Dirichlet noise is mixed into the root priors for exploration.

        Returns:
            ([], empty tensor) when there is no legal move,
            (legal_moves, tensor([1.0])) when the move is forced,
            otherwise (tuple of actions, 1-D float tensor of the same length).
        """
        self.state_to_tensor_func = state_to_tensor_func
        root = MCTSNode()
        player_id = state.current_player_index
        legal_moves = state.get_legal_moves()
        if not legal_moves:
            return [], torch.tensor([])
        if len(legal_moves) == 1:
            return legal_moves, torch.tensor([1.0])

        action_probs, _ = self._legal_priors(state, player_id, legal_moves)

        alpha, epsilon = 0.3, 0.25
        noise = np.random.dirichlet([alpha] * len(action_probs))
        noisy_probs = (1 - epsilon) * action_probs + epsilon * noise

        root.expand({move: prob for move, prob in zip(legal_moves, noisy_probs)})

        for _ in range(self.n_simulations):
            self._playout(state.clone(), root)

        acts = tuple(root.children.keys())
        visits = torch.tensor(
            [child.n_visits for child in root.children.values()], dtype=torch.float32
        )
        # softmax(log(N) / temp) == N ** (1 / temp) / sum(...). Feeding raw
        # counts to softmax would make even temp=1.0 collapse to one-hot.
        # The epsilon keeps log finite for unvisited children.
        act_probs = F.softmax(torch.log(visits + 1e-10) / temp, dim=0)
        return acts, act_probs

class MendikotModel(nn.Module):
    """MLP with a shared two-layer trunk, a policy head and a tanh value head."""

    def __init__(self, state_size, action_size, num_players=4):
        # `num_players` is accepted but not used by the network itself.
        super().__init__()
        hidden = 512
        trunk = [
            nn.Linear(state_size, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
        ]
        self.fc_layers = nn.Sequential(*trunk)
        self.policy_head = nn.Linear(hidden, action_size)
        self.value_head = nn.Linear(hidden, 1)

    def forward(self, state_tensor):
        """Return (policy logits, value squashed into (-1, 1) by tanh)."""
        features = self.fc_layers(state_tensor)
        policy_logits = self.policy_head(features)
        value = torch.tanh(self.value_head(features))
        return policy_logits, value

class MendikotTrainer:
    """Self-play trainer: plays games with MCTS, stores samples, fits the network."""

    # Largest reward magnitude produced by GameState.get_final_rewards.
    # Value targets are divided by it so they fall inside the value head's
    # tanh range instead of being unreachable targets such as +/-1000.
    REWARD_SCALE = 1000.0

    def __init__(self, num_players, state_size, action_size):
        """Build model, optimizer and replay buffer, and create the run's checkpoint folder."""
        self.num_players, self.state_size, self.action_size = num_players, state_size, action_size
        self.model = MendikotModel(state_size, action_size, num_players).to(device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=LEARNING_RATE)
        self.replay_buffer = ReplayBuffer(REPLAY_BUFFER_SIZE)
        run_name = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        # Relative path: checkpoints land under the current working directory.
        self.run_models_dir = os.path.join('models', run_name)
        os.makedirs(self.run_models_dir, exist_ok=True)
        print(f"--- Starting new training run: {run_name} ---\nModels will be saved in: {self.run_models_dir}")

    def state_to_tensor(self, game_state, player_id):
        """Encode the game from `player_id`'s seat as a (1, 272) float tensor.

        Layout: own hand (48) | who played each card, one-hot per seat (48*4) |
        known voids per seat and suit (4*4) | trump suit (4) | led suit (4) |
        player info (4) | scores (4).
        """
        my_hand_vec = np.zeros(48)
        my_hand_vec[game_state.hands[player_id]] = 1

        card_owners_vec = np.zeros((48, 4))
        for card_idx in range(48):
            owner = game_state.card_owner_history[card_idx]  # 0 = unplayed, else seat + 1
            if owner > 0:
                card_owners_vec[card_idx, owner - 1] = 1
        card_owners_vec = card_owners_vec.flatten()

        void_vec = np.zeros((4, 4))
        suit_map = {'H': 0, 'D': 1, 'C': 2, 'S': 3}
        for p_id in range(4):
            for suit_char in game_state.void_suits[p_id]:
                void_vec[p_id, suit_map[suit_char]] = 1
        void_vec = void_vec.flatten()

        trump_vec, lead_suit_vec = np.zeros(4), np.zeros(4)
        if game_state.trump_suit is not None:
            trump_vec[suit_map.get(game_state.trump_suit, 0)] = 1
        if game_state.current_trick_lead_suit is not None:
            lead_suit_vec[suit_map.get(game_state.current_trick_lead_suit, 0)] = 1

        player_info = np.array([
            player_id / 4.0,
            game_state.current_player_index / 4.0,
            game_state.dealer_index / 4.0,
            len(game_state.trick_cards) / 4.0,
        ])
        scores_vec = np.array([
            game_state.mendis_captured[0] / 4.0,
            game_state.tricks_captured[0] / self.action_size,
            game_state.mendis_captured[1] / 4.0,
            game_state.tricks_captured[1] / self.action_size,
        ])
        state_vector = np.concatenate([
            my_hand_vec, card_owners_vec, void_vec, trump_vec, lead_suit_vec, player_info, scores_vec,
        ])
        return torch.FloatTensor(state_vector).unsqueeze(0).to(device)

    def choose_action(self, game_state, temp=1.0):
        """Pick a card for the current player by sampling the MCTS distribution.

        Returns:
            (card_id, policy target) where the target is a length-action_size
            tensor indexed by hand slot, or (None, None) without legal moves.
        """
        mcts = MCTS(self.model, n_simulations=100)
        legal_moves, move_probs = mcts.get_move_probs(game_state, self.state_to_tensor, temp=temp)
        if not legal_moves:
            return None, None
        chosen_idx = np.random.choice(len(legal_moves), p=move_probs.numpy())
        chosen_card = legal_moves[chosen_idx]
        move_probs_for_learning = torch.zeros(self.action_size, device=device)
        hand_map = {card_id: i for i, card_id in enumerate(game_state.hands[game_state.current_player_index])}
        for move, prob in zip(legal_moves, move_probs):
            if move in hand_map:
                move_probs_for_learning[hand_map[move]] = prob
        return chosen_card, move_probs_for_learning

    def run_episode(self):
        """Play one self-play game and push every decision into the replay buffer.

        Each stored sample carries the final (unscaled) reward of the acting
        player's team; scaling happens in learn().
        """
        game = GameState(num_players=self.num_players)
        episode_history = []
        while not game.is_game_over():
            player_id = game.current_player_index
            action_card, move_probs = self.choose_action(game)
            if action_card is None:
                break
            # Encoded before the card is played, i.e. the state the decision was made in.
            state_tensor = self.state_to_tensor(game, player_id)
            episode_history.append({'state': state_tensor, 'policy': move_probs, 'player': player_id})
            game.play_card(player_id, action_card)
        final_rewards = game.get_final_rewards()
        for step in episode_history:
            team_id = game.get_team(step['player'])
            step['reward'] = final_rewards[team_id]
            self.replay_buffer.push(step['state'], step['policy'], step['reward'])

    def learn(self):
        """Run one optimisation step on a random replay batch.

        Returns:
            None when the buffer holds fewer than BATCH_SIZE samples, otherwise
            a dict with 'policy_loss', 'value_loss' and 'total_loss' floats.
        """
        if len(self.replay_buffer) < BATCH_SIZE:
            return None
        batch = self.replay_buffer.sample(BATCH_SIZE)
        states, target_policies, rewards = zip(*batch)
        states_tensor = torch.cat(states).to(device)
        target_policies_tensor = torch.stack(target_policies).to(device)
        # The value head ends in tanh, so targets must live in [-1, 1];
        # regressing onto raw rewards saturates the head and lets the value
        # loss dwarf the policy loss.
        value_targets = torch.FloatTensor(rewards).to(device) / self.REWARD_SCALE

        pred_policies_logits, pred_values = self.model(states_tensor)
        # squeeze(-1) drops only the trailing unit dim, so a batch of one keeps its batch axis.
        pred_values = pred_values.squeeze(-1)
        value_loss = F.mse_loss(pred_values, value_targets)
        policy_loss = F.cross_entropy(pred_policies_logits, target_policies_tensor)
        total_loss = policy_loss + value_loss

        self.optimizer.zero_grad()
        total_loss.backward()
        self.optimizer.step()
        return {
            'policy_loss': policy_loss.item(),
            'value_loss': value_loss.item(),
            'total_loss': total_loss.item(),
        }

    def train(self):
        """Alternate self-play and learning for NUM_EPISODES, checkpointing every SAVE_EVERY."""
        for episode in tqdm(range(NUM_EPISODES)):
            self.run_episode()
            self.learn()
            if (episode + 1) % SAVE_EVERY == 0:
                save_path = os.path.join(self.run_models_dir, f"mendikot_model_ep_{episode+1}.pth")
                torch.save(self.model.state_dict(), save_path)
        print("Training complete!")

# ------------------- Part 5: MAIN EXECUTION -------------------
if __name__ == '__main__':
    # Echo the key settings once, then start a fresh training run.
    banner = [
        "--- Hyperparameters Set for Training ---",
        f"State Size: {STATE_SIZE}, Action Size: {ACTION_SIZE}",
        f"Total Episodes: {NUM_EPISODES}",
        "----------------------------------------",
    ]
    print("\n".join(banner))

    trainer = MendikotTrainer(num_players=4, state_size=STATE_SIZE, action_size=ACTION_SIZE)
    trainer.train()

--- Hyperparameters Set for Training ---
State Size: 272, Action Size: 12
Total Episodes: 50000
----------------------------------------
--- Starting new training run: 2025-10-25_09-10-03 ---
Models will be saved in: models\2025-10-25_09-10-03


  0%|                                                                            | 73/50000 [01:51<20:08:46,  1.45s/it]

In [None]:
# ==========================================================
#               IMPROVED LOCAL SETUP CELL
#  This cell configures the environment for local execution.
# ==========================================================

import torch
import os
import sys

# 1. Change the working directory to the project root
#    This is the key to finding the 'src' and 'models' folders.
#    '..' means "the parent directory".
#    This handles the case where the notebook is run from the 'notebooks' subfolder.
if os.path.basename(os.getcwd()) == 'notebooks':
    os.chdir('..')
project_root = os.getcwd()
print(f"Current Working Directory set to: {project_root}")

# 2. Add the project root to the Python path for imports
#    This allows 'from src.game import GameState' to work.
#    The absolute path is used: a relative '.' entry is resolved at import
#    time and would stop pointing at the project if the cwd changed later.
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# 3. Define the device
print("Setting device to CPU for local execution.")
device = torch.device("cpu")

# 4. Create the 'models' directory if it doesn't exist (as a safety check)
#    Derived from the project root rather than a machine-specific absolute
#    path, so the cell works on any checkout location.
models_dir = os.path.join(project_root, 'models')
if os.path.isdir(models_dir):
    print(f"Directory '{models_dir}' found.")
else:
    print(f"Warning: Directory '{models_dir}' not found. Creating it...")
    os.makedirs(models_dir, exist_ok=True)

In [None]:
# ====================================================================
#          FINAL DEBUG & ANALYSIS CELL
# ====================================================================

# 1. Imports
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import os
import sys

# --- DIAGNOSTICS: Let's find out where we are ---
# Show where the kernel is running and what the relative 'models' folder contains.
print(f"Current Working Directory: {os.getcwd()}")
try:
    models_listing = os.listdir('models')
except FileNotFoundError:
    print("ERROR: Could not find the 'models' directory from the current location.")
else:
    print(f"Contents of 'models' directory: {models_listing}")
print("-" * 20)
# --- END OF DIAGNOSTICS ---

# Ensure our custom classes from 'src' are available
# If you are running from the 'notebooks' folder, this is necessary.
if '..' not in sys.path:
    sys.path.append('..') 
from src.game import GameState
from src.agent import MendikotModel

# --- Configuration ---
# ---!! IMPORTANT !! ---
# ---!! COPY THE EXACT FOLDER NAME FROM THE OUTPUT ABOVE !! ---
# Which checkpoint to evaluate: <models>/<run folder>/<checkpoint file>.
RUN_FOLDER_NAME = '2025-10-24_20-15-23'  # <-- Double-check this name!
CHECKPOINT_FILE_NAME = 'mendikot_model_ep_3000.pth'  # <-- Check if this file exists!

# Build the path in an OS-safe way (relative to the current working directory).
MODEL_CHECKPOINT_TO_TEST = os.path.join(
    os.path.join('models', RUN_FOLDER_NAME),
    CHECKPOINT_FILE_NAME,
)

NUM_GAMES_TO_SIMULATE, AI_PLAYER_ID = 500, 0
# 112 matches the vector built by this cell's state_to_tensor if
# played_cards_history has 48 entries (48 + 48 + 4 + 4 + 4 + 4) -- TODO confirm
# against src.game. Note it differs from the 272-wide encoding used in training above.
STATE_SIZE, ACTION_SIZE = 112, 12
device = torch.device('cpu')
print(f"Analysis running on device: {device}")

# Helper definitions used by the evaluation: state_to_tensor, RandomAgent,
# load_trained_agent and evaluate_model, followed by the execution block.
def state_to_tensor(game_state, player_id):
    """Encode the game from `player_id`'s seat as a (1, N) float tensor.

    Concatenates: own hand (48) | game_state.played_cards_history |
    trump suit (4) | led suit (4) | player info (4) | scores (4).
    Assumes played_cards_history is a flat numeric vector -- TODO confirm
    against src.game.
    """
    suit_map = {'H': 0, 'D': 1, 'C': 2, 'S': 3}

    my_hand_vec = np.zeros(48)
    my_hand_vec[game_state.hands[player_id]] = 1
    played_history_vec = game_state.played_cards_history

    trump_vec = np.zeros(4)
    lead_suit_vec = np.zeros(4)
    if game_state.trump_suit is not None:
        trump_vec[suit_map.get(game_state.trump_suit, 0)] = 1
    if game_state.current_trick_lead_suit is not None:
        lead_suit_vec[suit_map.get(game_state.current_trick_lead_suit, 0)] = 1

    player_info = np.array([
        player_id / 4.0,
        game_state.current_player_index / 4.0,
        game_state.dealer_index / 4.0,
        len(game_state.trick_cards) / 4.0,
    ])
    mendis, tricks = game_state.mendis_captured, game_state.tricks_captured
    scores_vec = np.array([
        mendis[0] / 4.0,
        tricks[0] / ACTION_SIZE,
        mendis[1] / 4.0,
        tricks[1] / ACTION_SIZE,
    ])

    parts = [my_hand_vec, played_history_vec, trump_vec, lead_suit_vec, player_info, scores_vec]
    return torch.FloatTensor(np.concatenate(parts)).unsqueeze(0)
class RandomAgent:
    """Baseline opponent that plays a uniformly random legal card."""

    def choose_action(self, game_state):
        """Return a random legal card for the current player, or None if there is none."""
        legal_moves = game_state.get_legal_moves()
        if not legal_moves:
            return None
        return np.random.choice(legal_moves)
def load_trained_agent(model_path, state_size, action_size, num_players):
    """Load a MendikotModel checkpoint in eval mode.

    Returns:
        The model on `device`, or None (after printing an error) when
        `model_path` does not exist.
    """
    print(f"Attempting to load model from: {model_path}")
    checkpoint_missing = not os.path.exists(model_path)
    if checkpoint_missing:
        print("--> ERROR: Model file not found at the specified path.")
        return None

    model = MendikotModel(state_size, action_size, num_players)
    weights = torch.load(model_path, map_location=device)
    model.load_state_dict(weights)
    model.to(device)
    model.eval()
    return model
def evaluate_model(model, num_games, ai_player_id):
    """Play `num_games` games: the AI's team uses `model` greedily, the other team plays randomly.

    Returns:
        (wins, total_rewards): number of games with a positive reward for the
        AI's team, and the list of that team's per-game rewards.
    """
    def pick_ai_card(game, seat):
        """Legal card whose hand-slot logit is largest, or None if nothing can be chosen."""
        with torch.no_grad():
            state_tensor = state_to_tensor(game, seat).to(device)
            player_hand = game.hands[seat]
            legal_moves = game.get_legal_moves()
            if not legal_moves:
                return None
            policy_logits, _ = model(state_tensor)
            # The policy is indexed by position in the hand, not by card ID.
            slot_of = {card_id: slot for slot, card_id in enumerate(player_hand)}
            best_slot, best_logit = -1, -float('inf')
            for card_id in legal_moves:
                if card_id not in slot_of:
                    continue
                slot = slot_of[card_id]
                if policy_logits[0, slot] > best_logit:
                    best_logit, best_slot = policy_logits[0, slot], slot
            return player_hand[best_slot] if best_slot != -1 else None

    wins, total_rewards = 0, []
    random_agent = RandomAgent()
    for _ in tqdm(range(num_games), desc="Evaluating Model"):
        game = GameState(num_players=4)
        ai_team = game.get_team(ai_player_id)
        while not game.is_game_over():
            player_id = game.current_player_index
            if game.get_team(player_id) == ai_team:
                chosen_card = pick_ai_card(game, player_id)
            else:
                chosen_card = random_agent.choose_action(game)
            if chosen_card is None:
                # No playable card: abandon this game and score it as it stands.
                break
            game.play_card(player_id, chosen_card)
        ai_reward = game.get_final_rewards()[ai_team]
        total_rewards.append(ai_reward)
        if ai_reward > 0:
            wins += 1
    return wins, total_rewards
if __name__ == '__main__':
    # Load the checkpoint, evaluate it against random opponents, then report and plot.
    trained_model = load_trained_agent(MODEL_CHECKPOINT_TO_TEST, STATE_SIZE, ACTION_SIZE, 4)
    if trained_model:
        wins, rewards = evaluate_model(trained_model, NUM_GAMES_TO_SIMULATE, AI_PLAYER_ID)
        win_rate = (wins / NUM_GAMES_TO_SIMULATE) * 100
        avg_reward = np.mean(rewards)

        report_lines = [
            "\n--- Evaluation Results ---",
            f"Model: {MODEL_CHECKPOINT_TO_TEST}",
            f"Games Played: {NUM_GAMES_TO_SIMULATE}",
            f"Win Rate vs. Random Agents: {win_rate:.2f}%",
            f"Average Reward per Game: {avg_reward:.2f}",
        ]
        print("\n".join(report_lines))

        # Histogram of per-game rewards with the mean and the zero line marked.
        plt.style.use('seaborn-v0_8-whitegrid')
        fig, ax1 = plt.subplots(figsize=(12, 6))
        sns.histplot(rewards, bins=20, kde=True, ax=ax1, color='skyblue', label='Reward Distribution')
        ax1.set_title(
            f'Reward Distribution for Model: {os.path.basename(MODEL_CHECKPOINT_TO_TEST)}',
            fontsize=16,
        )
        ax1.set_xlabel('Final Reward', fontsize=12)
        ax1.set_ylabel('Number of Games', fontsize=12)
        ax1.axvline(avg_reward, color='red', linestyle='--', label=f'Avg Reward: {avg_reward:.2f}')
        ax1.axvline(0, color='black', linestyle='-', linewidth=0.8)
        ax1.legend()
        plt.show()

In [None]:
# ====================================================================
#          FINAL MODEL ANALYSIS AND VISUALIZATION CELL
#       (Completely independent of the training script)
# ====================================================================

# 1. Imports for this cell
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import sys
import gc

# Ensure our custom classes from the 'src' folder are available
sys.path.append('..') 
from src.game import GameState
from src.agent import MendikotModel

# --- Configuration for the Analysis ---
# Checkpoint under test (absolute path on the author's machine).
MODEL_CHECKPOINT_TO_TEST = (
    'D:/mendikot_ai_project/models/'
    '2025-10-24_20-15-23/'
    'mendikot_model_ep_3000.pth'
)
NUM_GAMES_TO_SIMULATE, AI_PLAYER_ID = 500, 0
# 112 matches the vector built by this cell's state_to_tensor if
# played_cards_history has 48 entries (48 + 48 + 4 + 4 + 4 + 4) -- TODO confirm
# against src.game.
STATE_SIZE, ACTION_SIZE = 112, 12

# --- DEFINE THE DEVICE ---
# This cell runs locally, so the CPU is used.
device = torch.device('cpu')
print(f"Analysis running on device: {device}")
# --- END OF NEW ---

# 2. Define a standalone state_to_tensor helper function
def state_to_tensor(game_state, player_id):
    """Build the network input for `player_id` as a (1, N) float tensor.

    Segments, in order: own hand (48), game_state.played_cards_history,
    trump suit one-hot (4), led suit one-hot (4), player info (4), scores (4).
    Assumes played_cards_history is a flat numeric vector -- TODO confirm
    against src.game.
    """
    suit_map = {'H': 0, 'D': 1, 'C': 2, 'S': 3}

    def one_hot_suit(suit):
        # All zeros while the suit is undecided; unknown suit strings map to slot 0.
        encoded = np.zeros(4)
        if suit is not None:
            encoded[suit_map.get(suit, 0)] = 1
        return encoded

    my_hand_vec = np.zeros(48)
    my_hand_vec[game_state.hands[player_id]] = 1
    played_history_vec = game_state.played_cards_history
    trump_vec = one_hot_suit(game_state.trump_suit)
    lead_suit_vec = one_hot_suit(game_state.current_trick_lead_suit)

    player_info = np.array([
        player_id / 4.0,
        game_state.current_player_index / 4.0,
        game_state.dealer_index / 4.0,
        len(game_state.trick_cards) / 4.0,
    ])
    scores_vec = np.array([
        game_state.mendis_captured[0] / 4.0,
        game_state.tricks_captured[0] / ACTION_SIZE,
        game_state.mendis_captured[1] / 4.0,
        game_state.tricks_captured[1] / ACTION_SIZE,
    ])

    state_vector = np.concatenate(
        [my_hand_vec, played_history_vec, trump_vec, lead_suit_vec, player_info, scores_vec]
    )
    return torch.FloatTensor(state_vector).unsqueeze(0)

# 3. Define a simple Baseline Agent
class RandomAgent:
    """Baseline agent: picks uniformly among the legal cards."""

    def choose_action(self, game_state):
        """Return one random legal card, or None when no move is available."""
        options = game_state.get_legal_moves()
        if options:
            return np.random.choice(options)
        return None

# 4. Function to load our trained AI model
def load_trained_agent(model_path, state_size, action_size, num_players):
    """Restore a MendikotModel from `model_path` and switch it to eval mode.

    Returns:
        The model placed on `device`, or None (after printing an error) if the
        checkpoint file does not exist.
    """
    print(f"Loading model from: {model_path}")
    if os.path.exists(model_path):
        model = MendikotModel(state_size, action_size, num_players)
        # map_location makes the stored tensors load onto the configured device.
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()
        return model

    print(f"ERROR: Model file not found at {model_path}")
    return None

# 5. The main evaluation loop
def evaluate_model(model, num_games, ai_player_id):
    """Pit `model` (playing greedily for the AI's team) against random opponents.

    Returns:
        (wins, total_rewards): count of games in which the AI's team received a
        positive reward, and the list of that team's reward for every game.
    """
    wins, total_rewards = 0, []
    random_agent = RandomAgent()
    for _ in tqdm(range(num_games), desc="Evaluating Model"):
        game = GameState(num_players=4)
        ai_team = game.get_team(ai_player_id)
        while not game.is_game_over():
            player_id = game.current_player_index
            if game.get_team(player_id) != ai_team:
                chosen_card = random_agent.choose_action(game)
            else:
                chosen_card = None
                with torch.no_grad():
                    # Ensure the input tensor is on the configured device.
                    state_tensor = state_to_tensor(game, player_id).to(device)
                    player_hand = game.hands[player_id]
                    legal_moves = game.get_legal_moves()
                    if not legal_moves:
                        break

                    policy_logits, _ = model(state_tensor)
                    # Policy outputs are indexed by hand position, so map cards to slots.
                    slot_by_card = {card_id: slot for slot, card_id in enumerate(player_hand)}
                    candidate_slots = [slot_by_card[c] for c in legal_moves if c in slot_by_card]

                    top_slot, top_logit = -1, -float('inf')
                    for slot in candidate_slots:
                        logit = policy_logits[0, slot]
                        if logit > top_logit:
                            top_logit, top_slot = logit, slot

                    if top_slot != -1:
                        chosen_card = player_hand[top_slot]
            if chosen_card is None:
                break
            game.play_card(player_id, chosen_card)

        ai_reward = game.get_final_rewards()[ai_team]
        total_rewards.append(ai_reward)
        if ai_reward > 0:
            wins += 1

        # Explicit collection after every game, as in the original loop.
        gc.collect()
    return wins, total_rewards

# 6. Run the evaluation and print/plot results
if __name__ == '__main__':
    # Evaluate the configured checkpoint and visualise the reward distribution.
    trained_model = load_trained_agent(MODEL_CHECKPOINT_TO_TEST, STATE_SIZE, ACTION_SIZE, 4)
    if trained_model:
        wins, rewards = evaluate_model(trained_model, NUM_GAMES_TO_SIMULATE, AI_PLAYER_ID)
        win_rate = (wins / NUM_GAMES_TO_SIMULATE) * 100
        avg_reward = np.mean(rewards)

        summary = (
            "\n--- Evaluation Results ---",
            f"Model: {MODEL_CHECKPOINT_TO_TEST}",
            f"Games Played: {NUM_GAMES_TO_SIMULATE}",
            f"Win Rate vs. Random Agents: {win_rate:.2f}%",
            f"Average Reward per Game: {avg_reward:.2f}",
        )
        for line in summary:
            print(line)

        plt.style.use('seaborn-v0_8-whitegrid')
        fig, ax1 = plt.subplots(figsize=(12, 6))
        sns.histplot(rewards, bins=20, kde=True, ax=ax1, color='skyblue', label='Reward Distribution')
        checkpoint_name = os.path.basename(MODEL_CHECKPOINT_TO_TEST)
        ax1.set_title(f'Reward Distribution for Model: {checkpoint_name}', fontsize=16)
        ax1.set_xlabel('Final Reward', fontsize=12)
        ax1.set_ylabel('Number of Games', fontsize=12)
        # Dashed red line: mean reward; thin black line: break-even.
        ax1.axvline(avg_reward, color='red', linestyle='--', label=f'Avg Reward: {avg_reward:.2f}')
        ax1.axvline(0, color='black', linestyle='-', linewidth=0.8)
        ax1.legend()
        plt.show()