In [26]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
import math
import sys

# --- COSTANTI DEL GIOCO ---
VUOTO = 0
NERO_PEDINA = 1
NERO_DAMA = 2
BIANCO_PEDINA = 3
BIANCO_DAMA = 4
DIMENSIONE = 8
ACTION_DIM = DIMENSIONE**4  # 4096 azioni possibili

print("Cella 1: Importazioni e Costanti caricate.")

Cella 1: Importazioni e Costanti caricate.


In [28]:
class CheckersEnv(gym.Env):
    """
    Ambiente Dama (Checkers) per Gymnasium.
    Logica completa con movimento pedine, dame e catture.
    """
    
    def __init__(self):
        super(CheckersEnv, self).__init__()
        
        # Spazio delle Azioni: intero che rappresenta (r1, c1, r2, c2)
        self.action_space = spaces.Discrete(DIMENSIONE**4)
        
        # Spazio degli Stati: Matrice 8x8
        self.observation_space = spaces.Box(
            low=VUOTO, 
            high=BIANCO_DAMA, 
            shape=(DIMENSIONE, DIMENSIONE), 
            dtype=np.int32
        )
        
        self.player_map = {
            NERO_PEDINA: NERO_DAMA, 
            BIANCO_PEDINA: BIANCO_DAMA
        }
        
        self.board = None
        self.current_player = None
        self.possible_moves = []

    def _initialize_board(self):
        """Imposta il tabellone iniziale."""
        board = np.zeros((DIMENSIONE, DIMENSIONE), dtype=np.int32)
        # NERO (1) in alto
        for r in range(3):
            for c in range(DIMENSIONE):
                if (r + c) % 2 == 1: 
                    board[r, c] = NERO_PEDINA
        # BIANCO (3) in basso
        for r in range(5, 8):
            for c in range(DIMENSIONE):
                if (r + c) % 2 == 1:
                    board[r, c] = BIANCO_PEDINA
        return board
        
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.board = self._initialize_board()
        self.current_player = NERO_PEDINA 
        self.possible_moves = self._find_all_legal_moves()
        return self.board.copy(), {'current_player': self.current_player}
        
    def render(self):
        print(f"\nTurno: {'NERO' if self.current_player in [1,2] else 'BIANCO'}")
        print("   0 1 2 3 4 5 6 7")
        for r, row in enumerate(self.board):
            print(f"{r} {row}")

    # --- LOGICA MOSSE ---
    def _decode_action(self, action_int):
        r1 = action_int // (DIMENSIONE**3)
        c1 = (action_int // (DIMENSIONE**2)) % DIMENSIONE
        r2 = (action_int // DIMENSIONE) % DIMENSIONE
        c2 = action_int % DIMENSIONE
        return r1, c1, r2, c2

    def _find_all_legal_moves(self):
        captures = []
        normal_moves = []
        king_val = self.player_map.get(self.current_player, 0)
        
        for r in range(DIMENSIONE):
            for c in range(DIMENSIONE):
                piece = self.board[r, c]
                if piece == self.current_player or piece == king_val:
                    captures.extend(self._get_piece_captures(r, c))
                    if not captures:
                        normal_moves.extend(self._get_piece_moves(r, c))
        
        return captures if captures else normal_moves

    def _get_piece_moves(self, r, c):
        moves = []
        piece = self.board[r, c]
        if piece == NERO_PEDINA: dirs = [(1, -1), (1, 1)]
        elif piece == BIANCO_PEDINA: dirs = [(-1, -1), (-1, 1)]
        else: dirs = [(-1, -1), (-1, 1), (1, -1), (1, 1)] # Dame

        for dr, dc in dirs:
            nr, nc = r + dr, c + dc
            if 0 <= nr < DIMENSIONE and 0 <= nc < DIMENSIONE:
                if self.board[nr, nc] == VUOTO:
                    moves.append((r, c, nr, nc))
        return moves

    def _get_piece_captures(self, r, c):
        captures = []
        piece = self.board[r, c]
        
        if piece in (NERO_PEDINA, NERO_DAMA):
            enemies = (BIANCO_PEDINA, BIANCO_DAMA)
            pawn_dirs = [(1, -1), (1, 1)]
        else:
            enemies = (NERO_PEDINA, NERO_DAMA)
            pawn_dirs = [(-1, -1), (-1, 1)]
            
        is_king = piece in (NERO_DAMA, BIANCO_DAMA)
        dirs = [(-1, -1), (-1, 1), (1, -1), (1, 1)] if is_king else pawn_dirs
        
        for dr, dc in dirs:
            mid_r, mid_c = r + dr, c + dc
            land_r, land_c = r + 2*dr, c + 2*dc
            
            if 0 <= land_r < DIMENSIONE and 0 <= land_c < DIMENSIONE:
                if self.board[mid_r, mid_c] in enemies:
                    if self.board[land_r, land_c] == VUOTO:
                        captures.append((r, c, land_r, land_c))
        return captures

    def _execute_move(self, r1, c1, r2, c2):
        piece = self.board[r1, c1]
        self.board[r2, c2] = piece
        self.board[r1, c1] = VUOTO
        reward = 0
        
        # Cattura
        if abs(r2 - r1) == 2:
            self.board[(r1 + r2) // 2, (c1 + c2) // 2] = VUOTO
            reward += 1.0
            
        # Promozione
        if piece == NERO_PEDINA and r2 == 7:
            self.board[r2, c2] = NERO_DAMA
            reward += 3.0
        elif piece == BIANCO_PEDINA and r2 == 0:
            self.board[r2, c2] = BIANCO_DAMA
            reward += 3.0
            
        return reward

    def step(self, action):
        r1, c1, r2, c2 = self._decode_action(action)
        move = (r1, c1, r2, c2)
        terminated = False
        reward = 0
        
        if move in self.possible_moves:
            reward += self._execute_move(r1, c1, r2, c2)
            # Cambio turno
            self.current_player = BIANCO_PEDINA if self.current_player == NERO_PEDINA else NERO_PEDINA
            
            # Controllo fine gioco
            self.possible_moves = self._find_all_legal_moves()
            if not self.possible_moves:
                terminated = True
                reward += 10.0 # Vittoria per chi ha appena mosso
        else:
            reward = -10.0 # Mossa illegale
            terminated = True 

        return self.board.copy(), reward, terminated, False, {}

print("Cella 2: Ambiente CheckersEnv definito.")

Cella 2: Ambiente CheckersEnv definito.


In [29]:
class DQN(nn.Module):
    def __init__(self, input_shape, n_actions):
        super(DQN, self).__init__()
        
        # Rete Convoluzionale (CNN) per analizzare la scacchiera
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Flatten()
        )
        
        # Dimensione dopo il flatten: 64 canali * 8 * 8
        conv_out_size = 64 * input_shape[0] * input_shape[1]
        
        # Strati Fully Connected
        self.fc = nn.Sequential(
            nn.Linear(conv_out_size, 512),
            nn.ReLU(),
            nn.Linear(512, n_actions) # Output: 4096 valori Q
        )

    def forward(self, x):
        # Aggiunge dimensione canale: (Batch, 8, 8) -> (Batch, 1, 8, 8)
        x = x.unsqueeze(1).float() 
        conv_out = self.conv(x)
        return self.fc(conv_out)

print("Cella 3: Modello DQN definito.")

Cella 3: Modello DQN definito.


In [30]:
class ReplayBuffer:
    def __init__(self, capacity):
        self.memory = deque(maxlen=capacity)

    def push(self, state, action, next_state, reward, done):
        self.memory.append((state, action, next_state, float(reward), done))

    def sample(self, batch_size):
        transitions = random.sample(self.memory, batch_size)
        batch = list(zip(*transitions))
        
        states = torch.tensor(np.array(batch[0]), dtype=torch.float32)
        actions = torch.tensor(batch[1], dtype=torch.long)
        next_states = torch.tensor(np.array(batch[2]), dtype=torch.float32)
        rewards = torch.tensor(batch[3], dtype=torch.float32)
        dones = torch.tensor(batch[4], dtype=torch.bool)
        
        return states, actions, next_states, rewards, dones

    def __len__(self):
        return len(self.memory)

print("Cella 4: ReplayBuffer definito.")

Cella 4: ReplayBuffer definito.


In [31]:
# --- HYPERPARAMETERS ---
BATCH_SIZE = 64
GAMMA = 0.99
EPS_START = 1.0
EPS_END = 0.05
EPS_DECAY = 10000
TARGET_UPDATE = 500
LR = 0.001

# --- SETUP ---
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = CheckersEnv()

policy_net = DQN((DIMENSIONE, DIMENSIONE), ACTION_DIM).to(device)
target_net = DQN((DIMENSIONE, DIMENSIONE), ACTION_DIM).to(device)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=LR)
memory = ReplayBuffer(10000)
steps_done = 0

def select_action(state):
    global steps_done
    eps_threshold = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)
    steps_done += 1
    
    if random.random() > eps_threshold:
        with torch.no_grad():
            state_t = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device)
            # Sceglie l'azione con il Q-value più alto
            return policy_net(state_t).argmax(1).item()
    else:
        # Azione casuale
        return env.action_space.sample()

def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    
    states, actions, next_states, rewards, dones = memory.sample(BATCH_SIZE)
    states, actions, next_states, rewards, dones = states.to(device), actions.to(device), next_states.to(device), rewards.to(device), dones.to(device)

    # Q(s, a)
    q_values = policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    
    # V(s')
    next_q_values = target_net(next_states).max(1)[0].detach()
    target_q_values = rewards + (GAMMA * next_q_values * (~dones))

    loss = nn.MSELoss()(q_values, target_q_values)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

print(f"Cella 5: Setup completato su {device}.")

Cella 5: Setup completato su cpu.


In [32]:
def train_agent(num_episodes):
    print(f"Inizio training per {num_episodes} episodi...")
    
    for i_episode in range(num_episodes):
        state, _ = env.reset()
        done = False
        total_reward = 0
        
        while not done:
            action = select_action(state)
            next_state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            
            memory.push(state, action, next_state, reward, done)
            state = next_state
            total_reward += reward
            
            optimize_model()
            
        if i_episode % TARGET_UPDATE == 0:
            target_net.load_state_dict(policy_net.state_dict())
            
        if i_episode % 100 == 0:
            print(f"Episodio {i_episode}, Reward Totale: {total_reward:.1f}, Epsilon: {EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY):.2f}")

    print("Fine training.")
    torch.save(policy_net.state_dict(), "dama_dqn_final.pth")

print("Cella 6: Funzione train_agent pronta.")

Cella 6: Funzione train_agent pronta.


In [34]:
# ==============================================================================
# MAIN - AVVIO DEL PROGRAMMA CON ACTION MASKING
# ==============================================================================

# Funzione per convertire una mossa (r1, c1, r2, c2) in indice azione (0-4095)
def encode_action(r1, c1, r2, c2):
    return r1 * (DIMENSIONE**3) + c1 * (DIMENSIONE**2) + r2 * DIMENSIONE + c2

# 1. ADDESTRAMENTO
# Aumentiamo un po' gli episodi per dargli una chance di imparare qualcosa
print("--- INIZIO ADDESTRAMENTO ---")
train_agent(num_episodes=100000) 

# 2. TEST PARTITA (Con Mascheramento delle azioni illegali)
print("\n--- TEST PARTITA (AI vs AI) ---")
obs, info = env.reset()
env.render()

step_count = 0
max_steps = 50 # Limitiamo la partita a 50 mosse per evitare loop infiniti

for _ in range(max_steps):
    step_count += 1
    
    # 1. Ottieni le previsioni dalla rete per TUTTE le azioni
    state_tensor = torch.tensor(obs, dtype=torch.float32).unsqueeze(0).to(device)
    with torch.no_grad():
        q_values = policy_net(state_tensor).cpu().numpy().flatten() # Vettore di 4096 valori
    
    # 2. Ottieni le mosse legali dall'ambiente
    legal_moves_tuples = env.possible_moves # Lista di (r1, c1, r2, c2)
    
    if not legal_moves_tuples:
        print("Nessuna mossa legale disponibile. Partita terminata.")
        break
        
    # 3. ACTION MASKING: Scegli la mossa legale con il Q-value più alto
    best_legal_action_idx = -1
    best_legal_q_value = -float('inf')
    
    for move in legal_moves_tuples:
        # Converti la mossa legale nel suo indice (0-4095)
        r1, c1, r2, c2 = move
        idx = encode_action(r1, c1, r2, c2)
        
        # Controlla se questa mossa ha un valore più alto delle altre legali trovate finora
        if q_values[idx] > best_legal_q_value:
            best_legal_q_value = q_values[idx]
            best_legal_action_idx = idx
            
    # Se per qualche motivo non trova nulla (impossibile se la lista non è vuota), prendi la prima a caso
    if best_legal_action_idx == -1:
        r1, c1, r2, c2 = legal_moves_tuples[0]
        best_legal_action_idx = encode_action(r1, c1, r2, c2)

    # 4. Esegui l'azione SCELTA TRA QUELLE LEGALI
    obs, reward, terminated, truncated, info = env.step(best_legal_action_idx)
    env.render()
    print(f"Step: {step_count}, Reward: {reward}")
    
    if terminated or truncated:
        winner = "NERO" if reward > 0 else "BIANCO" # Semplificazione basata sull'ultimo reward
        print(f"Fine partita. Risultato finale: {reward}")
        break

print("Test completato.")

--- INIZIO ADDESTRAMENTO ---
Inizio training per 100000 episodi...
Episodio 0, Reward Totale: -10.0, Epsilon: 0.92
Episodio 100, Reward Totale: -10.0, Epsilon: 0.91
Episodio 200, Reward Totale: -10.0, Epsilon: 0.90
Episodio 300, Reward Totale: -10.0, Epsilon: 0.89
Episodio 400, Reward Totale: -10.0, Epsilon: 0.88
Episodio 500, Reward Totale: -10.0, Epsilon: 0.87
Episodio 600, Reward Totale: -10.0, Epsilon: 0.86
Episodio 700, Reward Totale: -10.0, Epsilon: 0.85
Episodio 800, Reward Totale: -10.0, Epsilon: 0.84
Episodio 900, Reward Totale: -10.0, Epsilon: 0.84
Episodio 1000, Reward Totale: -10.0, Epsilon: 0.83
Episodio 1100, Reward Totale: -10.0, Epsilon: 0.82
Episodio 1200, Reward Totale: -10.0, Epsilon: 0.81
Episodio 1300, Reward Totale: -10.0, Epsilon: 0.80
Episodio 1400, Reward Totale: -10.0, Epsilon: 0.79
Episodio 1500, Reward Totale: -10.0, Epsilon: 0.78
Episodio 1600, Reward Totale: -10.0, Epsilon: 0.77
Episodio 1700, Reward Totale: -10.0, Epsilon: 0.76
Episodio 1800, Reward Total

KeyboardInterrupt: 

In [18]:
import torch
import numpy as np

# Carichiamo il cervello addestrato
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DQN((8, 8), 4096).to(device)

# Se hai già fatto il training lungo, carica i pesi:
try:
    model.load_state_dict(torch.load("dama_dqn_final.pth", map_location=device))
    model.eval() # Imposta in modalità valutazione (niente apprendimento)
    print("Modello caricato con successo!")
except:
    print("Attenzione: File del modello non trovato. L'IA giocherà a caso.")

def get_best_move_for_robot(board_matrix, current_player_color):
    """
    Input: 
        board_matrix: numpy array 8x8 (0=vuoto, 1=nero, etc...)
        current_player_color: 1 (Nero) o 3 (Bianco)
    Output:
        ((r1, c1), (r2, c2)): Coordinate di partenza e arrivo
    """
    
    # 1. Creiamo un ambiente temporaneo per calcolare le mosse legali
    temp_env = CheckersEnv()
    temp_env.board = board_matrix.copy()
    temp_env.current_player = current_player_color
    legal_moves = temp_env._find_all_legal_moves()
    
    if not legal_moves:
        return None # Nessuna mossa possibile (Perso)

    # 2. Chiediamo alla Rete Neurale
    state_tensor = torch.tensor(board_matrix, dtype=torch.float32).unsqueeze(0).to(device)
    with torch.no_grad():
        q_values = model(state_tensor).cpu().numpy().flatten()

    # 3. Action Masking (Scegli la migliore tra le legali)
    best_move = None
    best_val = -float('inf')

    for move in legal_moves:
        r1, c1, r2, c2 = move
        # Formula inversa per ottenere l'indice da (r,c)
        idx = r1 * 512 + c1 * 64 + r2 * 8 + c2
        
        if q_values[idx] > best_val:
            best_val = q_values[idx]
            best_move = ((r1, c1), (r2, c2))
            
    return best_move

# --- ESEMPIO DI UTILIZZO ---
# Immagina che la telecamera veda questa scacchiera:
fake_camera_board = np.zeros((8,8), dtype=int)
fake_camera_board[5, 0] = 3 # Pedina bianca in basso sinistra
fake_camera_board[4, 1] = 1 # Pedina nera vicina (da mangiare)

# Chiediamo all'IA cosa fare (Gioca il BIANCO = 3)
mossa = get_best_move_for_robot(fake_camera_board, current_player_color=3)

if mossa:
    start, end = mossa
    print(f"ROBOT: Sposta la pedina da {start} a {end}")
else:
    print("ROBOT: Non ho mosse, ho perso.")

Modello caricato con successo!
ROBOT: Sposta la pedina da (5, 0) a (3, 2)


In [25]:
import pygame
import sys
import torch
import torch.nn as nn
import numpy as np
import time
import gymnasium as gym
from gymnasium import spaces

# ==============================================================================
# 1. CONFIGURAZIONI E COSTANTI
# ==============================================================================
VUOTO = 0
NERO_PEDINA = 1
NERO_DAMA = 2
BIANCO_PEDINA = 3
BIANCO_DAMA = 4
DIMENSIONE = 8
SQUARE_SIZE = 80 # Grandezza caselle in pixel
WIDTH, HEIGHT = DIMENSIONE * SQUARE_SIZE, DIMENSIONE * SQUARE_SIZE

# Colori
RED = (200, 50, 50)
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
BEIGE = (210, 180, 140)
BROWN = (139, 69, 19)
BLUE = (50, 50, 200)
GREEN = (0, 255, 0)
GOLD = (255, 215, 0)

# ==============================================================================
# 2. CLASSE AMBIENTE (CheckersEnv) - LOGICA COMPLETA
# ==============================================================================
class CheckersEnv(gym.Env):
    def __init__(self):
        super(CheckersEnv, self).__init__()
        self.action_space = spaces.Discrete(DIMENSIONE**4)
        self.observation_space = spaces.Box(low=VUOTO, high=BIANCO_DAMA, shape=(DIMENSIONE, DIMENSIONE), dtype=np.int32)
        
        self.player_map = {NERO_PEDINA: NERO_DAMA, BIANCO_PEDINA: BIANCO_DAMA}
        self.board = self._initialize_board()
        self.current_player = NERO_PEDINA
        self.possible_moves = []

    def _initialize_board(self):
        board = np.zeros((DIMENSIONE, DIMENSIONE), dtype=np.int32)
        # NERO (1)
        for r in range(3):
            for c in range(DIMENSIONE):
                if (r + c) % 2 == 1: board[r, c] = NERO_PEDINA
        # BIANCO (3)
        for r in range(5, 8):
            for c in range(DIMENSIONE):
                if (r + c) % 2 == 1: board[r, c] = BIANCO_PEDINA
        return board
        
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.board = self._initialize_board()
        self.current_player = NERO_PEDINA 
        self.possible_moves = self._find_all_legal_moves()
        return self.board.copy(), {'current_player': self.current_player}

    def _decode_action(self, action_int):
        r1 = action_int // (DIMENSIONE**3)
        c1 = (action_int // (DIMENSIONE**2)) % DIMENSIONE
        r2 = (action_int // DIMENSIONE) % DIMENSIONE
        c2 = action_int % DIMENSIONE
        return r1, c1, r2, c2

    # --- LOGICA MOVIMENTO MANCANTE NEL TUO CODICE PRECEDENTE ---
    def _find_all_legal_moves(self):
        captures = []
        normal_moves = []
        king_val = self.player_map.get(self.current_player, 0)
        
        for r in range(DIMENSIONE):
            for c in range(DIMENSIONE):
                piece = self.board[r, c]
                if piece == self.current_player or piece == king_val:
                    captures.extend(self._get_piece_captures(r, c))
                    if not captures:
                        normal_moves.extend(self._get_piece_moves(r, c))
        
        return captures if captures else normal_moves

    def _get_piece_moves(self, r, c):
        moves = []
        piece = self.board[r, c]
        if piece == NERO_PEDINA: dirs = [(1, -1), (1, 1)]
        elif piece == BIANCO_PEDINA: dirs = [(-1, -1), (-1, 1)]
        else: dirs = [(-1, -1), (-1, 1), (1, -1), (1, 1)]

        for dr, dc in dirs:
            nr, nc = r + dr, c + dc
            if 0 <= nr < DIMENSIONE and 0 <= nc < DIMENSIONE:
                if self.board[nr, nc] == VUOTO:
                    moves.append((r, c, nr, nc))
        return moves

    def _get_piece_captures(self, r, c):
        captures = []
        piece = self.board[r, c]
        
        if piece in (NERO_PEDINA, NERO_DAMA):
            enemies = (BIANCO_PEDINA, BIANCO_DAMA)
            pawn_dirs = [(1, -1), (1, 1)]
        else:
            enemies = (NERO_PEDINA, NERO_DAMA)
            pawn_dirs = [(-1, -1), (-1, 1)]
            
        is_king = piece in (NERO_DAMA, BIANCO_DAMA)
        dirs = [(-1, -1), (-1, 1), (1, -1), (1, 1)] if is_king else pawn_dirs
        
        for dr, dc in dirs:
            mid_r, mid_c = r + dr, c + dc
            land_r, land_c = r + 2*dr, c + 2*dc
            
            if 0 <= land_r < DIMENSIONE and 0 <= land_c < DIMENSIONE:
                if self.board[mid_r, mid_c] in enemies:
                    if self.board[land_r, land_c] == VUOTO:
                        captures.append((r, c, land_r, land_c))
        return captures

    def _execute_move(self, r1, c1, r2, c2):
        piece = self.board[r1, c1]
        self.board[r2, c2] = piece
        self.board[r1, c1] = VUOTO
        reward = 0
        
        # Cattura
        if abs(r2 - r1) == 2:
            self.board[(r1 + r2) // 2, (c1 + c2) // 2] = VUOTO
            reward += 1.0
            
        # Promozione
        if piece == NERO_PEDINA and r2 == 7:
            self.board[r2, c2] = NERO_DAMA
            reward += 3.0
        elif piece == BIANCO_PEDINA and r2 == 0:
            self.board[r2, c2] = BIANCO_DAMA
            reward += 3.0
            
        return reward

    def step(self, action):
        r1, c1, r2, c2 = self._decode_action(action)
        move = (r1, c1, r2, c2)
        terminated = False
        reward = 0
        
        if move in self.possible_moves:
            reward += self._execute_move(r1, c1, r2, c2)
            # Cambio turno
            self.current_player = BIANCO_PEDINA if self.current_player == NERO_PEDINA else NERO_PEDINA
            
            self.possible_moves = self._find_all_legal_moves()
            if not self.possible_moves:
                terminated = True
                reward += 10.0 
        else:
            reward = -10.0 
            terminated = True 

        return self.board.copy(), reward, terminated, False, {}

# ==============================================================================
# 3. RETE NEURALE (DQN)
# ==============================================================================
class DQN(nn.Module):
    def __init__(self, input_shape, n_actions):
        super(DQN, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.ReLU(),
            nn.Flatten()
        )
        conv_out_size = 64 * input_shape[0] * input_shape[1]
        self.fc = nn.Sequential(
            nn.Linear(conv_out_size, 512), nn.ReLU(),
            nn.Linear(512, n_actions)
        )

    def forward(self, x):
        x = x.unsqueeze(1).float()
        return self.fc(self.conv(x))

# ==============================================================================
# 4. INTERFACCIA GRAFICA (PYGAME)
# ==============================================================================
def get_row_col_from_mouse(pos):
    x, y = pos
    row = y // SQUARE_SIZE
    col = x // SQUARE_SIZE
    return row, col

def main_gui():
    pygame.init()
    screen = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption('Dama AI vs Human')
    clock = pygame.time.Clock()

    env = CheckersEnv() 
    obs, _ = env.reset()
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    policy_net = DQN((8, 8), 8**4).to(device)
    
    try:
        policy_net.load_state_dict(torch.load("dama_dqn_final.pth", map_location=device))
        policy_net.eval()
        print("Modello AI caricato con successo!")
    except FileNotFoundError:
        print("ATTENZIONE: File 'dama_dqn_final.pth' non trovato.")
        print("L'IA giocherà mosse casuali (o la prima mossa legale disponibile).")

    selected_piece = None
    valid_destinations = [] 
    
    running = True
    game_over = False
    winner_text = ""

    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

            # --- TURNO UMANO (NERO) ---
            if event.type == pygame.MOUSEBUTTONDOWN and not game_over:
                if env.current_player == NERO_PEDINA:
                    pos = pygame.mouse.get_pos()
                    row, col = get_row_col_from_mouse(pos)
                    
                    clicked_piece = env.board[row, col]
                    
                    # 1. Seleziona pedina
                    if clicked_piece in (NERO_PEDINA, NERO_DAMA):
                        selected_piece = (row, col)
                        valid_destinations = []
                        for move in env.possible_moves:
                            r1, c1, r2, c2 = move
                            if (r1, c1) == selected_piece:
                                valid_destinations.append((r2, c2))
                                
                    # 2. Muovi pedina
                    elif selected_piece and (row, col) in valid_destinations:
                        r1, c1 = selected_piece
                        r2, c2 = (row, col)
                        action_idx = r1 * 512 + c1 * 64 + r2 * 8 + c2
                        
                        obs, reward, terminated, truncated, info = env.step(action_idx)
                        selected_piece = None
                        valid_destinations = []
                        
                        if terminated:
                            game_over = True
                            winner_text = "HAI VINTO!"

        # --- TURNO AI (BIANCO) ---
        if env.current_player == BIANCO_PEDINA and not game_over:
            pygame.display.set_caption("L'IA sta pensando...")
            pygame.display.flip()
            time.sleep(0.5) 
            
            legal_moves = env.possible_moves
            if not legal_moves:
                game_over = True
                winner_text = "HAI VINTO! (AI bloccata)"
            else:
                state_tensor = torch.tensor(env.board, dtype=torch.float32).unsqueeze(0).to(device)
                with torch.no_grad():
                    q_values = policy_net(state_tensor).cpu().numpy().flatten()
                
                best_action = -1
                best_val = -float('inf')
                
                # Action Masking
                found_move = False
                for move in legal_moves:
                    r1, c1, r2, c2 = move
                    idx = r1 * 512 + c1 * 64 + r2 * 8 + c2
                    if q_values[idx] > best_val:
                        best_val = q_values[idx]
                        best_action = idx
                        found_move = True
                
                # Fallback se qualcosa va storto
                if not found_move:
                    r1, c1, r2, c2 = legal_moves[0]
                    best_action = r1 * 512 + c1 * 64 + r2 * 8 + c2

                obs, reward, terminated, truncated, info = env.step(best_action)
                
                if terminated:
                    game_over = True
                    winner_text = "HA VINTO L'IA!"
            
            pygame.display.set_caption('Dama AI vs Human')

        # --- DISEGNO ---
        screen.fill(BLACK)
        
        # Scacchiera
        for r in range(DIMENSIONE):
            for c in range(DIMENSIONE):
                color = BEIGE if (r + c) % 2 == 0 else BROWN
                pygame.draw.rect(screen, color, (c*SQUARE_SIZE, r*SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE))
                if (r, c) in valid_destinations:
                    pygame.draw.circle(screen, GREEN, (c*SQUARE_SIZE + SQUARE_SIZE//2, r*SQUARE_SIZE + SQUARE_SIZE//2), 10)

        # Pedine
        for r in range(DIMENSIONE):
            for c in range(DIMENSIONE):
                piece = env.board[r, c]
                if piece != 0:
                    color = RED if piece in (1, 2) else WHITE
                    pygame.draw.circle(screen, color, (c*SQUARE_SIZE + SQUARE_SIZE//2, r*SQUARE_SIZE + SQUARE_SIZE//2), SQUARE_SIZE//2 - 10)
                    if piece in (2, 4):
                        pygame.draw.circle(screen, GOLD, (c*SQUARE_SIZE + SQUARE_SIZE//2, r*SQUARE_SIZE + SQUARE_SIZE//2), SQUARE_SIZE//2 - 25, 3)
                    if selected_piece == (r, c):
                        pygame.draw.rect(screen, GREEN, (c*SQUARE_SIZE, r*SQUARE_SIZE, SQUARE_SIZE, SQUARE_SIZE), 3)

        if game_over:
            font = pygame.font.SysFont(None, 75)
            text_surf = font.render(winner_text, True, BLUE)
            # Sfondo per il testo
            text_rect = text_surf.get_rect(center=(WIDTH//2, HEIGHT//2))
            pygame.draw.rect(screen, WHITE, text_rect.inflate(20, 20)) 
            screen.blit(text_surf, text_rect)

        pygame.display.flip()
        clock.tick(60)

    pygame.quit()
    sys.exit()

if __name__ == "__main__":
    main_gui()

Modello AI caricato con successo!


SystemExit: 

In [35]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
import math
import pygame
import sys
import time

# --- CONFIGURAZIONI ---
DIMENSIONE = 8
ACTION_DIM = DIMENSIONE**4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- AMBIENTE ---
class CheckersEnv(gym.Env):
    def __init__(self):
        super(CheckersEnv, self).__init__()
        self.action_space = spaces.Discrete(ACTION_DIM)
        self.observation_space = spaces.Box(low=0, high=4, shape=(8,8), dtype=np.int32)
        self.player_map = {1: 2, 3: 4}
        self.board = self._initialize_board()
        self.current_player = 1
        self.possible_moves = []

    def _initialize_board(self):
        board = np.zeros((8, 8), dtype=np.int32)
        for r in range(3):
            for c in range(8):
                if (r + c) % 2 == 1: board[r, c] = 1
        for r in range(5, 8):
            for c in range(8):
                if (r + c) % 2 == 1: board[r, c] = 3
        return board
        
    def reset(self, seed=None):
        self.board = self._initialize_board()
        self.current_player = 1
        self.possible_moves = self._find_all_legal_moves()
        return self.board.copy(), {}

    def _decode_action(self, action_int):
        r1 = action_int // 512
        c1 = (action_int // 64) % 8
        r2 = (action_int // 8) % 8
        c2 = action_int % 8
        return r1, c1, r2, c2

    def _find_all_legal_moves(self):
        captures = []
        normal = []
        king = self.player_map.get(self.current_player, 0)
        for r in range(8):
            for c in range(8):
                if self.board[r,c] == self.current_player or self.board[r,c] == king:
                    captures.extend(self._get_captures(r, c))
                    if not captures: normal.extend(self._get_moves(r, c))
        return captures if captures else normal

    def _get_moves(self, r, c):
        m = []
        dirs = [(1,-1),(1,1)] if self.board[r,c]==1 else [(-1,-1),(-1,1)]
        if self.board[r,c] in [2,4]: dirs = [(1,-1),(1,1),(-1,-1),(-1,1)]
        for dr, dc in dirs:
            if 0<=r+dr<8 and 0<=c+dc<8 and self.board[r+dr,c+dc]==0:
                m.append((r,c,r+dr,c+dc))
        return m

    def _get_captures(self, r, c):
        caps = []
        enemies = [3,4] if self.board[r,c] in [1,2] else [1,2]
        dirs = [(1,-1),(1,1)] if self.board[r,c] in [1,2] else [(-1,-1),(-1,1)]
        if self.board[r,c] in [2,4]: dirs = [(1,-1),(1,1),(-1,-1),(-1,1)]
        for dr, dc in dirs:
            if 0<=r+2*dr<8 and 0<=c+2*dc<8:
                if self.board[r+dr,c+dc] in enemies and self.board[r+2*dr,c+2*dc]==0:
                    caps.append((r,c,r+2*dr,c+2*dc))
        return caps

    def step(self, action):
        r1,c1,r2,c2 = self._decode_action(action)
        if (r1,c1,r2,c2) in self.possible_moves:
            self.board[r2,c2] = self.board[r1,c1]
            self.board[r1,c1] = 0
            rew = 0
            if abs(r2-r1) == 2: 
                self.board[(r1+r2)//2, (c1+c2)//2] = 0
                rew = 1.0
            if (self.board[r2,c2]==1 and r2==7) or (self.board[r2,c2]==3 and r2==0):
                self.board[r2,c2] += 1
                rew += 3.0
            
            self.current_player = 3 if self.current_player == 1 else 1
            self.possible_moves = self._find_all_legal_moves()
            
            done = not self.possible_moves
            if done: rew += 10.0
            
            return self.board.copy(), rew, done, False, {}
        else:
            return self.board.copy(), -10.0, True, False, {}

# --- AI ---
class DQN(nn.Module):
    def __init__(self):
        super(DQN, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.Flatten(),
            nn.Linear(64*64, 512), nn.ReLU(),
            nn.Linear(512, ACTION_DIM)
        )
    def forward(self, x):
        return self.net(x.unsqueeze(1).float())

print("Cella 1 completata: Classi definite.")

Cella 1 completata: Classi definite.


In [37]:
# Setup Training
env = CheckersEnv()
policy_net = DQN().to(DEVICE)
target_net = DQN().to(DEVICE)
target_net.load_state_dict(policy_net.state_dict())
optimizer = optim.Adam(policy_net.parameters(), lr=0.0001)
memory = deque(maxlen=20000)

steps_done = 0
EPS_START, EPS_END, EPS_DECAY = 1.0, 0.05, 100000

print("--- INIZIO ADDESTRAMENTO DOPPIO ---")
print("Obiettivo 1: 1.000 episodi -> 'dama_1k.pth'")
print("Obiettivo 2: 10.000 episodi -> 'dama_10k.pth'")

for i_episode in range(1, 10001):
    state, _ = env.reset()
    done = False
    
    while not done:
        steps_done += 1
        eps = EPS_END + (EPS_START - EPS_END) * math.exp(-1. * steps_done / EPS_DECAY)
        
        # Select Action
        if random.random() > eps:
            with torch.no_grad():
                state_t = torch.tensor(state).unsqueeze(0).to(DEVICE)
                action = policy_net(state_t).argmax().item()
        else:
            action = env.action_space.sample()
            
        # Step
        next_state, reward, term, trunc, _ = env.step(action)
        done = term or trunc
        
        memory.append((state, action, next_state, reward, done))
        state = next_state
        
        # Optimize
        if len(memory) > 64:
            batch = random.sample(memory, 64)
            states, acts, nexts, rews, dones = zip(*batch)
            
            S = torch.tensor(np.array(states)).to(DEVICE)
            A = torch.tensor(acts).unsqueeze(1).to(DEVICE)
            R = torch.tensor(rews).to(DEVICE)
            NS = torch.tensor(np.array(nexts)).to(DEVICE)
            D = torch.tensor(dones).to(DEVICE)
            
            Q = policy_net(S).gather(1, A).squeeze()
            next_Q = target_net(NS).max(1)[0].detach()
            Target = R + 0.99 * next_Q * (~D)
            
            loss = nn.MSELoss()(Q, Target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
    # Update Target Network
    if i_episode % 500 == 0:
        target_net.load_state_dict(policy_net.state_dict())
        print(f"Episodio {i_episode} completato. Epsilon: {eps:.2f}")

    # SALVATAGGIO STEP 1 (1.000 Episodi)
    if i_episode == 1000:
        torch.save(policy_net.state_dict(), "dama_1k.pth")
        print(">>> SALVATO MODELLO 1k: dama_1k.pth")

    # SALVATAGGIO STEP 2 (10.000 Episodi)
    if i_episode == 10000:
        torch.save(policy_net.state_dict(), "dama_10k.pth")
        print(">>> SALVATO MODELLO 10k: dama_10k.pth")

print("Addestramento completato.")

--- INIZIO ADDESTRAMENTO DOPPIO ---
Obiettivo 1: 1.000 episodi -> 'dama_1k.pth'
Obiettivo 2: 10.000 episodi -> 'dama_10k.pth'
Episodio 500 completato. Epsilon: 1.00
Episodio 1000 completato. Epsilon: 0.99
>>> SALVATO MODELLO 1k: dama_1k.pth
Episodio 1500 completato. Epsilon: 0.99
Episodio 2000 completato. Epsilon: 0.98
Episodio 2500 completato. Epsilon: 0.98
Episodio 3000 completato. Epsilon: 0.97
Episodio 3500 completato. Epsilon: 0.97
Episodio 4000 completato. Epsilon: 0.96
Episodio 4500 completato. Epsilon: 0.96
Episodio 5000 completato. Epsilon: 0.95
Episodio 5500 completato. Epsilon: 0.95
Episodio 6000 completato. Epsilon: 0.94
Episodio 6500 completato. Epsilon: 0.94
Episodio 7000 completato. Epsilon: 0.93
Episodio 7500 completato. Epsilon: 0.93
Episodio 8000 completato. Epsilon: 0.92
Episodio 8500 completato. Epsilon: 0.92
Episodio 9000 completato. Epsilon: 0.92
Episodio 9500 completato. Epsilon: 0.91
Episodio 10000 completato. Epsilon: 0.91
>>> SALVATO MODELLO 10k: dama_10k.pth


In [39]:
# --- CONFIGURAZIONE GRAFICA ---
SQUARE_SIZE = 80
WIDTH, HEIGHT = 640, 640
COLORS = {
    'W': (255,255,255), 'B': (0,0,0), 'R': (200,50,50), 
    'Beige': (210,180,140), 'Brown': (139,69,19), 
    'Green': (0,255,0), 'Gold': (255,215,0)
}

def play_game_notebook():
    # INPUT UTENTE
    print("Contro chi vuoi giocare?")
    print("1 -> Principiante (1k episodi)")
    print("2 -> Esperto (10k episodi)")
    scelta = input("Inserisci 1 o 2: ")
    
    if choice == '1':
        model_path = "dama_1k.pth"
        title = "Dama vs Principiante (1k)"
    else:
        model_path = "dama_10k.pth"
        title = "Dama vs Esperto (10k)"
        
    print(f"Caricamento {model_path}...")

    # Carica Modello
    game_net = DQN().to(DEVICE)
    try:
        game_net.load_state_dict(torch.load(model_path, map_location=DEVICE))
        game_net.eval()
    except FileNotFoundError:
        print("Errore: File non trovato. Hai eseguito la Cella 2 fino in fondo?")
        return

    # Inizializza Pygame
    pygame.init()
    screen = pygame.display.set_mode((WIDTH, HEIGHT))
    pygame.display.set_caption(title)
    clock = pygame.time.Clock()
    
    env = CheckersEnv()
    obs, _ = env.reset()
    
    selected = None
    running = True
    game_over = False
    font = pygame.font.SysFont(None, 60)

    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                pygame.quit()
                return # Esce dalla funzione per non bloccare il notebook

            # TURNO UMANO (Nero/Rosso = 1)
            if event.type == pygame.MOUSEBUTTONDOWN and not game_over and env.current_player == 1:
                pos = pygame.mouse.get_pos()
                r, c = pos[1]//SQUARE_SIZE, pos[0]//SQUARE_SIZE
                
                # Clicca su pezzo
                if env.board[r,c] in [1, 2]:
                    selected = (r,c)
                # Clicca su destinazione
                elif selected:
                    r1, c1 = selected
                    # Trova se la mossa è valida
                    act = r1*512 + c1*64 + r*8 + c
                    
                    # Cerca se l'azione è nelle mosse possibili
                    is_valid = False
                    for m in env.possible_moves:
                        if m == (r1, c1, r, c):
                            is_valid = True
                            break
                    
                    if is_valid:
                        obs, _, term, _, _ = env.step(act)
                        selected = None
                        if term: 
                            game_over = True
                            print("HAI VINTO!")

        # TURNO AI (Bianco = 3)
        if env.current_player == 3 and not game_over:
            pygame.display.flip()
            time.sleep(0.5)
            
            legal = env.possible_moves
            if not legal:
                game_over = True
                print("L'IA non ha mosse. HAI VINTO!")
            else:
                # Action Masking
                with torch.no_grad():
                    t_obs = torch.tensor(env.board).unsqueeze(0).to(DEVICE)
                    q_vals = game_net(t_obs).cpu().numpy().flatten()
                
                best_act = -1
                best_val = -float('inf')
                
                for (r1, c1, r2, c2) in legal:
                    idx = r1*512 + c1*64 + r2*8 + c2
                    if q_vals[idx] > best_val:
                        best_val = q_vals[idx]
                        best_act = idx
                
                obs, _, term, _, _ = env.step(best_act)
                if term:
                    game_over = True
                    print("L'IA HA VINTO!")

        # DISEGNO
        screen.fill(COLORS['B'])
        for r in range(8):
            for c in range(8):
                col = COLORS['Beige'] if (r+c)%2==0 else COLORS['Brown']
                pygame.draw.rect(screen, col, (c*80, r*80, 80, 80))
                
                # Evidenzia destinazioni
                if selected:
                    sr, sc = selected
                    for m in env.possible_moves:
                        if m[0] == sr and m[1] == sc:
                             pygame.draw.circle(screen, COLORS['Green'], (m[3]*80+40, m[2]*80+40), 10)

                p = env.board[r,c]
                if p != 0:
                    cc = COLORS['R'] if p in [1,2] else COLORS['W']
                    pygame.draw.circle(screen, cc, (c*80+40, r*80+40), 30)
                    if p in [2,4]:
                        pygame.draw.circle(screen, COLORS['Gold'], (c*80+40, r*80+40), 10)
                    if selected == (r,c):
                         pygame.draw.rect(screen, COLORS['Green'], (c*80, r*80, 80, 80), 3)
        
        if game_over:
            txt = font.render("GAME OVER", True, COLORS['Green'])
            screen.blit(txt, (200, 300))

        pygame.display.flip()
        clock.tick(30)

# Avvia il gioco
play_game_notebook()

Contro chi vuoi giocare?
1 -> Principiante (1k episodi)
2 -> Esperto (10k episodi)


NameError: name 'choice' is not defined