In [2]:
import torch

In [3]:
def iaTurn(board):
    print(board)
    normalize(board)
    return int(input("turno"))

Game : Designed by https://github.com/KeithGalli/Connect4-Python/

In [4]:
import numpy as np
import pygame
import sys
import math

pygame 2.6.1 (SDL 2.28.4, Python 3.12.11)
Hello from the pygame community. https://www.pygame.org/contribute.html


Juego con IA implementada

In [6]:
import numpy as np
import pygame

# Colores
BLUE = (0, 0, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
YELLOW = (255, 255, 0)

# Dimensiones del tablero
ROW_COUNT = 6
COLUMN_COUNT = 7

# Variables globales
board = None
game_over = None
turn = None
winner = None
screen = None
SQUARESIZE = None
size = None
width = None
height = None
RADIUS = None
myfont = None


def normalize(board):
    """Convierte el tablero a formato imagen (6x7x1)."""
    return board.reshape((ROW_COUNT, COLUMN_COUNT, 1))


def reset():
    """Reinicia el juego y crea un tablero nuevo."""
    global board, game_over, winner, turn
    global screen, SQUARESIZE, size, width, height, RADIUS, myfont

    board = create_board()
    print_board(board)
    game_over = False
    winner = None
    turn = 1  # ahora arranca el jugador 1 (pieza 1)

    pygame.init()

    SQUARESIZE = 100
    width = COLUMN_COUNT * SQUARESIZE
    height = (ROW_COUNT + 1) * SQUARESIZE
    size = (width, height)
    RADIUS = int(SQUARESIZE / 2 - 5)

    screen = pygame.display.set_mode(size)
    draw_board(board)
    pygame.display.update()

    myfont = pygame.font.SysFont("monospace", 75)


def create_board():
    """Crea un tablero vacío."""
    return np.zeros((ROW_COUNT, COLUMN_COUNT))


def drop_piece(board, row, col, piece):
    """Coloca una ficha en el tablero."""
    board[row][col] = piece


def is_valid_location(board, col):
    """Revisa si se puede jugar en esa columna."""
    return board[ROW_COUNT - 1][col] == 0


def get_next_open_row(board, col):
    """Devuelve la próxima fila libre en la columna."""
    for r in range(ROW_COUNT):
        if board[r][col] == 0:
            return r


def print_board(board):
    """Imprime el tablero en consola."""
    print(np.flip(board, 0))


def winning_move(board, piece):
    """Revisa si el jugador actual ganó."""
    # Horizontal
    for c in range(COLUMN_COUNT - 3):
        for r in range(ROW_COUNT):
            if all(board[r][c + i] == piece for i in range(4)):
                return True

    # Vertical
    for c in range(COLUMN_COUNT):
        for r in range(ROW_COUNT - 3):
            if all(board[r + i][c] == piece for i in range(4)):
                return True

    # Diagonal positiva
    for c in range(COLUMN_COUNT - 3):
        for r in range(ROW_COUNT - 3):
            if all(board[r + i][c + i] == piece for i in range(4)):
                return True

    # Diagonal negativa
    for c in range(COLUMN_COUNT - 3):
        for r in range(3, ROW_COUNT):
            if all(board[r - i][c + i] == piece for i in range(4)):
                return True

    return False


def draw_board(board):
    """Dibuja el tablero con pygame."""
    global screen, SQUARESIZE, RADIUS, width, height

    for c in range(COLUMN_COUNT):
        for r in range(ROW_COUNT):
            pygame.draw.rect(
                screen,
                BLUE,
                (c * SQUARESIZE, r * SQUARESIZE + SQUARESIZE, SQUARESIZE, SQUARESIZE),
            )
            pygame.draw.circle(
                screen,
                BLACK,
                (int(c * SQUARESIZE + SQUARESIZE / 2),
                 int(r * SQUARESIZE + SQUARESIZE + SQUARESIZE / 2)),
                RADIUS,
            )

    for c in range(COLUMN_COUNT):
        for r in range(ROW_COUNT):
            if board[r][c] == 1:
                pygame.draw.circle(
                    screen,
                    RED,
                    (int(c * SQUARESIZE + SQUARESIZE / 2),
                     height - int(r * SQUARESIZE + SQUARESIZE / 2)),
                    RADIUS,
                )
            elif board[r][c] == 2:
                pygame.draw.circle(
                    screen,
                    YELLOW,
                    (int(c * SQUARESIZE + SQUARESIZE / 2),
                     height - int(r * SQUARESIZE + SQUARESIZE / 2)),
                    RADIUS,
                )

    pygame.display.update()


def moove(col):
    """Realiza un movimiento en la columna indicada."""
    global board, turn, game_over, winner

    if not game_over:
        piece = turn  # ahora el turno ES directamente la pieza (1 o 2)

        if is_valid_location(board, col):
            row = get_next_open_row(board, col)
            drop_piece(board, row, col, piece)

            if winning_move(board, piece):
                game_over = True
                winner = piece

            print_board(board)
            draw_board(board)

            # alterna entre jugador 1 (pieza 1) y jugador 2 (pieza 2)
            turn = 1 if turn == 2 else 2


In [7]:
class ConnectFourEnv:
    """
    Entorno de Connect Four para entrenamiento de IA.
    Usa:
      - board: tablero actual
      - turn: pieza que mueve actualmente (1 o 2)
      - game_over: si la partida terminó
      - winner: quién ganó
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Reinicia el juego."""
        self.board = np.zeros((ROW_COUNT, COLUMN_COUNT), dtype=np.int8)
        self.turn = 1            # turno = pieza directamente
        self.game_over = False
        self.winner = None
        return self.get_state()

    def valid_actions(self):
        """Columnas en las que se puede jugar."""
        return [c for c in range(COLUMN_COUNT) if self.board[ROW_COUNT - 1][c] == 0]

    def board_full(self):
        """Revisa si el tablero está lleno."""
        return len(self.valid_actions()) == 0

    def step(self, action):
        """
        Realiza un movimiento.
        action: columna donde poner la ficha
        Devuelve: next_state, reward, done, info
        """
        if self.game_over:
            raise ValueError("La partida terminó, resetear el entorno.")

        if action not in self.valid_actions():
            # castigo por jugar en columna inválida
            return self.get_state(), -10.0, True, {"invalid": True}

        # ubica la ficha
        row = get_next_open_row(self.board, action)
        drop_piece(self.board, row, action, self.turn)

        # revisa ganador
        if winning_move(self.board, self.turn):
            self.game_over = True
            self.winner = self.turn
            reward = 1.0  # recompensa por ganar
            done = True
        elif self.board_full():
            self.game_over = True
            self.winner = None
            reward = 0.0  # empate
            done = True
        else:
            reward = 0.0
            done = False
            # alterna turno
            self.turn = opponent_piece(self.turn)

        return self.get_state(), reward, done, {}

    def get_state(self):
        """Devuelve el estado canónico para la red neuronal."""
        return canonical_state(self.board, self.turn)

Entrenamiento de modelo

In [8]:
from collections import deque
import random
import os

# Si usás TF 2.x:
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras import layers, models, optimizers


In [9]:
def valid_actions_from_board(b):
    return [c for c in range(COLUMN_COUNT) if b[ROW_COUNT - 1][c] == 0]


def board_full(b):
    return len(valid_actions_from_board(b)) == 0


def opponent_piece(piece):
    return 2 if piece == 1 else 1


def canonical_state(b, turn):
    me = turn            # ahora turn ya es 1 o 2
    you = opponent_piece(me)
    s = np.zeros_like(b, dtype=np.float32)
    s[b == me] = 1.0
    s[b == you] = -1.0
    return s.reshape((ROW_COUNT, COLUMN_COUNT, 1))


In [10]:
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
import random
from collections import deque
import numpy as np

class DQNAgent:
    def __init__(self, env, gamma=0.99, lr=0.001, batch_size=64, memory_size=5000):
        self.env = env
        self.gamma = gamma
        self.lr = lr
        self.batch_size = batch_size
        self.memory = deque(maxlen=memory_size)

        # Red neuronal
        self.model = self.build_model()

    def build_model(self):
        model = models.Sequential()
        model.add(layers.Conv2D(64, (2,2), activation='relu', input_shape=(ROW_COUNT, COLUMN_COUNT, 1)))
        model.add(layers.Conv2D(128, (2,2), activation='relu'))
        model.add(layers.Flatten())
        model.add(layers.Dense(128, activation='relu'))
        model.add(layers.Dense(COLUMN_COUNT, activation='linear'))  # 7 salidas, una por columna
        model.compile(optimizer=optimizers.Adam(learning_rate=self.lr), loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state, epsilon=0.1):
        """ε-greedy policy"""
        if np.random.rand() < epsilon:
            return random.choice(self.env.valid_actions())
        q_values = self.model.predict(state[np.newaxis, ...], verbose=0)[0]
        # solo consideramos columnas válidas
        valid_cols = self.env.valid_actions()
        q_values_invalid = [-np.inf if c not in valid_cols else q_values[c] for c in range(COLUMN_COUNT)]
        return int(np.argmax(q_values_invalid))

    def replay(self):
        if len(self.memory) < self.batch_size:
            return
        batch = random.sample(self.memory, self.batch_size)
        states = []
        targets = []
        for state, action, reward, next_state, done in batch:
            target = self.model.predict(state[np.newaxis, ...], verbose=0)[0]
            if done:
                target[action] = reward
            else:
                t_next = self.model.predict(next_state[np.newaxis, ...], verbose=0)[0]
                target[action] = reward + self.gamma * np.max(t_next)
            states.append(state)
            targets.append(target)
        self.model.fit(np.array(states), np.array(targets), epochs=1, verbose=0)


In [12]:
# Parámetros de entrenamiento
EPISODES = 1000      # cantidad de partidas
EPSILON_START = 1.0  # exploración inicial
EPSILON_END = 0.05   # exploración mínima
EPSILON_DECAY = 0.9995
GAMMA = 0.99
BATCH_SIZE = 64

# Creamos entorno y agente
env = ConnectFourEnv()
agent = DQNAgent(env, gamma=GAMMA, batch_size=BATCH_SIZE)

epsilon = EPSILON_START

for episode in range(1, EPISODES + 1):
    state = env.reset()
    done = False

    while not done:
        # Acción del agente actual
        action = agent.act(state, epsilon)
        next_state, reward, done, info = env.step(action)

        # Guardamos transición en memoria
        agent.remember(state, action, reward, next_state, done)

        # Actualizamos estado
        state = next_state

    # Entrenamos la red después de la partida
    agent.replay()

    # Decay de epsilon
    epsilon = max(EPSILON_END, epsilon * EPSILON_DECAY)

    # Info cada 100 partidas
    if episode % 10 == 0:
        print(f"Episode {episode}, Epsilon {epsilon:.3f}, Winner: {env.winner}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Episode 10, Epsilon 0.995, Winner: 1
Episode 20, Epsilon 0.990, Winner: 2


KeyboardInterrupt: 

In [None]:
def iaTurn(agent, board, turn):
    """
    Devuelve la columna elegida por la IA para jugar.

    Parámetros:
        agent : DQNAgent entrenado
        board : np.array del tablero actual (6x7)
        turn  : pieza que mueve (1 o 2)

    Retorna:
        col : columna elegida por la IA
    """
    # Obtenemos el estado canónico desde la perspectiva de quien mueve
    state = canonical_state(board, turn)

    # ε=0 para que siempre elija la mejor acción según la red
    q_values = agent.model.predict(state[np.newaxis, ...], verbose=0)[0]

    # Solo consideramos columnas válidas
    valid_cols = [c for c in range(COLUMN_COUNT) if board[ROW_COUNT-1][c] == 0]
    q_values_filtered = [-np.inf if c not in valid_cols else q_values[c] for c in range(COLUMN_COUNT)]

    # Elegimos la columna con mayor Q-value
    col = int(np.argmax(q_values_filtered))
    return col


In [None]:
BLUE = (0,0,255)
BLACK = (0,0,0)
RED = (255,0,0)
YELLOW = (255,255,0)

ROW_COUNT = 6
COLUMN_COUNT = 7

def create_board():
    board = np.zeros((ROW_COUNT,COLUMN_COUNT))
    return board

def drop_piece(board, row, col, piece):
    board[row][col] = piece

def is_valid_location(board, col):
    return board[ROW_COUNT-1][col] == 0

def get_next_open_row(board, col):
    for r in range(ROW_COUNT):
        if board[r][col] == 0:
            return r

def print_board(board):
    print(np.flip(board, 0))

def winning_move(board, piece):
    # Check horizontal locations for win
    for c in range(COLUMN_COUNT-3):
        for r in range(ROW_COUNT):
            if board[r][c] == piece and board[r][c+1] == piece and board[r][c+2] == piece and board[r][c+3] == piece:
                return True

    # Check vertical locations for win
    for c in range(COLUMN_COUNT):
        for r in range(ROW_COUNT-3):
            if board[r][c] == piece and board[r+1][c] == piece and board[r+2][c] == piece and board[r+3][c] == piece:
                return True

    # Check positively sloped diaganols
    for c in range(COLUMN_COUNT-3):
        for r in range(ROW_COUNT-3):
            if board[r][c] == piece and board[r+1][c+1] == piece and board[r+2][c+2] == piece and board[r+3][c+3] == piece:
                return True

    # Check negatively sloped diaganols
    for c in range(COLUMN_COUNT-3):
        for r in range(3, ROW_COUNT):
            if board[r][c] == piece and board[r-1][c+1] == piece and board[r-2][c+2] == piece and board[r-3][c+3] == piece:
                return True

def draw_board(board):
    for c in range(COLUMN_COUNT):
        for r in range(ROW_COUNT):
            pygame.draw.rect(screen, BLUE, (c*SQUARESIZE, r*SQUARESIZE+SQUARESIZE, SQUARESIZE, SQUARESIZE))
            pygame.draw.circle(screen, BLACK, (int(c*SQUARESIZE+SQUARESIZE/2), int(r*SQUARESIZE+SQUARESIZE+SQUARESIZE/2)), RADIUS)

    for c in range(COLUMN_COUNT):
        for r in range(ROW_COUNT):
            if board[r][c] == 1:
                pygame.draw.circle(screen, RED, (int(c*SQUARESIZE+SQUARESIZE/2), height-int(r*SQUARESIZE+SQUARESIZE/2)), RADIUS)
            elif board[r][c] == 2:
                pygame.draw.circle(screen, YELLOW, (int(c*SQUARESIZE+SQUARESIZE/2), height-int(r*SQUARESIZE+SQUARESIZE/2)), RADIUS)
    pygame.display.update()


board = create_board()
print_board(board)
game_over = False
turn = 0

pygame.init()

SQUARESIZE = 100

width = COLUMN_COUNT * SQUARESIZE
height = (ROW_COUNT+1) * SQUARESIZE

size = (width, height)

RADIUS = int(SQUARESIZE/2 - 5)

screen = pygame.display.set_mode(size)
draw_board(board)
pygame.display.update()

myfont = pygame.font.SysFont("monospace", 75)

while not game_over:
    if turn==1:
            col = iaTurn(agent, board, turn)

            if is_valid_location(board, col):
                row = get_next_open_row(board, col)
                drop_piece(board, row, col, 2)

                if winning_move(board, 2):
                    label = myfont.render("IA wins!!", 1, YELLOW)
                    screen.blit(label, (40,10))
                    game_over = True
            print_board(board)
            draw_board(board)
            turn += 1
            turn = turn % 2

            if game_over:
                pygame.time.wait(3000)
    else:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

            if event.type == pygame.MOUSEMOTION:
                pygame.draw.rect(screen, BLACK, (0,0, width, SQUARESIZE))
                posx = event.pos[0]
                if turn == 0:
                    pygame.draw.circle(screen, RED, (posx, int(SQUARESIZE/2)), RADIUS)
                else:
                    pygame.draw.circle(screen, YELLOW, (posx, int(SQUARESIZE/2)), RADIUS)
            pygame.display.update()

            if event.type == pygame.MOUSEBUTTONDOWN:
                pygame.draw.rect(screen, BLACK, (0,0, width, SQUARESIZE))
                #print(event.pos)
                # Ask for Player 1 Input
                if turn == 0:
                    posx = event.pos[0]
                    print(posx)
                    col = int(math.floor(posx/SQUARESIZE))
                    print(col)
                    if is_valid_location(board, col):
                        row = get_next_open_row(board, col)
                        drop_piece(board, row, col, 1)

                        if winning_move(board, 1):
                            label = myfont.render("Player 1 wins!!", 1, RED)
                            screen.blit(label, (40,10))
                            game_over = True
                    print_board(board)
                    draw_board(board)
                    turn += 1
                    turn = turn % 2

                    if game_over:
                        pygame.time.wait(3000)

            else:
                continue
