In [4]:
import tkinter as tk
import random

# Fonction pour initialiser la grille avec deux tuiles au départ
def initialize_game():
    """Build a fresh 4x4 board seeded with two starting tiles.

    Returns:
        A list of four 4-element lists. Every cell is 0 except for the cells
        filled in by two successive ``add_new_tile`` calls.
    """
    grid = []
    for _ in range(4):
        grid.append([0, 0, 0, 0])
    # The game starts with two tiles already placed.
    for _ in range(2):
        add_new_tile(grid)
    return grid

# Fonction pour ajouter une nouvelle tuile (2 ou 4)
def add_new_tile(board):
    """Place a 2 (90% of the time) or a 4 on a random empty cell, in place.

    Args:
        board: 4x4 list of lists; a cell holding 0 is considered empty.

    Returns:
        None. ``board`` is mutated; it is left untouched when it has no
        empty cell.
    """
    vacant = []
    for row_idx in range(4):
        for col_idx in range(4):
            if board[row_idx][col_idx] == 0:
                vacant.append((row_idx, col_idx))
    if not vacant:
        return
    target_row, target_col = random.choice(vacant)
    if random.random() < 0.9:
        board[target_row][target_col] = 2
    else:
        board[target_row][target_col] = 4

# Fonction pour déplacer et fusionner les tuiles à gauche
def move_left(board):
    """Slide every row to the left, merging equal neighbours once per move.

    Args:
        board: iterable of rows (lists of tile values, 0 meaning empty).

    Returns:
        A new list of rows; ``board`` itself is not modified. Each row is
        padded on the right with zeros up to length 4.
    """
    shifted = []
    for row in board:
        tiles = [value for value in row if value != 0]
        merged = []
        idx = 0
        while idx < len(tiles):
            has_twin = idx + 1 < len(tiles) and tiles[idx] == tiles[idx + 1]
            if has_twin:
                # A merged pair is consumed entirely, so the resulting tile
                # cannot merge again during the same move.
                merged.append(tiles[idx] * 2)
                idx += 2
            else:
                merged.append(tiles[idx])
                idx += 1
        merged.extend([0] * (4 - len(merged)))
        shifted.append(merged)
    return shifted

# Fonction pour inverser les lignes (pour déplacer à droite)
def reverse(board):
    """Return a new board in which every row is mirrored left-to-right."""
    mirrored = []
    for row in board:
        mirrored.append(row[::-1])
    return mirrored

# Fonction pour transposer la grille (pour déplacer vers le haut/bas)
def transpose(board):
    """Return a new board with rows and columns swapped (rows become lists)."""
    flipped = []
    for column in zip(*board):
        flipped.append(list(column))
    return flipped

# Fonctions pour les autres directions
def move_right(board):
    """Slide and merge tiles to the right.

    Implemented as a left move on the mirrored board, mirrored back.
    """
    mirrored = reverse(board)
    slid = move_left(mirrored)
    return reverse(slid)

def move_up(board):
    """Slide and merge tiles upwards.

    Implemented as a left move on the transposed board, transposed back.
    """
    columns_as_rows = transpose(board)
    slid = move_left(columns_as_rows)
    return transpose(slid)

def move_down(board):
    """Slide and merge tiles downwards.

    Implemented as a right move on the transposed board, transposed back.
    """
    columns_as_rows = transpose(board)
    slid = move_right(columns_as_rows)
    return transpose(slid)

# Fonction pour vérifier si le joueur a gagné
def check_win(board):
    """Return True when at least one row of ``board`` contains a 2048 tile."""
    for row in board:
        if 2048 in row:
            return True
    return False

# Fonction pour vérifier s'il reste des déplacements possibles
def check_game_over(board):
    """Return True when no move can change the board.

    The game is over only if there is no empty (0) cell and no pair of equal
    tiles adjacent horizontally or vertically.

    Args:
        board: 4x4 list of lists of tile values.
    """
    # Any empty cell means a move is still possible.
    for row in board:
        if 0 in row:
            return False

    # Equal horizontal neighbours can still be merged.
    mergeable_in_row = any(
        row[i] == row[i + 1] for row in board for i in range(3)
    )
    if mergeable_in_row:
        return False

    # Equal vertical neighbours can still be merged.
    mergeable_in_column = any(
        board[i][col] == board[i + 1][col] for col in range(4) for i in range(3)
    )
    return not mergeable_in_column

# Interface graphique tkinter
class Game2048:
    """Tkinter window that lets a human play 2048 on a 4x4 board.

    Creating an instance builds the window, draws the starting board, binds
    the keyboard and then enters Tk's main loop, so the constructor only
    returns once the window has been closed.

    Moves can be made with the arrow keys, ZQSD (AZERTY layout) or WASD
    (QWERTY layout). Key names are matched case-insensitively so the letter
    keys keep working with Caps Lock or Shift.
    """

    # Lower-cased Tk keysym -> direction name.
    KEY_DIRECTIONS = {
        'up': 'up', 'z': 'up', 'w': 'up',
        'down': 'down', 's': 'down',
        'left': 'left', 'q': 'left', 'a': 'left',
        'right': 'right', 'd': 'right',
    }

    # Background colour per tile value; other values use DEFAULT_TILE_COLOR.
    TILE_COLORS = {
        2: '#eee4da', 4: '#ede0c8', 8: '#f2b179', 16: '#f59563',
        32: '#f67c5f', 64: '#f65e3b', 128: '#edcf72', 256: '#edcc61',
        512: '#edc850', 1024: '#edc53f', 2048: '#edc22e'
    }
    DEFAULT_TILE_COLOR = '#3c3a32'
    EMPTY_CELL_COLOR = '#9e948a'

    def __init__(self):
        self.window = tk.Tk()
        self.window.title('2048')
        self.board = initialize_game()
        self.grid_cells = []
        # Label of the win / game-over overlay; created on first use and
        # reused afterwards so repeated messages do not stack widgets.
        self.message_label = None
        self.create_grid()
        self.update_grid()
        self.window.bind("<Key>", self.key_pressed)
        # Blocks until the window is closed.
        self.window.mainloop()

    def create_grid(self):
        """Create the 4x4 grid of labels and store them in ``self.grid_cells``."""
        background = tk.Frame(self.window, bg='#92877d', width=500, height=500)
        background.grid(pady=(100, 0))
        for i in range(4):
            row = []
            for j in range(4):
                cell = tk.Frame(background, bg=self.EMPTY_CELL_COLOR, width=100, height=100)
                cell.grid(row=i, column=j, padx=10, pady=10)
                label = tk.Label(cell, text='', bg=self.EMPTY_CELL_COLOR, justify=tk.CENTER,
                                 font=('Helvetica', 40, 'bold'), width=4, height=2)
                label.grid()
                row.append(label)
            self.grid_cells.append(row)

    def update_grid(self):
        """Refresh every label's text and colour from ``self.board``."""
        for i in range(4):
            for j in range(4):
                value = self.board[i][j]
                if value == 0:
                    self.grid_cells[i][j].config(text='', bg=self.EMPTY_CELL_COLOR)
                else:
                    self.grid_cells[i][j].config(text=str(value), bg=self.get_color(value))
        self.window.update_idletasks()

    def get_color(self, value):
        """Return the background colour (hex string) used for a tile value."""
        return self.TILE_COLORS.get(value, self.DEFAULT_TILE_COLOR)

    def key_pressed(self, event):
        """Handle a key press: apply the move, spawn a tile, check for the end.

        Keys that are not bound to a direction are ignored. A move that does
        not change the board is ignored as well (no new tile is added).
        """
        direction = self.KEY_DIRECTIONS.get(event.keysym.lower())
        if direction is None:
            return

        # Resolved here rather than at class level so the class body does not
        # depend on the move helpers at definition time.
        moves = {
            'up': move_up,
            'down': move_down,
            'left': move_left,
            'right': move_right,
        }
        new_board = moves[direction](self.board)

        # Only a move that actually changed the board earns a new tile.
        if new_board != self.board:
            self.board = new_board
            add_new_tile(self.board)
            self.update_grid()

            # Win is checked first, so it takes precedence over game over.
            if check_win(self.board):
                self.show_message('You Win!')
            elif check_game_over(self.board):
                self.show_message('Game Over!')

    def show_message(self, message):
        """Show ``message`` in an overlay centred on the window.

        The overlay is created once; later calls only update its text, so
        continuing to play after a win does not pile up new widgets.
        """
        if self.message_label is None:
            msg_frame = tk.Frame(self.window, width=200, height=100)
            msg_frame.place(relx=0.5, rely=0.5, anchor=tk.CENTER)
            self.message_label = tk.Label(msg_frame, text=message, font=('Helvetica', 24, 'bold'))
            self.message_label.pack()
        else:
            self.message_label.config(text=message)

# Lancer le jeu
if __name__ == "__main__":
    # Constructing Game2048 builds the window and enters Tk's main loop from
    # inside __init__, so this call blocks until the window is closed.
    Game2048()

In [5]:
import numpy as np
import random

class Game2048Env:
    """Small reinforcement-learning style wrapper around the 2048 board helpers.

    The board is kept as a list of lists in ``self.board``; observations
    returned to the caller are fresh numpy arrays built from it.

    Actions: 0 = up, 1 = down, 2 = left, 3 = right. Any other value leaves
    the board unchanged.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Start a new game and return the initial board as a numpy array."""
        self.board = initialize_game()
        return np.array(self.board)

    def step(self, action):
        """Apply ``action`` and return ``(state, reward, done)``.

        A new tile is spawned only if the move changed the board. ``done`` is
        True when a 2048 tile is present or no move is possible any more.
        """
        board_before = np.copy(self.board)

        action_table = (
            (0, move_up),
            (1, move_down),
            (2, move_left),
            (3, move_right),
        )
        for code, mover in action_table:
            if action == code:
                self.board = mover(self.board)
                break

        board_changed = not np.array_equal(board_before, self.board)
        if board_changed:
            add_new_tile(self.board)

        reward = self.compute_reward(board_before)
        finished = check_win(self.board) or check_game_over(self.board)

        return np.array(self.board), reward, finished

    def compute_reward(self, prev_board):
        """Return the sum of the current board minus the sum of ``prev_board``.

        NOTE(review): a merge replaces two equal tiles with their sum, so the
        board total only changes when a tile is spawned; confirm this is the
        intended reward signal.
        """
        total_now = np.sum(self.board)
        total_before = np.sum(prev_board)
        return total_now - total_before

    def get_state(self):
        """Return the current board as a new numpy array."""
        return np.array(self.board)

# Action encoding used by Game2048Env.step: 0 = up, 1 = down, 2 = left, 3 = right

In [10]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import random
from collections import deque

class DQNAgent:
    """Epsilon-greedy Q-learning agent with an online and a target Keras model.

    Transitions are stored in a bounded replay memory (``deque`` with
    ``maxlen=2000``); ``replay`` samples from it to fit the online model
    against targets computed with the target model.

    Args:
        input_shape: shape of a single observation (without the batch axis).
        num_actions: number of discrete actions / Q-value outputs.
    """

    def __init__(self, input_shape, num_actions):
        self.num_actions = num_actions
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # Discount rate
        self.epsilon = 1.0  # Exploration rate
        self.epsilon_min = 0.1
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001  # Learning rate

        # Build the online Q-network and the target network.
        self.model = self.create_model(input_shape, num_actions)
        self.target_model = self.create_model(input_shape, num_actions)
        # Each model is initialised independently, so copy the online weights
        # once; otherwise the first targets come from an unrelated network.
        self.update_target_model()

    def create_model(self, input_shape, num_actions):
        """Build and compile the Q-network (two hidden layers of 128 units)."""
        model = tf.keras.Sequential([
            layers.Input(shape=input_shape),
            layers.Flatten(),
            layers.Dense(128, activation='relu'),
            layers.Dense(128, activation='relu'),
            layers.Dense(num_actions, activation='linear')  # Output: Q-values for each action
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def update_target_model(self):
        """Copy the online model's weights into the target model."""
        self.target_model.set_weights(self.model.get_weights())

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action with an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest predicted Q-value is returned.

        Args:
            state: observation including a leading batch axis of size 1
                (only row 0 of the prediction is used).

        Returns:
            The chosen action index as a Python ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.num_actions)
        q_values = self.model.predict(state, verbose=0)
        # Cast so both branches return the same type.
        return int(np.argmax(q_values[0]))

    def replay(self, batch_size=64):
        """Train the online model on a random minibatch from the replay memory.

        Does nothing until the memory holds at least ``batch_size``
        transitions. After training, epsilon is decayed once, but never below
        ``epsilon_min``.

        Args:
            batch_size: number of transitions sampled for this update.
        """
        if len(self.memory) < batch_size:  # Not enough experience yet
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = self.model.predict(state, verbose=0)
            if done:
                # Terminal transition: no future value to bootstrap from.
                target[0][action] = reward
            else:
                t = self.target_model.predict(next_state, verbose=0)
                target[0][action] = reward + self.gamma * np.amax(t[0])

            self.model.fit(state, target, epochs=1, verbose=0)

        # Decay exploration, clamping so the last step cannot undershoot the
        # floor. An epsilon already at or below the floor is left untouched.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [11]:
import time
import tkinter as tk

class Game2048EnvGUI(Game2048Env):
    """Game2048Env that also draws its board on a Tk canvas.

    Args:
        root: the Tk root window the 400x400 canvas is packed into.
    """

    def __init__(self, root):
        super().__init__()
        self.root = root
        self.root.title("2048 Training")
        self.canvas = tk.Canvas(root, width=400, height=400)
        self.canvas.pack()
        # One canvas text item per cell, centred in a 100x100 pixel square.
        self.tiles = []
        for i in range(4):
            row_items = []
            for j in range(4):
                item_id = self.canvas.create_text(
                    100 * j + 50, 100 * i + 50, text='', font=("Helvetica", 30)
                )
                row_items.append(item_id)
            self.tiles.append(row_items)

    def update_gui(self):
        """Redraw every cell from ``self.board`` and process pending Tk events."""
        for i in range(4):
            for j in range(4):
                value = self.board[i][j]
                if value != 0:
                    caption = str(value)
                else:
                    caption = ''
                self.canvas.itemconfig(self.tiles[i][j], text=caption)
        self.root.update()

# Training demo: play a few episodes with the DQN agent while drawing each
# step in a Tk window, then hand control to Tk's main loop.
if __name__ == "__main__":
    root = tk.Tk()
    env_gui = Game2048EnvGUI(root)
    agent = DQNAgent(input_shape=(4, 4), num_actions=4)
    
    episodes = 10  # Kept small so the run can be watched quickly
    for e in range(episodes):
        # Add a leading batch axis of size 1 before passing the board to the agent.
        state = env_gui.reset().reshape(1, 4, 4)
        done = False
        total_reward = 0

        while not done:
            action = agent.act(state)
            next_state, reward, done = env_gui.step(action)
            next_state = next_state.reshape(1, 4, 4)

            total_reward += reward
            agent.remember(state, action, reward, next_state, done)
            state = next_state

            env_gui.update_gui()  # Redraw the window with the new game state
            time.sleep(0.5)  # Pause so each action can be seen

            if done:
                print(f"Episode: {e+1}/{episodes}, Total Reward: {total_reward}")
                # The target network is refreshed once per finished episode.
                agent.update_target_model()
                
        # One replay (training) pass per episode, after the episode has ended.
        agent.replay()

    # Keep the window open after training until the user closes it.
    root.mainloop()

Episode: 1/10, Total Reward: 382
Episode: 2/10, Total Reward: 248
Episode: 3/10, Total Reward: 348
Episode: 4/10, Total Reward: 200
Episode: 5/10, Total Reward: 438
Episode: 6/10, Total Reward: 284
Episode: 7/10, Total Reward: 170
Episode: 8/10, Total Reward: 206
Episode: 9/10, Total Reward: 272
Episode: 10/10, Total Reward: 422
