In [1]:
import copy
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
from gym import spaces
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

In [2]:
# Define the Othello game logic
class Othello:
    def __init__(self):
        self.board = [[' ' for _ in range(8)] for _ in range(8)]
        self.initialize_board()

    def initialize_board(self):
        self.board[3][3] = 'W'
        self.board[3][4] = 'B'
        self.board[4][3] = 'B'
        self.board[4][4] = 'W'

    def is_valid_move(self, row, col, player):
        if self.board[row][col] != ' ':
            return False
        opponent = 'B' if player == 'W' else 'W'
        valid = False
        directions = [(0, 1), (1, 0), (0, -1), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]
        for dr, dc in directions:
            r, c = row + dr, col + dc
            count = 0
            while 0 <= r < 8 and 0 <= c < 8 and self.board[r][c] == opponent:
                r += dr
                c += dc
                count += 1
            if count > 0 and 0 <= r < 8 and 0 <= c < 8 and self.board[r][c] == player:
                valid = True
        return valid

    def make_move(self, row, col, player):
        if not self.is_valid_move(row, col, player):
            return False
        self.board[row][col] = player
        opponent = 'B' if player == 'W' else 'W'
        directions = [(0, 1), (1, 0), (0, -1), (-1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]
        for dr, dc in directions:
            r, c = row + dr, col + dc
            path = []
            while 0 <= r < 8 and 0 <= c < 8 and self.board[r][c] == opponent:
                path.append((r, c))
                r += dr
                c += dc
            if path and 0 <= r < 8 and 0 <= c < 8 and self.board[r][c] == player:
                for pr, pc in path:
                    self.board[pr][pc] = player
        return True

    def get_valid_moves(self, player):
        moves = []
        for row in range(8):
            for col in range(8):
                if self.is_valid_move(row, col, player):
                    moves.append((row, col))
        return moves

    def has_valid_moves(self, player):
        return len(self.get_valid_moves(player)) > 0

    def count_pieces(self):
        black = sum(row.count('B') for row in self.board)
        white = sum(row.count('W') for row in self.board)
        return black, white

    def evaluate_board(self):
        black, white = self.count_pieces()
        return black - white

    def is_board_full(self):
        for row in self.board:
            if ' ' in row:
                return False
        return True

    def is_game_over(self):
        return self.is_board_full() or (not self.has_valid_moves('B') and not self.has_valid_moves('W'))

In [3]:
# Define the Gym environment for Othello
class OthelloEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self):
        super(OthelloEnv, self).__init__()
        self.othello = Othello()
        self.action_space = spaces.Discrete(64)  # 8x8 board
        self.observation_space = spaces.Box(low=0, high=2, shape=(8, 8), dtype=np.int8)
        self.player = 'B'  # Black starts

    def reset(self):
        self.othello = Othello()
        self.player = 'B'
        return self._get_obs()

    def _get_obs(self):
        obs = np.zeros((8, 8), dtype=np.int8)
        for i in range(8):
            for j in range(8):
                if self.othello.board[i][j] == 'B':
                    obs[i][j] = 1
                elif self.othello.board[i][j] == 'W':
                    obs[i][j] = 2
        return obs

    def step(self, action):
        row, col = divmod(action, 8)
        if not self.othello.is_valid_move(row, col, self.player):
            return self._get_obs(), -1, True, {}  # Invalid move, end the game

        self.othello.make_move(row, col, self.player)
        done = self.othello.is_game_over()

        if done:
            black, white = self.othello.count_pieces()
            if black > white:
                reward = 1 if self.player == 'B' else -1
            elif white > black:
                reward = 1 if self.player == 'W' else -1
            else:
                reward = 0  # Draw
        else:
            reward = 0  # Valid move but not end of game

        self.player = 'W' if self.player == 'B' else 'B'
        return self._get_obs(), reward, done, {}

    def render(self, mode='human'):
        for row in self.othello.board:
            print(' '.join(row))
        print()

In [5]:
# Define the DQN model
class DQN(nn.Module):
    def __init__(self):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(8 * 8, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 64)

    def forward(self, x):
        x = x.view(-1, 8 * 8)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [6]:
# Train the DQN model
def train_dqn(env, num_episodes=500, batch_size=64, gamma=0.99, epsilon_start=1.0, epsilon_end=0.01, epsilon_decay=0.995, lr=0.001):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dqn = DQN().to(device)
    optimizer = optim.Adam(dqn.parameters(), lr=lr)
    criterion = nn.MSELoss()

    replay_buffer = deque(maxlen=10000)
    epsilon = epsilon_start

    for episode in range(num_episodes):
        state = env.reset()
        done = False
        total_reward = 0

        while not done:
            valid_moves = env.othello.get_valid_moves(env.player)
            if not valid_moves:
                break

            if random.random() < epsilon:
                action = random.choice([r * 8 + c for r, c in valid_moves])
            else:
                with torch.no_grad():
                    state_tensor = torch.FloatTensor(state).to(device).unsqueeze(0)
                    q_values = dqn(state_tensor)
                    valid_action_indices = [r * 8 + c for r, c in valid_moves]
                    action = valid_action_indices[q_values[0, valid_action_indices].argmax().item()]

            next_state, reward, done, _ = env.step(action)
            total_reward += reward
            replay_buffer.append((state, action, reward, next_state, done))
            state = next_state

            if len(replay_buffer) > batch_size:
                batch = random.sample(replay_buffer, batch_size)
                states, actions, rewards, next_states, dones = zip(*batch)

                states_tensor = torch.FloatTensor(np.array(states)).to(device)
                actions_tensor = torch.LongTensor(actions).to(device)
                rewards_tensor = torch.FloatTensor(rewards).to(device)
                next_states_tensor = torch.FloatTensor(np.array(next_states)).to(device)
                dones_tensor = torch.FloatTensor(dones).to(device)

                q_values = dqn(states_tensor).gather(1, actions_tensor.unsqueeze(1)).squeeze(1)
                next_q_values = dqn(next_states_tensor).max(1)[0]
                target_q_values = rewards_tensor + gamma * next_q_values * (1 - dones_tensor)

                loss = criterion(q_values, target_q_values)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        epsilon = max(epsilon_end, epsilon * epsilon_decay)
        print(f"Episode {episode + 1}: Total Reward = {total_reward}")

    return dqn

In [7]:
# Save the trained model
def save_model(model, filepath):
    torch.save(model.state_dict(), filepath)

# Load the trained model
def load_model(filepath):
    model = DQN()
    model.load_state_dict(torch.load(filepath))
    model.eval()
    return model

def convert_to_keras(model):
    keras_model = Sequential()
    keras_model.add(Flatten(input_shape=(8, 8)))
    keras_model.add(Dense(128, activation='relu'))
    keras_model.add(Dense(128, activation='relu'))
    keras_model.add(Dense(64, activation='linear'))
    
    # Build the model to initialize weights
    keras_model.build((None, 8, 8))

    # Set weights for each layer
    keras_model.layers[1].set_weights([model.fc1.weight.cpu().detach().numpy().T, model.fc1.bias.cpu().detach().numpy()])
    keras_model.layers[2].set_weights([model.fc2.weight.cpu().detach().numpy().T, model.fc2.bias.cpu().detach().numpy()])
    keras_model.layers[3].set_weights([model.fc3.weight.cpu().detach().numpy().T, model.fc3.bias.cpu().detach().numpy()])
    
    keras_model.compile(optimizer='adam', loss='mse')
    return keras_model

In [8]:
# Train the model
env = OthelloEnv()
dqn_model = train_dqn(env)

# Convert the PyTorch model to Keras and save as .keras file
keras_model = convert_to_keras(dqn_model)
keras_model.save('othello_dqn.keras')

Episode 1: Total Reward = 1
Episode 2: Total Reward = 0
Episode 3: Total Reward = 1
Episode 4: Total Reward = 0
Episode 5: Total Reward = -1
Episode 6: Total Reward = -1
Episode 7: Total Reward = 0
Episode 8: Total Reward = 0
Episode 9: Total Reward = 1
Episode 10: Total Reward = 0
Episode 11: Total Reward = 0
Episode 12: Total Reward = 1
Episode 13: Total Reward = 1
Episode 14: Total Reward = 1
Episode 15: Total Reward = 1
Episode 16: Total Reward = 0
Episode 17: Total Reward = 0
Episode 18: Total Reward = 1
Episode 19: Total Reward = 0
Episode 20: Total Reward = 1
Episode 21: Total Reward = 0
Episode 22: Total Reward = 0
Episode 23: Total Reward = 1
Episode 24: Total Reward = 0
Episode 25: Total Reward = 1
Episode 26: Total Reward = 0
Episode 27: Total Reward = -1
Episode 28: Total Reward = 0
Episode 29: Total Reward = 0
Episode 30: Total Reward = 1
Episode 31: Total Reward = -1
Episode 32: Total Reward = 0
Episode 33: Total Reward = 0
Episode 34: Total Reward = -1
Episode 35: Total 

  super().__init__(**kwargs)


In [9]:
import tkinter as tk
from tkinter import messagebox
import numpy as np
from tensorflow.keras.models import load_model

class OthelloGUI:
    def __init__(self, root):
        self.game = Othello()
        self.root = root
        self.root.title("Othello")
        self.canvas = tk.Canvas(root, width=400, height=400)
        self.canvas.pack()
        self.current_player = 'B'
        self.keras_model = load_model('othello_dqn.keras')  # Load pre-trained Keras model
        self.draw_board()
        self.canvas.bind("<Button-1>", self.handle_click)

    def draw_board(self):
        self.canvas.delete("all")
        for row in range(8):
            for col in range(8):
                x0 = col * 50
                y0 = row * 50
                x1 = x0 + 50
                y1 = y0 + 50
                self.canvas.create_rectangle(x0, y0, x1, y1, fill="green")
                if self.game.board[row][col] == 'B':
                    self.canvas.create_oval(x0 + 5, y0 + 5, x1 - 5, y1 - 5, fill="black")
                elif self.game.board[row][col] == 'W':
                    self.canvas.create_oval(x0 + 5, y0 + 5, x1 - 5, y1 - 5, fill="white")

        self.root.update()

    def handle_click(self, event):
        col = event.x // 50
        row = event.y // 50
        if self.game.is_valid_move(row, col, self.current_player):
            self.game.make_move(row, col, self.current_player)
            self.draw_board()
            self.switch_turn()
            self.check_game_over()

    def switch_turn(self):
        if self.current_player == 'B':
            self.current_player = 'W'
            if self.game.has_valid_moves(self.current_player):
                self.root.after(500, self.ai_move)
            else:
                self.current_player = 'B'
                if not self.game.has_valid_moves(self.current_player):
                    self.check_game_over()
        else:
            self.current_player = 'B'
            if not self.game.has_valid_moves(self.current_player):
                self.current_player = 'W'
                if not self.game.has_valid_moves(self.current_player):
                    self.check_game_over()
                else:
                    if self.current_player == 'W':
                        self.root.after(500, self.ai_move)

    def ai_move(self):
        valid_moves = self.game.get_valid_moves('W')
        if not valid_moves:
            self.switch_turn()
            return
        
        # Get the best move using the trained Keras model
        best_move = None
        max_q_value = float('-inf')
        
        for move in valid_moves:
            row, col = move
            temp_game = copy.deepcopy(self.game)
            temp_game.make_move(row, col, 'W')
            state = self._get_obs(temp_game)
            state_tensor = np.expand_dims(state, axis=0)
            q_values = self.keras_model.predict(state_tensor)
            max_q_value_for_move = np.max(q_values)
            
            if max_q_value_for_move > max_q_value:
                max_q_value = max_q_value_for_move
                best_move = move
        
        if best_move:
            row, col = best_move
            if self.game.is_valid_move(row, col, 'W'):
                self.game.make_move(row, col, 'W')
                self.draw_board()
                self.switch_turn()

    def check_game_over(self):
        if self.game.is_game_over():
            black, white = self.game.count_pieces()
            if black > white:
                winner = "Black wins!"
            elif white > black:
                winner = "White wins!"
            else:
                winner = "It's a tie!"
            messagebox.showinfo("Game Over", winner)
            self.root.destroy()

    def _get_obs(self, game):
        obs = np.zeros((8, 8), dtype=np.int8)
        for i in range(8):
            for j in range(8):
                if game.board[i][j] == 'B':
                    obs[i][j] = 1
                elif game.board[i][j] == 'W':
                    obs[i][j] = 2
        return obs

if __name__ == "__main__":
    root = tk.Tk()
    game = OthelloGUI(root)
    root.mainloop()

  saveable.load_own_variables(weights_store.get(inner_path))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12

Exception in Tkinter callback
Traceback (most recent call last):
  File "c:\Users\user\AppData\Local\Programs\Python\Python310\lib\tkinter\__init__.py", line 1921, in __call__
    return self.func(*args)
  File "C:\Users\user\AppData\Local\Temp\ipykernel_5344\1837262960.py", line 41, in handle_click
    self.check_game_over()
  File "C:\Users\user\AppData\Local\Temp\ipykernel_5344\1837262960.py", line 102, in check_game_over
    self.root.destroy()
  File "c:\Users\user\AppData\Local\Programs\Python\Python310\lib\tkinter\__init__.py", line 2341, in destroy
    self.tk.call('destroy', self._w)
_tkinter.TclError: can't invoke "destroy" command: application has been destroyed
