<a href="https://colab.research.google.com/github/drouvotlucas/Connect-4-game/blob/main/Puissance4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import output
from random import randint, random

!mkdir "/data"

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras.initializers import VarianceScaling
from tensorflow.keras.layers import Input, Dense, Activation, Lambda, Flatten, Dropout, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Définitions des modèles

In [None]:
def model(name):
  """Build and compile the convolutional network that scores the 7 columns.

  The network takes a (6, 7, 1) board, applies one 7x7 "same"-padded
  convolution with 16 filters, flattens, goes through three 50-unit ReLU
  layers and ends with 7 linear outputs (one score per column).

  Args:
    name: Name given to the Keras ``Sequential`` model.

  Returns:
    The compiled model (mean-squared-error loss, Adam with lr=0.001).
  """
  layers = [
      Conv2D(16, (7, 7), activation='relu', input_shape=(6, 7,1), padding="same"),
      Flatten(),
  ]
  # Fully connected layers, ending with one linear score per column.
  layers.extend(Dense(50, activation='relu') for _ in range(3))
  layers.append(Dense(7, activation=None))

  network = Sequential(layers, name=name)
  network.build()

  network.compile(
      loss='mean_squared_error',
      optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
      metrics=['accuracy'],
  )
  return network


In [None]:
# Instantiate the network named "Joueur" with the factory defined above and
# print its layer-by-layer summary.

Joueur = model(name="Joueur")
Joueur.summary()

Model: "Joueur"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_1 (Conv2D)           (None, 6, 7, 16)          800       
                                                                 
 flatten_1 (Flatten)         (None, 672)               0         
                                                                 
 dense_4 (Dense)             (None, 50)                33650     
                                                                 
 dense_5 (Dense)             (None, 50)                2550      
                                                                 
 dense_6 (Dense)             (None, 50)                2550      
                                                                 
 dense_7 (Dense)             (None, 7)                 357       
                                                                 
Total params: 39907 (155.89 KB)
Trainable params: 39907 (155

# Le puissance 4

In [None]:
import numpy as np
from random import randint

class Puissance4:
    """Connect-4 game state on a 6-row by 7-column board.

    Tokens are stacked from row index 0 upwards. ``grille`` stores each token
    under the number of the player who dropped it (1 or 2; player 1 starts);
    ``grilleAdversaire`` stores the same position with the two numbers
    swapped. 0 marks an empty cell in both arrays.

    ``__play__`` returns one of these status codes:
        0 -- the move wins the game,
        1 -- the game continues,
        2 -- the move was rejected (unknown or full column),
        3 -- the board is full without a winner (draw).
    """

    ROWS = 6
    COLS = 7

    def __init__(self) -> None:
        # Player whose turn it is (1 or 2).
        self.__player = 1
        # Column of the last accepted move of player 1 ("A") and player 2 ("J").
        self.__lastPlayedColumnA = None
        self.__lastPlayedColumnJ = None
        self.grille = np.zeros((self.ROWS, self.COLS), dtype=int)
        self.grilleAdversaire = np.zeros((self.ROWS, self.COLS), dtype=int)
        # Number of tokens already stacked in each column.
        self.__top = [0] * self.COLS

    def __undo(self, column) -> None:
        """Remove the top token of `column` from both grids, if there is one."""
        if self.__top[column] <= 0:
            return
        self.__top[column] -= 1
        row = self.__top[column]
        self.grille[row][column] = 0
        self.grilleAdversaire[row][column] = 0

    def __back__(self):
        """Remove the top token of the last column played by each player.

        Does nothing until both players have made at least one accepted move:
        the previous check looked only at player 1's last column and then
        indexed with player 2's, which raised ``TypeError`` on ``None``.
        The turn indicator is left untouched (two tokens are removed).
        """
        if self.__lastPlayedColumnA is None or self.__lastPlayedColumnJ is None:
            return
        self.__undo(self.__lastPlayedColumnJ)
        self.__undo(self.__lastPlayedColumnA)

    def __play__(self, column) -> int:
        """Drop the current player's token in `column`.

        Returns:
            The status code described in the class docstring. On a rejected
            move (code 2) the state is unchanged and the same player is still
            to move.
        """
        if not (0 <= column < self.COLS):
            print("This column doesn't exist")
            return 2
        if self.__top[column] >= self.ROWS:
            print("This column is full")
            return 2

        jeton = self.__player
        if jeton == 1:
            self.__lastPlayedColumnA = column
        else:
            self.__lastPlayedColumnJ = column

        row = self.__top[column]
        self.grille[row][column] = jeton
        self.grilleAdversaire[row][column] = 3 - jeton
        self.__top[column] += 1
        self.__player = 3 - jeton  # Next player
        return self.__checkWin__(column, jeton)

    def __checkWin__(self, column, jeton) -> int:
        """Return 0 (win), 3 (draw) or 1 (game continues) after a move in `column`."""
        win = (
            self.__checkColumn__(column, jeton)
            or self.__checkRow__(column, jeton)
            or self.__checkDiag1__(column, jeton)
            or self.__checkdiag2__(column, jeton)
        )
        if win:
            return 0  # Win
        # Draw only when every column is full. The former test
        # `any(self.__top) == 6` compared a bool with 6 and was never true.
        if all(height >= self.ROWS for height in self.__top):
            return 3  # Draw
        return 1  # Game continues

    def __countDirection(self, row, column, d_row, d_col, jeton) -> int:
        """Count consecutive `jeton` cells next to (row, column) along one direction."""
        count = 0
        i, j = row + d_row, column + d_col
        while 0 <= i < self.ROWS and 0 <= j < self.COLS and self.grille[i][j] == jeton:
            count += 1
            i += d_row
            j += d_col
        return count

    def __alignedFour(self, column, jeton, d_row, d_col) -> bool:
        """Tell whether the top token of `column` belongs to a run of >= 4 along an axis."""
        row = self.__top[column] - 1
        lenght = (
            1
            + self.__countDirection(row, column, d_row, d_col, jeton)
            + self.__countDirection(row, column, -d_row, -d_col, jeton)
        )
        return lenght >= 4

    def __checkRow__(self, column, jeton) -> bool:
        """Check the horizontal line through the top token of `column`."""
        return self.__alignedFour(column, jeton, 0, 1)

    def __checkColumn__(self, column, jeton) -> bool:
        """Check the vertical line through the top token of `column`."""
        return self.__alignedFour(column, jeton, 1, 0)

    def __checkDiag1__(self, column, jeton) -> bool:
        """Check the diagonal where the row index decreases as the column index increases."""
        return self.__alignedFour(column, jeton, 1, -1)

    def __checkdiag2__(self, column, jeton) -> bool:
        """Check the diagonal where the row index increases with the column index."""
        return self.__alignedFour(column, jeton, 1, 1)

    def getRandomColumn(self) -> int:
        """Return a uniformly drawn column index among the columns that are not full.

        Raises:
            ValueError: if every column is full.
        """
        open_columns = [
            column for column, height in enumerate(self.__top) if height < self.ROWS
        ]
        if not open_columns:
            raise ValueError("No playable column: the board is full")
        return open_columns[randint(0, len(open_columns) - 1)]


# IA MinMax

In [None]:
class IAMinMax:
    """Fixed-depth minimax player for a 6x7 Connect-4 board.

    The search always maximises for piece 2 and minimises for piece 1, and it
    assumes piece 2 is to move at the root. Boards are indexed
    ``board[row][col]`` with row 0 filled first; 0 marks an empty cell.
    """

    def __init__(self, profondeur=4):
        """Create the player.

        Args:
            profondeur: Search depth (in plies) used by ``play``.
        """
        self.profondeur = profondeur

    def is_valid_location(self, board, col):
        """Return True when the top cell of `col` is still empty."""
        return board[5][col] == 0

    def get_valid_locations(self, board):
        """Return the list of columns (in increasing order) that are not full."""
        return [col for col in range(7) if self.is_valid_location(board, col)]

    def winning_move(self, board, piece):
        """Return True when `piece` has four aligned cells anywhere on `board`."""
        # Horizontal
        for c in range(4):
            for r in range(6):
                if all(board[r][c + k] == piece for k in range(4)):
                    return True

        # Vertical
        for c in range(7):
            for r in range(3):
                if all(board[r + k][c] == piece for k in range(4)):
                    return True

        # Diagonal with row index increasing along with the column index
        for c in range(4):
            for r in range(3):
                if all(board[r + k][c + k] == piece for k in range(4)):
                    return True

        # Diagonal with row index decreasing as the column index increases
        for c in range(4):
            for r in range(3, 6):
                if all(board[r - k][c + k] == piece for k in range(4)):
                    return True

        return False

    def is_terminal_node(self, board):
        """Return True when either piece has won or no column can be played."""
        return self.winning_move(board, 1) or self.winning_move(board, 2) or len(self.get_valid_locations(board)) == 0

    def evaluate_window(self, window, piece):
        """Score one list of 4 cells from the point of view of `piece`."""
        score = 0
        opp_piece = 1 if piece == 2 else 2
        own = window.count(piece)
        empty = window.count(0)

        if own == 4:
            score += 1000000
        elif own == 3 and empty == 1:
            score += 5
        elif own == 2 and empty == 2:
            score += 2

        # An opponent threat (three cells plus one hole) costs points.
        if window.count(opp_piece) == 3 and empty == 1:
            score -= 4

        return score

    def score_position(self, board, piece):
        """Return the heuristic value of `board` for `piece`.

        Sums 3 points per own token in the centre column plus the
        ``evaluate_window`` score of every horizontal, vertical and diagonal
        window of 4 cells.
        """
        score = 0

        # Centre column bonus
        center_array = [int(i) for i in list(board[:, 7 // 2])]
        score += center_array.count(piece) * 3

        # Horizontal windows
        for r in range(6):
            row_array = [int(i) for i in list(board[r, :])]
            for c in range(4):
                score += self.evaluate_window(row_array[c:c + 4], piece)

        # Vertical windows
        for c in range(7):
            col_array = [int(i) for i in list(board[:, c])]
            for r in range(3):
                score += self.evaluate_window(col_array[r:r + 4], piece)

        # Both diagonal directions
        for r in range(3):
            for c in range(4):
                rising = [board[r + i][c + i] for i in range(4)]
                score += self.evaluate_window(rising, piece)

        for r in range(3):
            for c in range(4):
                falling = [board[r + 3 - i][c + i] for i in range(4)]
                score += self.evaluate_window(falling, piece)

        return score

    def get_next_open_row(self, board, col):
        """Return the lowest empty row index of `col`, or None if the column is full."""
        for r in range(6):
            if board[r][col] == 0:
                return r
        return None

    def drop_piece(self, board, row, col, piece):
        """Write `piece` at (row, col) of `board`, in place."""
        board[row][col] = piece

    def minimax(self, grille, profondeur, maximizing_player):
        """Plain minimax search.

        Args:
            grille: Board to analyse (numpy array; it is copied, never modified).
            profondeur: Remaining search depth.
            maximizing_player: True when piece 2 is to move, False for piece 1.

        Returns:
            ``(column, value)``; ``column`` is None on a leaf (finished game or
            depth 0).
        """
        valid_locations = self.get_valid_locations(grille)

        # Leaf handling; each win test is evaluated only once per node.
        if self.winning_move(grille, 2):
            return (None, 1000000)
        if self.winning_move(grille, 1):
            return (None, -1000000)
        if len(valid_locations) == 0:  # Game is over, no more valid moves
            return (None, 0)
        if profondeur == 0:
            return (None, self.score_position(grille, 2))

        # Fallback column, kept when no child strictly improves `value`.
        column = np.random.choice(valid_locations)
        if maximizing_player:
            value = -np.inf
            for col in valid_locations:
                row = self.get_next_open_row(grille, col)
                temp_board = grille.copy()
                self.drop_piece(temp_board, row, col, 2)
                new_score = self.minimax(temp_board, profondeur - 1, False)[1]
                if new_score > value:
                    value = new_score
                    column = col
            return column, value

        value = np.inf
        for col in valid_locations:
            row = self.get_next_open_row(grille, col)
            temp_board = grille.copy()
            self.drop_piece(temp_board, row, col, 1)
            new_score = self.minimax(temp_board, profondeur - 1, True)[1]
            if new_score < value:
                value = new_score
                column = col
        return column, value

    def play(self, grille):
        """Return the column chosen for piece 2 on `grille` (None if the game is over).

        The search depth is ``self.profondeur`` (it used to be hard-coded to 4,
        which made the attribute ineffective).
        """
        col, _ = self.minimax(grille, self.profondeur, True)
        return col

# Entrainement

Principe:
Le MainNetwork joue contre le TargetNetwork. Toutes les 4 parties, le MainNetwork met à jour ses poids sur les 4 parties précédentes. Toutes les 100 parties, le TargetNetwork copie les poids du MainNetwork.

In [None]:
# Load previously saved state from Google Drive: the weights of `Joueur` and a
# full saved model for `Adversaire`.
# NOTE(review): both paths live under /content/drive, so this presumably needs
# the Drive to be mounted and the files to exist from an earlier run - confirm.
Joueur.load_weights("/content/drive/MyDrive/Isima/Puissance4/tmp_puissance4.weights.h5")
Adversaire = keras.saving.load_model("/content/drive/MyDrive/Isima/Puissance4/puissance4.keras")



In [None]:
def choose_col(grille, model):
    """Pick the column with the highest predicted score for `grille`.

    Args:
        grille: Board passed to the model as a batch of one.
        model: Object exposing ``predict(batch, verbose=0)``; the first row of
            its output is read as 7 column scores.

    Returns:
        ``(column, result, maximum)``: the chosen column, the 7 scores and the
        best score. Ties on the best score are broken at random.
    """
    result = model.predict(np.array([grille]), verbose=0)[0]
    maximum = max(result)

    best_columns = [col for col in range(7) if result[col] >= maximum]
    column = (
        best_columns[randint(0, len(best_columns)-1)]
        if len(best_columns) > 1
        else best_columns[0]
    )

    return column, result, maximum

In [None]:
# Learning hyper-parameters
alpha = 0.5    # Blend weight: target = (1-alpha)*current scores + alpha*new scores
R_play = -1    # Reward given to a move after which the game simply continues
R_win = 100    # Reward for a winning move; used negated as the penalty for a loss or an invalid move
lamb = 0.9     # Factor applied to the best score of the next position in the target

N_game = 1 # Number of games played before each training of the player
M_learning = 10 # Number of trainings of the player before the opponent is updated
L = 100 # Number of opponent updates

In [None]:
import copy  # stdlib; used to run the one-move look-ahead on a throw-away copy of the game


def _build_target(all_scores, column, value):
  """Return the training target for one recorded position.

  The network's current 7 scores are blended (weight `alpha`) with a vector
  that is `value` on the played `column` and 0 elsewhere. The vector is
  float-typed: the former integer array silently truncated
  `R_play + lamb*scoreJ`.
  NOTE(review): the blend also pulls the scores of the columns that were NOT
  played towards 0 - confirm this is intended.
  """
  scores = np.zeros(7)
  scores[column] = value
  return (1-alpha)*all_scores + alpha*scores


myIAMinMax = IAMinMax()
n_victoire = 0
eps = 1
# Number of games played between two difficulty updates.
n_parties = M_learning*N_game

for l in range(L):
  print("########################################")
  print("# l =",l,"/",L)
  print("########################################")
  print("")
  print(n_victoire, " sur ", n_parties)
  # eps is the probability that the opponent plays a random move.
  coef = (2*(n_victoire - n_parties/2)/n_parties)*0.04
  if coef>0:
    # The player won more than half of the games: raise the difficulty by lowering eps.
    eps = eps*(1-coef)
  if coef<0:
    # The player lost more than half of the games: lower the difficulty by raising eps.
    eps = eps*(1-abs(coef)) + 1*abs(coef)
  print("eps = ", eps)
  n_victoire = 0
  for m in range(M_learning):

    # Training data of the player: one board (X) and one target vector (Y) per move.
    X = []
    Y = []

    for n in range(N_game):
      myPuissance4 = Puissance4()
      all_scoresJ_prec = None
      columnJ_prec = None
      columnA = None
      winJ, winA = 1, 1

      # Play while the game goes on (code 1); 0 = win, 2 = invalid move, 3 = draw all stop it.
      while winJ==1 and winA==1:

          if not myIAMinMax.get_valid_locations(myPuissance4.grille):
            # Board full without a winner and not reported as such by the game:
            # close the pending move as a draw and stop instead of searching a full board.
            if len(Y)<len(X):
              Y.append(_build_target(all_scoresJ_prec, columnJ_prec, R_play))
            winJ = 3
            break

          if all_scoresJ_prec is not None:
            # Score the player's previous move by simulating the opponent's
            # minimax answer and the player's best reply. The simulation runs on
            # a copy so that the real board and turn order are never altered,
            # whatever the simulated moves return.
            simulation = copy.deepcopy(myPuissance4)
            # Minimax always maximises piece 2: give it the opponent's view of the
            # board, in which the opponent's own tokens are the 2s.
            columnA = myIAMinMax.play(simulation.grilleAdversaire)
            if columnA is None:
              print("MinMax joue au hasard")
              columnA = simulation.getRandomColumn()
            winA_sim = simulation.__play__(columnA)

            if winA_sim==0 or winA_sim==3:
              # The opponent wins or forces a draw: penalise the player's move.
              Y.append(_build_target(all_scoresJ_prec, columnJ_prec, -R_win))
            else:
              # Best score the player can reach from the simulated position.
              columnJ_sim, _, scoreJ_sim = choose_col(simulation.grille, Joueur)
              winJ_sim = simulation.__play__(columnJ_sim)
              if winJ_sim==0:
                # The player would win: reward.
                value = R_win
              elif winJ_sim==1:
                # The game would continue: bootstrap on the next best score.
                value = R_play + lamb*scoreJ_sim
              elif winJ_sim==2:
                # The player's reply would be invalid: penalise.
                value = -R_win
              else:
                # Draw.
                value = R_play
              Y.append(_build_target(all_scoresJ_prec, columnJ_prec, value))

          # The opponent plays for real.
          if random()<eps:
            # Random move.
            columnA = myPuissance4.getRandomColumn()
          elif all_scoresJ_prec is None:
            # First move of the game: no look-ahead has been computed yet.
            columnA = myIAMinMax.play(myPuissance4.grilleAdversaire)
          # Otherwise columnA is the minimax move found during the look-ahead.
          winA = myPuissance4.__play__(columnA)

          if winA==2:
            # The opponent played an invalid move: make it play at random.
            columnA = myPuissance4.getRandomColumn()
            winA = myPuissance4.__play__(columnA)

          if winA==1:
            # Record the board for training.
            X.append(myPuissance4.grille.copy())

            # The player plays.
            columnJ, all_scoresJ, scoreJ = choose_col(myPuissance4.grille, Joueur)
            winJ= myPuissance4.__play__(columnJ)

            if winJ==0:
              # The player won: reward the winning move.
              Y.append(_build_target(all_scoresJ, columnJ, R_win))

            elif winJ==2:
              # The player played an invalid move: penalise it.
              Y.append(_build_target(all_scoresJ, columnJ, -R_win))

            elif winJ==3:
              # Draw: the target applies to the move just played (columnJ).
              Y.append(_build_target(all_scoresJ, columnJ, R_play))

            # Remember the scores of this move; they are updated on the next turn.
            all_scoresJ_prec = all_scoresJ.copy()
            columnJ_prec = columnJ

        ##----- End while -----#
      # Print the outcome and the final board.
      if winA==0:
        print("L'adversaire(1) a gagné")
      elif winA==2:
        print("L'adversaire(1) a joué une colonne pleine")
      elif winJ==0:
        print("Le joueur(2) a gagné")
        n_victoire+=1
      elif winJ==2:
        print("Le joueur(2) a joué une colonne pleine")
      for i in range(5,-1,-1):
        print(myPuissance4.grille[i])
      print("")

    # The N_game games are over: train the player on the recorded moves.
    print("Entrainement du joueur.")
    #print(Y)
    hist = Joueur.fit(np.array(X), np.array(Y), epochs=40, verbose=0)
    print(hist.history['loss'][-10:])
    #print(Joueur.predict(np.array(X), verbose=0))

  output.clear()
  # The player has been trained M_learning times: the opponent copies its weights.
  print("L'adversaire copie les poids du joueur.")
  Joueur.save("/data/joueur.keras", overwrite=True)
  Adversaire = keras.saving.load_model("/data/joueur.keras")

  # Save the player's weights on the Drive.
  Joueur.save_weights("/content/drive/MyDrive/Isima/Puissance4/tmp_puissance4.weights.h5", overwrite=True)

# Save the final model.
Joueur.save('/content/drive/MyDrive/Isima/Puissance4/puissance4.keras')
print("eps = ", eps)

L'adversaire copie les poids du joueur.
eps =  0.6348979798697088
