In [1]:
from chess import Player, Board
from deep_learning import NeuralNet
import random
import numpy as np
import tensorflow as tf
import pickle


class Population:
    """A population of white and black AI chess players evolved together.

    Each generation every white player plays one black player, the Elo
    scores decide which players become parents, offspring replace the
    weakest players of the same color, and a random subset of the
    population is mutated.

    Attributes:
        white_list: the white ``Player`` objects.
        black_list: the black ``Player`` objects (same length as white_list).
        recreation_rate: the rate passed to the constructor.
        inverse_recreation_rate: ``1 / recreation_rate``, incremented by one
            when it is not an even number (see ``__init__``).
        size: total number of players, always even.
    """

    def __init__(self, size, recreation_rate=0.25):
        """Create ``size`` AI players, alternating white and black.

        args:
        size: int, requested population size; an odd value is increased by one
              so that both colors get the same number of players
        recreation_rate: float strictly between 0 and 1

        raises:
        ValueError: if recreation_rate is not strictly between 0 and 1
        """
        if not 0 < recreation_rate < 1:
            raise ValueError("recreation_rate must be strictly between 0 and 1")

        self.white_list = []
        self.black_list = []
        self.recreation_rate = recreation_rate
        self.inverse_recreation_rate = 1 / self.recreation_rate

        # Kept as-is so the stored attribute does not change for existing users.
        # This bump alone does NOT guarantee an even number of parents
        # (e.g. 1/0.3 -> 4.33); _parents_per_color() enforces that.
        if self.inverse_recreation_rate % 2 != 0:
            self.inverse_recreation_rate = self.inverse_recreation_rate + 1

        # pop size must be even
        self.size = size if size % 2 == 0 else size + 1

        # even ids are white, odd ids are black
        for i in range(self.size):
            if i % 2 == 0:
                self.white_list.append(Player("white", "ai", id=i))
            else:
                self.black_list.append(Player("black", "ai", id=i))

    def _parents_per_color(self):
        """Return the even number (>= 2) of parents to select per color."""
        n = max(2, round(self.size / self.inverse_recreation_rate))
        # recreate() pairs the parents up, so an odd count is rounded down
        return n - (n % 2)

    def run(self, max_gen=3, games_per_gen=1, save=True, verbose=True):
        """Train the population for max_gen generations.

        Parents are selected for recreation based on their performance
        (elo score) in games_per_gen rounds of matches; in every round the
        white players are shuffled and each one plays one black player.

        args:
        max_gen: int, number of generations to run
        games_per_gen: int, rounds of matches played per generation
        save: bool, pickle the population to population_<generation>.pickle
              after every generation
        verbose: bool, print the boards, the results and progress messages

        returns the population itself (self) at the end of max_gen
        """
        for generation in range(1, max_gen + 1):

            # reset elo scores to 1400 at the start of new generation
            for player in self.white_list + self.black_list:
                player.elo_score = 1400

            for _ in range(games_per_gen):
                # shuffling one color is enough to randomize the pairings
                random.shuffle(self.white_list)

                for player_1, player_2 in zip(self.white_list, self.black_list):
                    # reset=True: the players are reused between games (recreate
                    # and mutate only swap their nn), so their pieces may still
                    # be wherever the previous game left them
                    board = Board(player_1, player_2, reset=True, max_steps=200)

                    if verbose:
                        print(board)

                    # play
                    winner, loser, tie = board.play(show=False, verbose=False)

                    if verbose:
                        print("winner", winner)
                        print("loser", loser)
                        print("tie", tie)

                    # updating elo score (also needs the ending positions so has to be before reset)
                    board.update_elo_score(winner, loser, tie, capture_bias=True)

            self.recreate(n=self._parents_per_color(), generation=generation)
            self.mutate()

            if verbose:
                print(" ")
                print("Recreate. New generation: ", generation)
                print("Mutate.")
                if save:
                    print("Saving new population.", generation)

            if save:
                with open(f"population_{generation}.pickle", "wb") as f:
                    pickle.dump(self, f)

        return self

    @staticmethod
    def _sorted_by_elo(players):
        """Return a new list of players in ascending elo_score order (stable)."""
        return sorted(players, key=lambda player: player.elo_score)

    def get_fittest(self, n=1):
        """Return the n fittest members of both colors (n + n altogether).

        if n == 1: a (white_player, black_player) tuple is returned
        otherwise: a (white_list, black_list) tuple is returned, each list in
                   ascending elo score order (best is last); a list is shorter
                   than n when the color has fewer than n players

        raises:
        ValueError: if n is negative
        """
        if n < 0:
            raise ValueError("n must be non-negative")

        whites = self._sorted_by_elo(self.white_list)
        blacks = self._sorted_by_elo(self.black_list)

        if n == 1:
            return whites[-1], blacks[-1]

        # explicit start index: a plain [-n:] slice returns everything for n == 0
        return whites[max(0, len(whites) - n):], blacks[max(0, len(blacks) - n):]

    def get_least_fit(self, n=1):
        """Return the n least fit members of both colors (n + n altogether).

        if n == 1: a (white_player, black_player) tuple is returned
        otherwise: a (white_list, black_list) tuple is returned, each list in
                   ascending elo score order (worst is first)

        raises:
        ValueError: if n is negative
        """
        if n < 0:
            raise ValueError("n must be non-negative")

        whites = self._sorted_by_elo(self.white_list)
        blacks = self._sorted_by_elo(self.black_list)

        if n == 1:
            return whites[0], blacks[0]
        return whites[:n], blacks[:n]

    def mutate(self, pop_portion=0.1, weight_portion=0.2, nudge_mode="normal"):
        """Mutate the kernels of a random subset of the population.

        Selects a random subset of the players based on pop_portion (at least
        one player, not balanced between white and black) and, in every dense
        layer, adds a random disturbance to one contiguous block of kernel rows
        whose length is based on weight_portion.

        args:
        pop_portion: float, share of the population to mutate
        weight_portion: float, share of each kernel's rows to mutate
        nudge_mode: one of 'normal', 'uniform', None -- the distribution the
                    disturbance is drawn from:
                    'normal'  -> standard normal
                    'uniform' -> uniform on [0, 1)
                    otherwise -> zero-mean normal with a random scale in [0, 3)
        """
        all_players = self.white_list + self.black_list
        if not all_players:
            return

        # at least one player, and never more than there are (random.sample
        # raises when asked for more items than the list holds)
        k = min(len(all_players), max(1, round(len(all_players) * pop_portion)))
        selected_players = random.sample(all_players, k=k)

        for player in selected_players:

            new_weights = []

            for lay in player.nn.dense_layers:
                kernel = lay.weights[0]
                height = kernel.shape[0]
                width = kernel.shape[1]

                # get a random slice of rows to be mutated
                # NOTE(review): max(1, ...) means row 0 is never mutated --
                # kept as in the original, confirm that this is intended
                start = max(1, round(random.uniform(0, (1 - weight_portion)) * height))
                end = round(start + (weight_portion * height))

                untouched_head = kernel[:start].numpy()
                mutated_rows = kernel[start:end].numpy()
                untouched_tail = kernel[end:].numpy()

                if nudge_mode == "normal":
                    disturbance = np.random.normal(size=mutated_rows.shape)
                elif nudge_mode == "uniform":
                    disturbance = np.random.uniform(size=mutated_rows.shape)
                else:
                    # normal with random spread; abs() because np.random.normal
                    # rejects a negative scale and randint(-2, 2) can be negative
                    scale = abs(random.randint(-2, 2) + random.random())
                    disturbance = np.random.normal(loc=0, scale=scale, size=mutated_rows.shape)

                mutated_rows = mutated_rows + disturbance

                # stitch the three row blocks back together into one kernel
                new_layer_weight = np.concatenate(
                    [untouched_head, mutated_rows, untouched_tail], axis=0
                ).reshape(height, width)
                new_weights.append(new_layer_weight)

            # Build a fresh player only to obtain a NeuralNet initialized with
            # the new weights, then hand that net to the existing player
            new_player = Player(player.color, "ai",
                                kernel_initializer=new_weights,
                                id=player.id)
            player.nn = new_player.nn
            player.nn.player = player

    @staticmethod
    def _crossover_weights(parent_top, parent_bottom):
        """Return one kernel per dense layer: top rows of one parent, bottom rows of the other."""
        offspring_weights = []
        for index, top_layer in enumerate(parent_top.nn.dense_layers):
            top_kernel = top_layer.weights[0]
            bottom_kernel = parent_bottom.nn.dense_layers[index].weights[0]
            height = top_kernel.shape[0]
            width = top_kernel.shape[1]

            # random split point: rows above it come from parent_top,
            # rows from it downwards come from parent_bottom
            split = round(height * random.uniform(0.2, 0.8))
            stacked = np.concatenate(
                [top_kernel[:split].numpy(), bottom_kernel[split:].numpy()], axis=0
            )
            # float64 matches what the previous list-based stacking produced;
            # reshape raises if the parents' kernels do not have the same shape
            offspring_weights.append(stacked.astype(np.float64).reshape(height, width))
        return offspring_weights

    def recreate(self, n, generation):
        """Generate offspring with intracolor reproduction (white+white --> white).

        For each color the n fittest players are paired up, every pair produces
        one offspring, and each offspring replaces a different one of the n/2
        least fit players of that color (the weak player keeps its object but
        receives the offspring's id and nn).

        args:
        n: int, number of parents to select per color,
                must be even to reproduce n/2 offsprings of one color,
                the n/2 new offsprings replace n/2 weak players
                so n decides the rate of change of the whole population
        generation: int, passed from self.run(); kept for interface
                compatibility -- offspring ids are now taken from a running
                counter above the largest id in the population, because the
                former generation+index ids were identical for the white and
                the black offspring of the same pair index

        raises:
        ValueError: if n is not smaller than the population size or is odd
        """
        if n >= self.size:
            raise ValueError("n (number of parents) must be smaller than population size")
        if n % 2 != 0:
            raise ValueError("n must be even to produce n/2 offsprings")

        # best n whites / blacks and least fit n/2 whites / blacks
        whites, blacks = self.get_fittest(n=n)
        whites_bad, blacks_bad = self.get_least_fit(n=n // 2)

        next_id = max(player.id for player in self.white_list + self.black_list) + 1

        # Offspring are all built first and swapped in afterwards: the fittest
        # and least fit groups can overlap, and a parent must not be overwritten
        # before every pair has read its weights.
        replacements = []

        for parents, weak_players in ((whites, whites_bad), (blacks, blacks_bad)):
            # get_fittest / get_least_fit return a bare player when asked for one
            if not isinstance(parents, list):
                parents = [parents]
            if not isinstance(weak_players, list):
                weak_players = [weak_players]

            # random pairing among the selected parents (same color)
            random.shuffle(parents)
            pair_count = len(parents) // 2

            # distinct weak players, so that no offspring overwrites another one
            targets = random.sample(weak_players, k=pair_count)

            for i, weakest_link in enumerate(targets):
                parent_top = parents[i]
                parent_bottom = parents[-(i + 1)]

                offspring = Player(parent_top.color, "ai",
                                   kernel_initializer=self._crossover_weights(parent_top, parent_bottom),
                                   id=next_id)
                next_id += 1
                replacements.append((weakest_link, offspring))

        # Replace the selected weak players with the new offspring
        for weakest_link, offspring in replacements:
            weakest_link.id = offspring.id
            weakest_link.nn = offspring.nn
            weakest_link.nn.player = weakest_link
            
            
            

In [2]:
# Build a 4-player population: the constructor alternates colors, giving 2 white and 2 black AI players.
population = Population(size=4, recreation_rate=0.5)

No model path: Initializing a new NeuralNet
Metal device set to: Apple M1
No model path: Initializing a new NeuralNet
No model path: Initializing a new NeuralNet
No model path: Initializing a new NeuralNet


2023-01-28 19:58:42.821443: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-01-28 19:58:42.821497: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Evolve for two generations without pickling. run() returns self, so
# new_population refers to the same object as population.
new_population = population.run(max_gen=2, save=False)

----------------------------------------
|b_r1|b_k1|b_b1|b_qn|b_kg|b_b2|b_k2|b_r2|
----------------------------------------
|b_p1|b_p2|b_p3|b_p4|b_p5|b_p6|b_p7|b_p8|
----------------------------------------
|    |    |    |    |    |    |    |    |
----------------------------------------
|    |    |    |    |    |    |    |    |
----------------------------------------
|    |    |    |    |    |    |    |    |
----------------------------------------
|    |    |    |    |    |    |    |    |
----------------------------------------
|w_p1|w_p2|w_p3|w_p4|w_p5|w_p6|w_p7|w_p8|
----------------------------------------
|w_r1|w_k1|w_b1|w_qn|w_kg|w_b2|w_k2|w_r2|
----------------------------------------

50
100
150
Reached max_steps=200 -- stopping game
----------------------------------------
|    |    |    |b_r1|b_kg|    |b_k2|w_b1|
----------------------------------------
|b_p1|    |    |b_p4|b_p5|b_p6|    |    |
----------------------------------------
|    |b_p2|    |    |    |    |    |b

In [None]:
# Let every evolved white player play one game (capped at 800 steps) against
# a newly created black AI that is not part of the population.
# NOTE(review): unlike the other Board(...) calls in this file, reset=True is
# not passed here although `black` is reused for every game -- confirm that
# Board's default puts the pieces back on their starting squares.
black = Player("black", "ai", id=1000)
for white in new_population.white_list:
    board = Board(white, black, max_steps=800)
    # the result is unpacked but not used any further in this cell
    winner, loser, tie = board.play(show=False, verbose=False)

In [4]:
# Show each white player's current elo score next to the player object.
[(player.elo_score, player) for player in new_population.white_list]

[(1399.2, white), (1401.6, white)]

In [5]:
# Black player of type "human" (created without an explicit id).
p = Player("black", "human")

In [9]:
# Play the white player at index 1 of the trained population against the
# human player p on a freshly reset board, capped at 500 steps.
b = Board(new_population.white_list[1], p, reset=True, max_steps=500)
b.play()

Move: w_k2 -- [8 3]
----------------------------------------
|b_r1|b_k1|b_b1|b_qn|b_kg|b_b2|b_k2|b_r2|
----------------------------------------
|b_p1|b_p2|b_p3|b_p4|b_p5|b_p6|b_p7|b_p8|
----------------------------------------
|    |    |    |    |    |    |    |    |
----------------------------------------
|    |    |    |    |    |    |    |    |
----------------------------------------
|    |    |    |    |    |    |    |    |
----------------------------------------
|    |    |    |    |    |    |    |w_k2|
----------------------------------------
|w_p1|w_p2|w_p3|w_p4|w_p5|w_p6|w_p7|w_p8|
----------------------------------------
|w_r1|w_k1|w_b1|w_qn|w_kg|w_b2|    |w_r2|
----------------------------------------

Select: piece, x, y 
b_k1, 3, 6
Move: b_k1 -- [3 6]
----------------------------------------
|b_r1|    |b_b1|b_qn|b_kg|b_b2|b_k2|b_r2|
----------------------------------------
|b_p1|b_p2|b_p3|b_p4|b_p5|b_p6|b_p7|b_p8|
----------------------------------------
|    |    |b_k1

Select: piece, x, y 
b_k2, 5, 3
Move: b_k2 -- [5 3]
w_p5
----------------------------------------
|b_r1|    |b_b1|b_qn|b_kg|b_b2|    |b_r2|
----------------------------------------
|b_p1|b_p2|b_p3|b_p4|b_p5|b_p6|b_p7|b_p8|
----------------------------------------
|    |    |    |    |    |    |    |    |
----------------------------------------
|    |w_b2|    |    |    |    |    |    |
----------------------------------------
|    |    |    |    |    |w_qn|    |    |
----------------------------------------
|    |    |    |    |b_k2|    |    |w_k2|
----------------------------------------
|w_p1|w_p2|w_p3|w_p4|    |w_p6|w_p7|w_p8|
----------------------------------------
|w_r1|w_k1|w_b1|    |w_kg|    |w_r2|    |
----------------------------------------

Move: w_qn -- [1 4]
----------------------------------------
|b_r1|    |b_b1|b_qn|b_kg|b_b2|    |b_r2|
----------------------------------------
|b_p1|b_p2|b_p3|b_p4|b_p5|b_p6|b_p7|b_p8|
----------------------------------------
|    |    

KeyboardInterrupt: Interrupted by user