In [1]:
import random
import sys

import numpy as np

from genetic_lib.player import Player
from genetic_lib.ruleset import RuleSet
from genetic_lib.game import Game
from genetic_lib.generation import Generation, Fitness, WinFitness, Reproduction, AverageReproduction

In [2]:
class TicTacToeRuleSet( RuleSet ):
    """Rules for tic-tac-toe played on a flat, 9-cell board.

    Cell ``i`` of the flat board is drawn at row ``i // 3``, column ``i % 3``.
    A cell holding 0 never counts towards a win, and 0 is also the value
    returned by getWinner() when no line is complete.
    """

    # Every triple of flat indices that forms a complete line. Built once at
    # class-definition time instead of on every getWinner() call.
    _WINNING_LINES = (
        # horizontal (each triple is one row of the 3x3 layout)
        ( 0, 1, 2 ),
        ( 3, 4, 5 ),
        ( 6, 7, 8 ),
        # vertical
        ( 0, 3, 6 ),
        ( 1, 4, 7 ),
        ( 2, 5, 8 ),
        # diagonal
        ( 0, 4, 8 ),
        ( 2, 4, 6 ),
    )

    def getBoardSize( self ):
        """Return the number of cells on the board (9)."""
        return 9

    def getWinner( self, board ):
        """Return the mark that fills a complete line, or 0 if there is none.

        board -- indexable sequence of at least 9 cell values.

        Lines are checked in the order rows, columns, diagonals; the mark of
        the first complete non-zero line found is returned.
        """
        for first, second, third in self._WINNING_LINES:
            if board[ first ] == board[ second ] == board[ third ] != 0:
                return board[ first ]
        return 0

    def printBoard( self, board ):
        """Write the board as a 3x3 grid to stdout, followed by its winner.

        board -- sequence whose length must equal getBoardSize().

        Each cell is centred in a 4-character field; cells in a row are
        separated by '|' and rows by a line of dashes. The result of
        getWinner() is appended to the last row.
        """
        assert len( board ) == self.getBoardSize()
        rows = []
        for row_start in range( 0, len( board ), 3 ):
            cells = [
                str( board[ index ] ).center( 4, ' ' )
                for index in range( row_start, row_start + 3 )
            ]
            rows.append( '|'.join( cells ) )
        sys.stdout.write( '\n--------------\n'.join( rows ) )
        sys.stdout.write( ' winner: ' + str( self.getWinner( board ) ) )
        sys.stdout.write( '\n' )

In [3]:
# Smoke test: ask one player for a move on an all-zero board, then play a
# single game between two players and print the final board.
# ( 9 ) is just the integer 9, so this is a flat array of nine zeros.
test_board = np.full( ( 9 ), 0 )

tictactoe_ruleset = TicTacToeRuleSet()

player_1 = Player( tictactoe_ruleset )

# Move chosen by player_1 for the all-zero board
print( player_1.getMove( test_board ) )

player_2 = Player( tictactoe_ruleset )

game = Game( tictactoe_ruleset, [ player_1, player_2 ] )
# NOTE(review): the meaning of the value returned by play() is defined in genetic_lib — confirm there
print( game.play() )
tictactoe_ruleset.printBoard( game.board )

8
(0, 5, 0)
 0  | 0  | 1  
--------------
 0  | 0  | 1  
--------------
 -1 | -1 | 1   winner: 1


In [4]:
# Reproduce the experiment from tictactoe.ipynb

# Column header for the rows printed by printValidationResults:
#   G                        - generation number
#   W                        - overall rate computed from column 0 of the validation results
#   W | T | L (first group)  - rates for validation_results[ 0 ]
#   W | T | L (second group) - rates for validation_results[ 1 ]
# presumably W/T/L mean win/tie/loss and the two groups are going first/second — confirm against compareToRandom()
header = " G ||  W  ||  W  |  T  |  L  ||  W  |  T  |  L  |"
print( header )

def formatValue( value ):
    """Format value with three decimals as one table cell.

    The text is centred in a field of at least 5 characters (longer text is
    left as is) and a trailing '|' column separator is appended.
    """
    formatted = "%.3f" % value
    padded = formatted.center( 5, ' ' )
    return padded + '|'

def printValidationResults( generation, validation_results ):
    """Print one table row summarising a generation's validation results.

    generation         -- label for the row; centred in a 3-character field.
    validation_results -- 2-D numpy array of counts. Row 0 and row 1 are each
                          normalised by their own sum, and their first three
                          entries are printed as rates. Column 0 summed over
                          all rows, divided by the grand total, gives the
                          overall rate printed right after the label.
    """
    total_games = np.sum( validation_results )
    total_wins = np.sum( validation_results[ :, 0 ] )
    overall_rate = float( total_wins ) / float( total_games )

    # Row label and overall rate, each closed by a double bar
    leading_fields = [
        str( generation ).center( 3, ' ' ) + '||',
        formatValue( overall_rate ) + '|',
    ]

    # One group of three rates per result row
    rate_groups = []
    for counts in ( validation_results[ 0 ], validation_results[ 1 ] ):
        games_in_row = np.sum( counts )
        rates = counts.astype( float ) / float( games_in_row )
        rate_groups.append(
            formatValue( rates[ 0 ] ) + formatValue( rates[ 1 ] ) + formatValue( rates[ 2 ] )
        )

    # Joining the groups with '|' gives the double bar between them
    print( ''.join( leading_fields ) + '|'.join( rate_groups ) )

tictactoe_ruleset = TicTacToeRuleSet()

# Start from 100 freshly constructed players
starting_players = [Player( tictactoe_ruleset ) for i in range( 100 )]
current_generation = Generation( tictactoe_ruleset, starting_players )
for i in range( 30 ):
    # Validate before run()/reproduce(), so row 1 reports the starting players
    validation_results = current_generation.compareToRandom()
    printValidationResults( i + 1, validation_results )
    # run() then reproduce(); whatever reproduce() returns becomes the generation used in the next iteration
    current_generation.run()
    current_generation = current_generation.reproduce()

 G ||  W  ||  W  |  T  |  L  ||  W  |  T  |  L  |
 1 ||0.476||0.644|0.073|0.283||0.312|0.134|0.553|
 2 ||0.438||0.594|0.141|0.265||0.283|0.088|0.629|
 3 ||0.474||0.638|0.138|0.223||0.296|0.121|0.583|
 4 ||0.492||0.652|0.099|0.249||0.328|0.081|0.591|
 5 ||0.536||0.659|0.084|0.257||0.414|0.080|0.506|
 6 ||0.572||0.713|0.074|0.213||0.421|0.029|0.550|
 7 ||0.614||0.748|0.073|0.179||0.484|0.067|0.449|
 8 ||0.598||0.775|0.081|0.143||0.409|0.066|0.525|
 9 ||0.606||0.764|0.100|0.135||0.436|0.062|0.502|
 10||0.600||0.776|0.094|0.129||0.416|0.065|0.518|
 11||0.638||0.819|0.060|0.121||0.460|0.071|0.468|
 12||0.636||0.794|0.058|0.148||0.486|0.070|0.444|
 13||0.666||0.826|0.066|0.109||0.496|0.112|0.393|
 14||0.632||0.796|0.037|0.167||0.475|0.059|0.467|
 15||0.598||0.828|0.050|0.122||0.345|0.109|0.546|
 16||0.630||0.798|0.035|0.167||0.453|0.099|0.449|
 17||0.646||0.795|0.073|0.131||0.485|0.083|0.432|
 18||0.650||0.817|0.039|0.144||0.473|0.049|0.477|
 19||0.628||0.793|0.048|0.159||0.462|0.060|0.478|


In [9]:
# Try using a fitness function that ignores losses when going second

class IgnoreLossesFitness( Fitness ):
    """Fitness that scores a player only on the games in which it went first.

    The score is the fraction of those games that were won or tied. Games in
    which the player did not go first leave the tallies untouched.
    """

    def map( self, other_results, match_results ):
        """Fold one match into the running tallies and return them.

        other_results -- dict of running totals; the 'wins_and_ties' and
                         'games' counters are created when 'wins_and_ties'
                         is missing.
        match_results -- dict read for 'went_first', 'won', 'game_results'
                         and 'game'.

        Side effect: for a first-move game that was neither won nor tied, the
        final board is printed through the notebook-global tictactoe_ruleset.
        """
        if 'wins_and_ties' not in other_results:
            other_results[ 'wins_and_ties' ] = 0
            other_results[ 'games' ] = 0

        if match_results[ 'went_first' ]:
            other_results[ 'games' ] += 1
            # presumably game_results[ 0 ] is None when the game had no winner — confirm in genetic_lib.
            # Identity test: `is None` cannot be redefined by the compared object's __eq__.
            if match_results[ 'won' ] or match_results[ 'game_results' ][ 0 ] is None:
                other_results[ 'wins_and_ties' ] += 1
            else:
                tictactoe_ruleset.printBoard( match_results[ 'game' ].board )

        return other_results

    def reduce( self, all_results ):
        """Return wins-and-ties divided by games counted, or 0 if no games were counted.

        all_results -- dict holding the 'wins_and_ties' and 'games' counters
                       maintained by map().
        """
        if all_results[ 'games' ] == 0:
            return 0
        return float( all_results[ 'wins_and_ties' ] ) / float( all_results[ 'games' ] )
    

ignore_losses_fitness = IgnoreLossesFitness()

# Same loop as the earlier experiment, but the Generation is built with the
# custom fitness object and games=5000. The table header is not printed again here.
# NOTE(review): IgnoreLossesFitness.map prints a board for each first-move loss,
# so boards are interleaved with the table rows in this cell's output.
starting_players = [Player( tictactoe_ruleset ) for i in range( 100 )]
current_generation = Generation( tictactoe_ruleset, starting_players, fitness=ignore_losses_fitness, games=5000 )
for i in range( 30 ):
    # Validate before run()/reproduce(), so row 1 reports the starting players
    validation_results = current_generation.compareToRandom()
    printValidationResults( i + 1, validation_results )
    # run() then reproduce(); whatever reproduce() returns becomes the generation used in the next iteration
    current_generation.run()
    current_generation = current_generation.reproduce()

 1 ||0.436||0.589|0.147|0.264||0.273|0.103|0.624|
 -1 | 1  | -1 
--------------
 -1 | 0  | 1  
--------------
 -1 | 1  | 1   winner: -1
 -1 | 0  | 1  
--------------
 1  | -1 | 0  
--------------
 0  | 1  | -1  winner: -1
 1  | -1 | 1  
--------------
 1  | -1 | 1  
--------------
 -1 | -1 | 0   winner: -1
 1  | 1  | 0  
--------------
 -1 | -1 | -1 
--------------
 1  | -1 | 1   winner: -1
 1  | 1  | -1 
--------------
 0  | 1  | -1 
--------------
 0  | 0  | -1  winner: -1
 0  | 1  | -1 
--------------
 1  | 1  | -1 
--------------
 0  | 0  | -1  winner: -1
 1  | 1  | -1 
--------------
 -1 | -1 | -1 
--------------
 1  | 1  | 0   winner: -1
 -1 | 1  | 1  
--------------
 0  | -1 | 1  
--------------
 -1 | 1  | -1  winner: -1
 -1 | -1 | 1  
--------------
 1  | -1 | 1  
--------------
 1  | -1 | 0   winner: -1
 -1 | 1  | -1 
--------------
 -1 | 1  | 1  
--------------
 -1 | 0  | 1   winner: -1
 -1 | -1 | -1 
--------------
 1  | 1  | 0  
--------------
 1  | -1 | 1   winner: -1
 1  