In [1]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show, output_notebook

## player: AI

In [2]:
class Player:
    """Self-learning player that draws its moves from per-heap weight tables.

    ``distribution`` holds one row of three integer weights per heap size
    (row index = heap size, column index = objects taken minus one).
    ``history`` maps every heap size faced during the current game to the
    move that was actually played.  ``cutoff`` is the row-sum threshold above
    which ``learn`` shifts a row back down.
    """

    def __init__(self, heap):
        """Start with equal weights for every heap size from 0 up to ``heap``."""
        self.history = {}
        self.distribution = np.ones((heap + 1, 3), dtype=int)
        self.cutoff = 1000

    def __call__(self, heap):
        """Choose a move for ``heap``, record it in the history and return it."""
        # cumulative weights turn a single integer draw into a weighted choice
        thresholds = self.distribution[heap].cumsum()
        draw = np.random.randint(thresholds[2])
        if draw < thresholds[0]:
            choice = 1
        elif draw < thresholds[1]:
            choice = 2
        else:
            choice = 3

        # never take more than is left; the capped move is what gets recorded
        taken = min(heap, choice)
        self.history[heap] = taken
        return taken

    def learn(self, winner):
        """Adjust the weights of this game's moves, then clear the history.

        ``winner`` is compared by identity with ``self`` to decide between
        the reward branch and the penalty branch.
        """
        won = winner is self
        for heap, move in self.history.items():
            weights = self.distribution[heap]
            if won:
                # reward the move that was played
                weights[move - 1] += 1
            else:
                # net effect: the two moves that were not played gain one
                weights[move - 1] -= 1
                weights += 1

        # shift rows whose total exceeds the cutoff so their smallest weight is 1
        row_totals = self.distribution.sum(axis=1)
        for heap in np.flatnonzero(row_totals > self.cutoff):
            weights = self.distribution[heap]
            weights -= weights.min() - 1

        # start the next game with an empty history
        self.history = {}

    def strategy(self):
        """Return the weights for heap sizes 1 and up, normalised per heap.

        The result has one row per move and one column per heap size.
        """
        weights = self.distribution[1:]
        totals = weights.sum(axis=1)
        return weights.T / totals

## opponents

In [3]:
def expert_opponent(heap):
    """Take ``heap % 4`` objects when that is non-zero, otherwise a random 1-3.

    The random fallback is capped at ``heap``.
    """
    remainder = heap % 4
    if remainder:
        return remainder
    return min(heap, np.random.randint(1, 4))

In [4]:
def random_opponent(heap):
    """Take a random 1, 2 or 3 objects, but never more than ``heap``."""
    wanted = np.random.randint(1, 4)
    return min(heap, wanted)

In [5]:
def take_n_opponent(take):
    """Build an opponent that always takes ``take`` objects, capped at the heap."""
    def opponent(heap):
        return min(heap, take)

    return opponent

## training

In [6]:
def play(heap, player, opponent, games=100000, report_every=10000):
    """Train ``player`` against ``opponent`` and plot the learned strategy.

    Each game starts from ``heap`` objects.  The two sides alternate calling
    their move function with the current heap size, and whoever makes the
    move that empties the heap is the winner.  The turn order is carried over
    between games, so the loser of one game moves first in the next.

    Parameters
    ----------
    heap : int
        Number of objects at the start of every game.
    player : object
        Callable as ``player(heap)`` to get a move; must also provide
        ``learn(winner)``, which is called after every game.
    opponent : callable
        Called as ``opponent(heap)`` to get the opponent's move.
    games : int, optional
        Game index at which the last progress line is printed.  The loop runs
        ``games + 1`` iterations so that this final line appears.
    report_every : int, optional
        Number of games between two progress lines.

    Raises
    ------
    ValueError
        If ``report_every`` is not positive.

    Notes
    -----
    The number printed after ``'W/L ratio'`` is the player's wins in the last
    ``report_every`` games divided by ``report_every`` (a win fraction), so
    the very first line, printed before any game, always shows ``0.0``.
    """
    if report_every <= 0:
        raise ValueError('report_every must be a positive integer')

    players = player, opponent
    wins = 0

    for game in range(games + 1):
        # report the win fraction of the batch that just finished
        if game % report_every == 0:
            print(game, 'games, W/L ratio', wins / report_every)
            wins = 0

        # a single game: the side at index 0 moves, then the roles are swapped
        h = heap
        while h:
            h -= players[0](h)
            players = players[1], players[0]

        # the swap after the final move leaves the last mover at index 1
        winner = players[1]
        wins += winner is player

        # let player learn
        player.learn(winner)

    # plot distribution
    plot_strategy(heap, player)

In [7]:
def plot_strategy(heap, player):
    """Show the player's move probabilities as a stacked bar chart.

    One bar is drawn per heap size from 1 to ``heap``; its three segments are
    the probabilities returned by ``player.strategy()`` for taking 1, 2 and 3
    objects.

    Parameters
    ----------
    heap : int
        Largest heap size to plot.
    player : object
        Must provide ``strategy()`` returning three rows of per-heap
        probabilities (one row per move).
    """
    output_notebook()

    # data: cumulative sums give the top edge of each stacked segment
    # (computed without modifying the array returned by strategy())
    take_1, take_2, take_3 = player.strategy().cumsum(axis=0)
    kwargs = {'x': list(range(1, heap + 1)), 'width': .8}

    # plot: `width`/`height`/`legend_label` are the names current Bokeh
    # releases accept in place of `plot_width`/`plot_height`/`legend`
    plot = figure(width=600, height=400,
                  x_axis_label='heap size', y_axis_label='probability')
    plot.vbar(**kwargs, bottom=0, top=take_1, legend_label='take 1', color='#a44444')
    plot.vbar(**kwargs, bottom=take_1, top=take_2, legend_label='take 2', color='#88a888')
    plot.vbar(**kwargs, bottom=take_2, top=take_3, legend_label='take 3', color='#ccccac')
    show(plot)

## learning

In [8]:
# starting heap size shared by all training runs in this notebook
HEAP = 21

In [9]:
# train a fresh Player against the opponent that takes heap % 4 whenever that is non-zero
play(HEAP, Player(HEAP), expert_opponent)

0 games, W/L ratio 0.0
10000 games, W/L ratio 0.0082
20000 games, W/L ratio 0.0129
30000 games, W/L ratio 0.0146
40000 games, W/L ratio 0.0321
50000 games, W/L ratio 0.0325
60000 games, W/L ratio 0.1141
70000 games, W/L ratio 0.4532
80000 games, W/L ratio 0.4985
90000 games, W/L ratio 0.4992
100000 games, W/L ratio 0.4994


In [10]:
# train a fresh Player against the opponent that takes a random 1-3 objects
play(HEAP, Player(HEAP), random_opponent)

0 games, W/L ratio 0.0
10000 games, W/L ratio 0.8589
20000 games, W/L ratio 0.949
30000 games, W/L ratio 0.9633
40000 games, W/L ratio 0.9634
50000 games, W/L ratio 0.9642
60000 games, W/L ratio 0.9674
70000 games, W/L ratio 0.9684
80000 games, W/L ratio 0.9675
90000 games, W/L ratio 0.9691
100000 games, W/L ratio 0.9716


In [11]:
# train a fresh Player against the opponent that always takes 1 object
play(HEAP, Player(HEAP), take_n_opponent(1))

0 games, W/L ratio 0.0
10000 games, W/L ratio 0.9976
20000 games, W/L ratio 0.9995
30000 games, W/L ratio 0.9996
40000 games, W/L ratio 0.9998
50000 games, W/L ratio 0.9998
60000 games, W/L ratio 1.0
70000 games, W/L ratio 1.0
80000 games, W/L ratio 0.9999
90000 games, W/L ratio 1.0
100000 games, W/L ratio 1.0


In [12]:
# train a fresh Player against the opponent that always takes 3 objects (or what is left)
play(HEAP, Player(HEAP), take_n_opponent(3))

0 games, W/L ratio 0.0
10000 games, W/L ratio 0.9706
20000 games, W/L ratio 0.9971
30000 games, W/L ratio 0.9969
40000 games, W/L ratio 0.9989
50000 games, W/L ratio 0.9997
60000 games, W/L ratio 0.9998
70000 games, W/L ratio 1.0
80000 games, W/L ratio 1.0
90000 games, W/L ratio 0.9999
100000 games, W/L ratio 1.0
