In [120]:
from collections import defaultdict, Counter
import urllib.request
import numpy as np
import matplotlib.pyplot as plt
#import pickle
import dill as pickle
%matplotlib inline
#from tqdm import tqdm
import time
import math
from scipy.stats import expon
import random
import sys
from tqdm import tqdm_notebook as tqdm

In [167]:
class Player:
    
    def __init__(self,player_number):
        self.player_number = player_number
    
    def initial_state(self,n):
        
        self.stick_count = defaultdict(lambda: defaultdict(int))
        self.stick_prob = defaultdict(lambda: defaultdict(float))
        
        
        for i in range(1,n+1):   #iterating over no. of sticks
            for j in range(1,4):
                self.stick_count[i][j] = 3
                self.stick_prob[i][j] = 1/3
    
     
    #updating policy for winning
    def update_policy_winner(self):
        
        for k in self.stick_no.keys():
            self.stick_count[k][self.stick_no[k]] += 1
            for i in range(1,4):
                 self.stick_prob[k][i] = self.stick_count[k][i] / sum(self.stick_count[k].values())

    #updating policy for losing
    def update_policy_loser(self):
        
        for k in self.stick_no.keys():
            if sum(self.stick_count[k].values()) > 1 and self.stick_count[k][self.stick_no[k]]>0:
                self.stick_count[k][self.stick_no[k]] -= 1
            for i in range(1,4):
                 self.stick_prob[k][i] = self.stick_count[k][i] / sum(self.stick_count[k].values())
        
    
    def save_policy(self):
        with open('policy_%s.bin' % ('first' if self.player_number == 1 else 'second'), 'wb') as f:
            pickle.dump(self.stick_prob, f)
            pickle.dump(self.stick_count, f)

    def load_policy(self):
        with open('policy_%s.bin' % ('first' if self.player_number == 1 else 'second'), 'rb') as f:
            self.stick_prob = pickle.load(f)
            self.stick_count = pickle.load(f)


class State:
    
    def __init__(self,n, player1, player2, N,epsilon = 0.8):
        
        self.n = n
        self.sticks = n
        self.player1 = player1
        self.player2 = player2
        self.epsilon = epsilon
        self.N = N
    
    def reset_state(self):
        self.sticks = self.n
        self.player1.stick_no = {}
        self.player2.stick_no = {}
    
    def update_state(self, player, sim):
        
        
        keys = list(player.stick_prob[self.sticks].keys())
        values = list(player.stick_prob[self.sticks].values())
        
        
        if expon.pdf(sim/self.N, loc = 0, scale = 1) > self.epsilon and np.random.uniform(0,1) > 0.5:
            
            rand_index_1 = np.random.choice(len(keys))
            player.stick_no[self.sticks] = keys[rand_index_1]
            
        else:    
            rand_index_1 = np.random.choice(len(keys), 1, p=values)
            player.stick_no[self.sticks] = keys[rand_index_1[0]]
        self.sticks = self.sticks - player.stick_no[self.sticks]

        
    #checking the winner
    def check_winner(self,winner):
        
        if winner == 1:
            self.player1.update_policy_winner()
            self.player2.update_policy_loser()
        else :
            self.player2.update_policy_winner()
            self.player1.update_policy_loser()
    
        
                

In [168]:
def train(N,n):
    
    """
    Train the bot to play a stick game by making two AI players to compete

    Parameters
    ----------
    N : no. of simulations
    n : no. of sticks

    Returns
    -------
    
    Trained AI players
    
    player1 : object
    player2 : object
    
    
    Examples
    --------
    train(50000,50)
    
    """
    
    
    player1 = Player(1)
    player2 = Player(2)
    
    state = State(n, player1, player2,N, 0.8)
    
    
    player1.initial_state(n)
    player2.initial_state(n)
    
    
        
    for sim in tqdm(range(N)):
        #print (sim)

        state.reset_state()

        while True:

            if state.sticks > 0:
                state.update_state(player1, sim)
                #print (state.sticks)
            else:
                winner = 1
                #print ("Player 1 Wins")
                break

            if state.sticks > 0:
                state.update_state(player2, sim)
            else:
                winner = 2
                #print ("Player 2 Wins")
                break

        state.check_winner(winner)


    player1.save_policy()
    player2.save_policy()

    time.sleep(0.01)

        

In [169]:
train(50000,50)

HBox(children=(IntProgress(value=0, max=50000), HTML(value='')))

In [177]:
def play_game():
        
        """Play the game of sticks

                          
        Examples
        --------
        >>> play_game()
        
        """
        
        player = Player(2)
        player.load_policy()
        val = input("Enter number of sticks between 10 and 50: ")
        val = int(val)
        
        print("Start the game")
        
        #print (player.stick_prob)
        
        while True:
            
            if val == 1:
                print ("Computer Won")
                break
            
            p = input("Pick any number of sticks from 1,2,3 : ")
            
            if int(p) < 4:
            
                val -=  int(p)

                if val == 1:
                    print ("Player Won")
                    break

                keys = list(player.stick_prob[val].keys())
                values = list(player.stick_prob[val].values())
                #rand_index_2 = np.random.choice(len(keys), 1, p=values)
                #s = keys[rand_index_2[0]]
                s = keys[np.argmax(values)]
                val -= s
                print (val)
            else:
                print("Invalid Entry")
                print (val)


In [178]:
play_game()

Enter number of sticks between 10 and 50: 20
Start the game
Pick any number of sticks from 1,2,3 : 2
17
Pick any number of sticks from 1,2,3 : 4
Invalid Entry
17
Pick any number of sticks from 1,2,3 : 2
13
Pick any number of sticks from 1,2,3 : 1
9
Pick any number of sticks from 1,2,3 : 2
5
Pick any number of sticks from 1,2,3 : 3
1
Computer Won


In [None]:
class stickbot:
    """
    n : No. of sticks
    """
    def __init__(self,n):
        self.n_sticks = n
    
    def fit(self,N):
        """Train the bot to play a stick game by making two AI players to compete

        Parameters
        ----------
        N : no. of simulations
               
        Returns
        -------
        player_2_prob : dict
                    storing the trained output of the player 2
             
        Examples
        --------
        >>> game1 = stickbot(100)
        >>> game1.fit(100000)
        
        """
        
        self.player_1_stick = defaultdict(lambda: defaultdict(int))
        self.player_1_prob = defaultdict(lambda: defaultdict(float))
        
        self.player_2_stick = defaultdict(lambda: defaultdict(int))
        self.player_2_prob = defaultdict(lambda: defaultdict(float))
        
        
        
        for i in range(1,self.n_sticks+1):
            for j in range(1,4):
                self.player_1_stick[i][j] = 1
                self.player_1_prob[i][j] = 1/3

                self.player_2_stick[i][j] = 1
                self.player_2_prob[i][j] = 1/3

            

        for sim in range(N):
            #print("Start Game......",sim)
            stick_no_1 = {}
            stick_no_2 = {}
            sticks = self.n_sticks
            
            while True:
                
                if sticks > 0:
                    keys = list(self.player_1_prob[sticks].keys())
                    values = list(self.player_1_prob[sticks].values())
                    rand_index_1 = np.random.choice(len(keys), 1, p=values)
                    stick_no_1[sticks] = keys[rand_index_1[0]]

                    sticks = sticks - stick_no_1[sticks]
                    #print (sticks)
                    
                
                else:
                    winner = 1
                    #print ("Player 1 Wins")
                    break

                if sticks > 0:
                    keys = list(self.player_2_prob[sticks].keys())
                    values = list(self.player_2_prob[sticks].values())
                    rand_index_2 = np.random.choice(len(keys), 1, p=values)
                    stick_no_2[sticks] = keys[rand_index_2[0]]

                    sticks = sticks - stick_no_2[sticks]
                    #print (sticks)
                
                else:
                    winner = 2
                    #print ("Player 2 Wins")
                    break

            
            if winner == 1:
                for k in stick_no_1.keys():
                    self.player_1_stick[k][stick_no_1[k]] += 1
                    for i in range(1,4):
                         self.player_1_prob[k][i] = self.player_1_stick[k][i] / sum(self.player_1_stick[k].values())

                for k in stick_no_2.keys():
                    if sum(self.player_2_stick[k].values()) > 1:
                        self.player_2_stick[k][stick_no_2[k]] -= 1

                        for i in range(1,4):
                            self.player_2_prob[k][i] = self.player_2_stick[k][i] / sum(self.player_2_stick[k].values())
                            

            else :
                for k in stick_no_2.keys():
                    self.player_2_stick[k][stick_no_2[k]] += 1
                    for i in range(1,4):
                        self.player_2_prob[k][i] = self.player_2_stick[k][i] / sum(self.player_2_stick[k].values())

                for k in stick_no_1.keys():
                    if sum(self.player_1_stick[k].values()) > 1:
                        self.player_1_stick[k][stick_no_1[k]] -= 1

                        for i in range(1,4):
                            self.player_1_prob[k][i] = self.player_1_stick[k][i] / sum(self.player_1_stick[k].values())
                            
  
                
            
            #print("End Game......",sim)
        
        #return [self.player_2_prob,self.player_2_stick]
    
    def play_game(self):
        
        """Play the game of sticks

                          
        Examples
        --------
        >>> game1.play_game()
        
        """
        
        
        val = input("Enter number of sticks between 10 and 100: ")
        val = int(val)
        
        print("Start the game")
        
        print (self.player_2_prob)
        print (self.player_1_prob)
        
        while True:
            
            if val == 1:
                print ("Computer Won")
                break
            
            p = input("Pick any number of sticks from 1,2,3 : ")
            
            val -=  int(p)
            
            if val == 1:
                print ("Player Won")
                break
            
            keys = list(self.player_2_prob[val].keys())
            values = list(self.player_2_prob[val].values())
            #rand_index_2 = np.random.choice(len(keys), 1, p=values)
            #s = keys[rand_index_2[0]]
            s = keys[np.argmax(values)]
            val -= s
            print (val)
                       

In [42]:
game1 = stickbot(50)

In [43]:
game1.fit(100000)

In [46]:
game1.play_game()

Enter number of sticks between 10 and 100: 20
Start the game
defaultdict(<function stickbot.fit.<locals>.<lambda> at 0x0000017AA3DC89D8>, {1: defaultdict(<class 'float'>, {1: 0.0, 2: 0.0, 3: 1.0}), 2: defaultdict(<class 'float'>, {1: 0.6, 2: 0.2, 3: 0.2}), 3: defaultdict(<class 'float'>, {1: 0.75, 2: 0.25, 3: 0.0}), 4: defaultdict(<class 'float'>, {1: 0.0, 2: 0.0, 3: 1.0}), 5: defaultdict(<class 'float'>, {1: 0.0, 2: 0.0, 3: 1.0}), 6: defaultdict(<class 'float'>, {1: 0.3333333333333333, 2: 0.0, 3: 0.6666666666666666}), 7: defaultdict(<class 'float'>, {1: 0.0, 2: 1.0, 3: 0.0}), 8: defaultdict(<class 'float'>, {1: 0.0, 2: 0.3333333333333333, 3: 0.6666666666666666}), 9: defaultdict(<class 'float'>, {1: 0.5, 2: 0.0, 3: 0.5}), 10: defaultdict(<class 'float'>, {1: 1.0, 2: 0.0, 3: 0.0}), 11: defaultdict(<class 'float'>, {1: 0.7112922002328289, 2: 0.28870779976717115, 3: 0.0}), 12: defaultdict(<class 'float'>, {1: 0.0, 2: 0.25, 3: 0.75}), 13: defaultdict(<class 'float'>, {1: 0.4426652892561983