In [8]:
import numpy as np
import itertools
import random

## Classical back-updating method for solving TicTacToe

# Cartesian Product
def perm(n, seq):
    """Return every length-``n`` tuple over ``seq`` (the n-fold Cartesian product).

    Each element drawn from ``seq`` is converted with ``int``, so ``seq`` may
    be a string of digits such as ``"012"``.  The tuples are listed in the
    order produced by ``itertools.product``.
    """
    return [tuple(int(symbol) for symbol in combo)
            for combo in itertools.product(seq, repeat=n)]
  
    
class Game:
    """A square tic-tac-toe position together with the id of the player to move.

    Attributes:
        board: 2-D numpy array; 0 marks an empty cell, 1 and 2 mark the cells
            taken by player 1 and player 2.
        size: ``len(board)``; the board is treated as ``size`` x ``size``.
        player_id: id (1 or 2) of the player who makes the next move.
    """

    def __init__(self, board = None,player_id = 1):
        """Start from ``board`` (used as-is, not copied) or an empty 3x3 grid.

        A fresh int8 array is allocated whenever ``board`` is None, so games
        created with the default never share state.
        """
        self.board = np.zeros((3,3),dtype=np.int8) if board is None else board
        self.size = len(self.board)
        self.player_id = player_id

    def make_move(self,i,j):
        """Put the current player's mark at row ``i``, column ``j`` and pass the turn.

        Raises:
            AssertionError: if the cell is already occupied.
        """
        assert(self.board[i][j] == 0)
        self.board[i][j] = self.player_id
        self.player_id = 2 if self.player_id == 1 else 1

    @staticmethod
    def all_equal_value(s):
        """Return ``s[0]`` when every element of ``s`` equals it, otherwise -1."""
        first = s[0]
        for item in s[1:]:
            if item != first:
                return -1
        return first

    def is_full(self):
        """Return True when no cell of the board is empty (equal to 0)."""
        return not np.any(self.board == 0)

    def is_tied(self):
        """Return True when the board is full and neither player has a line.

        A full board alone is not a tie: the last free cell can complete a
        winning line, and such a position must count as a win.
        """
        return self.is_full() and self.is_won() == 0

    def set_valid_moves(self):
        """Return the set of ``(row, col)`` tuples of all empty cells."""
        return set((i, j)
                   for i in range(self.size)
                   for j in range(self.size)
                   if self.board[i,j] == 0)

    def is_won(self):
        """Return the winner of the position as a plain int.

        0 is no winner, 1 is player 1 won, 2 is player 2 won.  Lines are
        examined in the order rows, columns, main diagonal, anti-diagonal and
        the first completed line decides the result.
        """
        n = self.size
        lines = [self.board[i,:] for i in range(n)]
        lines.extend(self.board[:,j] for j in range(n))
        lines.append([self.board[i,i] for i in range(n)])
        lines.append([self.board[i,n - 1 - i] for i in range(n)])
        for line in lines:
            # all_equal_value yields -1 for a mixed line and 0 for an empty
            # one, so only a line owned by a single player is positive.
            winner = Game.all_equal_value(line)
            if winner > 0:
                return int(winner)
        return 0

    def is_over(self):
        """Return True when the board is full or one of the players has won."""
        return self.is_full() or self.is_won() > 0

    def print_state(self):
        """Print the board array."""
        print(self.board)

    def is_valid_move(self,i,j):
        """Return whether the cell at row ``i``, column ``j`` is empty."""
        return self.board[i,j] == 0

    @staticmethod
    def main():
        """Smoke test: play three short openings, printing state after move 1 and move 3."""

        def report(g):
            print(g.board)
            print(g.is_tied())
            print(g.is_won())

        openings = (
            ((0,1), (1,1), (2,1)),
            ((1,0), (1,1), (1,2)),
            ((0,0), (1,1), (2,2)),
        )
        for first, second, third in openings:
            g = Game()
            g.make_move(*first)
            report(g)
            g.make_move(*second)
            g.make_move(*third)
            report(g)
        
#for t in perm(game_size*game_size, "012"):
#        state = np.array(t,dtype=np.int8).reshape(game_size,game_size)
#        winner = Game(state).is_won()
        #print state
#        if winner == 1:
#            game_states[state.tobytes()] = 1
#        elif winner == 2:
#            game_states[state.tobytes()] = 2
#        elif Game(state).is_tied():
#            game_states[state.tobytes()] = -1
#        else:
#            game_states[state.tobytes()] = 0
        
class Player:
    """Common interface for tic-tac-toe participants; every hook is a no-op here."""

    def __init__(self):
        """The base player keeps no state."""
        return None

    def decide_move(self,g):
        """Pick a move for game ``g``; the base implementation returns None."""
        return None

    def update(self):
        """Learning hook; the base implementation does nothing."""
        return None
    
    
class Human_Player(Player):
    """Player whose moves are typed at the console."""

    def decide_move(self,g):
        """Prompt until the user enters a legal move and return it as ``(row, col)``.

        The user types ``x,y`` (column, row) with the top-left cell at 0,0; the
        pair is reversed so that it indexes ``g.board`` directly.  Malformed
        text, out-of-range coordinates and occupied cells are rejected with a
        message and a new prompt, instead of raising and aborting the game.
        """
        # raw_input only exists on Python 2; fall back to input on Python 3.
        try:
            read_line = raw_input
        except NameError:
            read_line = input

        while True:
            g.print_state()
            print("Player " + str(g.player_id) +", what is your move?")
            print("Format as 'x,y' where the Top-left is 0,0")
            inpt = read_line()
            try:
                x, y = map(int, inpt.split(','))
            except ValueError:
                # Non-numeric text or the wrong number of coordinates.
                print("Could not read that move, please try again.")
                continue
            if 0 <= x < g.size and 0 <= y < g.size and g.is_valid_move(y, x):
                return (y, x)
            print("That cell is off the board or already taken, please try again.")
    
class Referee():
    """Runs one game between two players, asking each for a move on their turn."""

    def __init__(self,p1,p2):
        """Seat ``p1`` as player 1 and ``p2`` as player 2 on a fresh ``Game``."""
        self.p1 = p1
        self.p2 = p2
        self.game = Game()

    def run_game(self):
        """Play ``self.game`` until it is over and report the result.

        Both players' ``update`` hooks are called once the game has ended,
        whatever the outcome, so moves recorded during a tied game are not
        carried over into a later game.

        Returns:
            The winner's id (1 or 2), or -1 when the game ended in a tie.
        """
        while not self.game.is_over():
            player_to_ask = self.p2 if self.game.player_id == 2 else self.p1
            move = player_to_ask.decide_move(self.game)
            self.game.make_move(move[0],move[1])

        winner = self.game.is_won()
        self.p1.update()
        self.p2.update()
        # The winner is decided before the tie is considered: the last free
        # cell can complete a line, and a full board must not hide that win.
        if winner > 0:
            return winner
        return -1

class AI_Player(Player):
    """Tabular value-learning tic-tac-toe player.

    ``self.value`` maps the bytes of an int8 board (``board.tobytes()``) to a
    score seen from player 1's side: 1 for boards player 1 has won, 0 for
    boards player 2 has won and 0.5 for everything else until training moves
    it.  Player 1 picks moves that maximise the score, player 2 moves that
    minimise it.

    Attributes:
        game_size: side length of the board the table was built for.
        value: dict from board bytes to score.
        alpha: step size used by ``update``.
        explo: probability of playing a uniformly random move.
        update_list: ``(board_before, board_after)`` array pairs recorded for
            every greedy move since the last ``update``.
    """

    def __init__(self,game_size = 3,alpha = 0.2,explo = 0.1):
        """Build the value table for every assignment of 0/1/2 to the cells."""
        self.game_size = game_size
        self.value = {}
        self.alpha = alpha
        self.explo = explo
        self.update_list = []
        for t in perm(game_size*game_size, "012"):
            state = np.array(t,dtype=np.int8).reshape(game_size,game_size)
            key = state.tobytes()
            # The table is keyed by bytes, so the collision check has to look
            # up the bytes key (the raw tuple is never a key).
            if key in self.value:
                print("UHOH COLLISION")
            winner = Game(state).is_won()
            if winner == 1:
                self.value[key] = 1
            elif winner == 2:
                self.value[key] = 0
            else:
                self.value[key] = 0.5

    def decide_move(self,g):
        """Return the ``(row, col)`` move to play in game ``g``.

        With probability ``1 - explo`` the move whose resulting board has the
        best table value for ``g.player_id`` is chosen and the before/after
        boards are appended to ``update_list``.  Otherwise a uniformly random
        valid move is returned and nothing is recorded.

        Raises:
            ValueError: if the board has no empty cell.
        """
        moves = g.set_valid_moves()
        if not moves:
            raise ValueError("no valid moves left on the board")

        if random.random() > self.explo:
            # astype copies, and int8 is the dtype the table keys were built with.
            before = g.board.astype(np.int8)
            scratch = before.copy()
            best_move = None
            best_value = None
            for m in moves:
                scratch[m] = g.player_id
                guess = self.value[scratch.tobytes()]
                scratch[m] = 0

                if best_move is None:
                    improves = True
                elif g.player_id == 1:
                    improves = guess > best_value
                else:
                    improves = guess < best_value
                if improves:
                    best_move, best_value = m, guess

            after = before.copy()
            after[best_move] = g.player_id
            self.update_list.append((before, after))
            return best_move

        # random.sample no longer accepts a set on current Python versions;
        # choosing from a sorted list also keeps seeded runs reproducible.
        return random.choice(sorted(moves))

    def train_against_self(self,number_of_games = 10000):
        """Play ``number_of_games`` games with this player in both seats.

        Progress is printed every 1000 games.
        """
        for i in range(number_of_games):
            R = Referee(self,self)
            R.run_game()
            if i % 1000 == 0:
                print(str(i) + " games done.")

    def update(self):
        """Back up values along the recorded moves, newest first, then clear them.

        For each recorded pair the value of the earlier board is moved a
        fraction ``alpha`` of the way towards the value of the later board.
        """
        for before, after in reversed(self.update_list):
            before_key = before.tobytes()
            after_key = after.tobytes()
            self.value[before_key] = self.value[before_key] \
                                     + self.alpha * (self.value[after_key] - self.value[before_key])
        self.update_list = []

In [104]:
Game.main()

[[0 1 0]
 [0 0 0]
 [0 0 0]]
False
0
[[0 1 0]
 [0 2 0]
 [0 1 0]]
False
0
[[0 0 0]
 [1 0 0]
 [0 0 0]]
False
0
[[0 0 0]
 [1 2 1]
 [0 0 0]]
False
0
[[1 0 0]
 [0 0 0]
 [0 0 0]]
False
0
[[1 0 0]
 [0 2 0]
 [0 0 1]]
False
0


In [9]:
p = AI_Player()
#game_states

In [10]:
import time

# Time 30000 self-play training games for the agent `p` created in an earlier cell.
start = time.time()
p.train_against_self(number_of_games=30000)
# Parenthesised so the line parses as both the Python 2 statement and the
# Python 3 function; the printed value is the elapsed time in seconds.
print(time.time() - start)

0 games done.
1000 games done.
2000 games done.
3000 games done.
4000 games done.
5000 games done.
6000 games done.
7000 games done.
8000 games done.
9000 games done.
10000 games done.
11000 games done.
12000 games done.
13000 games done.
14000 games done.
15000 games done.
16000 games done.
17000 games done.
18000 games done.
19000 games done.
20000 games done.
21000 games done.
22000 games done.
23000 games done.
24000 games done.
25000 games done.
26000 games done.
27000 games done.
28000 games done.
29000 games done.
19.1447041035


In [159]:
# List every state whose learned value is no longer one of the initial 0, 0.5 or 1.
for k, v in p.value.items():
    if v not in [0,.5,1]:
        # Print an explicit tuple so the output keeps the "(key, value)" shape
        # whether print is the Python 2 statement or the Python 3 function.
        print((k, v))

('\x02\x02\x00\x00\x00\x00\x01\x00\x01', 0.7952)
('\x00\x00\x00\x00\x00\x00\x00\x00\x01', 0.5000000000000011)
('\x00\x00\x00\x00\x00\x00\x00\x00\x00', 0.5000000000000009)
('\x01\x01\x02\x00\x02\x02\x01\x01\x00', 0.06710886400000002)
('\x01\x01\x00\x02\x01\x01\x00\x02\x02', 0.2048)
('\x01\x02\x01\x00\x02\x00\x01\x00\x02', 0.6799999999999999)
('\x01\x02\x01\x02\x02\x00\x01\x01\x00', 0.4)
('\x01\x00\x00\x00\x02\x00\x00\x00\x00', 0.5000000000000009)
('\x01\x02\x00\x01\x02\x00\x00\x00\x00', 0.6799999999999999)
('\x00\x01\x02\x00\x02\x01\x01\x01\x02', 0.16384)
('\x01\x02\x02\x02\x00\x00\x01\x00\x01', 0.9859262511644672)
('\x02\x02\x00\x01\x01\x02\x00\x00\x01', 0.49664455679999997)
('\x00\x01\x00\x00\x02\x01\x01\x02\x00', 0.5000000571456595)
('\x00\x01\x00\x00\x02\x01\x01\x02\x02', 0.5350016000000001)
('\x01\x00\x00\x01\x00\x00\x00\x00\x02', 0.480404137984)
('\x02\x01\x00\x00\x00\x01\x02\x02\x01', 0.6)
('\x01\x01\x02\x00\x02\x00\x00\x01\x00', 1.3278449820419193e-20)
('\x01\x01\x02\x00\x02\x00

In [153]:
print(p.value['\x01\x00\x00\x00\x00\x00\x00\x00\x00'])
print(p.value['\x01\x00\x00\x00\x00\x02\x00\x00\x00'])

0.5
0.954665366239


In [7]:
p.explo = 0.02

R = Referee(Human_Player(),p)
R.run_game()

[[0 0 0]
 [0 0 0]
 [0 0 0]]
Player 1, what is your move?
Format as 'x,y' where the Top-left is 0,0
2,2
[[0 0 0]
 [0 2 0]
 [0 0 1]]
Player 1, what is your move?
Format as 'x,y' where the Top-left is 0,0
0,2
[[0 0 0]
 [0 2 0]
 [1 2 1]]
Player 1, what is your move?
Format as 'x,y' where the Top-left is 0,0
1,0
[[0 1 0]
 [0 2 2]
 [1 2 1]]
Player 1, what is your move?
Format as 'x,y' where the Top-left is 0,0
0,1
[[2 1 0]
 [1 2 2]
 [1 2 1]]
Player 1, what is your move?
Format as 'x,y' where the Top-left is 0,0
2,0


-1

In [157]:
R.run_game()

[[1 0 0]
 [0 0 0]
 [0 0 0]]
Player 2, what is your move?
Format as 'x,y' where the Top-left is 0,0
0,2
[[1 1 0]
 [0 0 0]
 [2 0 0]]
Player 2, what is your move?
Format as 'x,y' where the Top-left is 0,0
2,0
[[1 1 2]
 [0 1 0]
 [2 0 0]]
Player 2, what is your move?
Format as 'x,y' where the Top-left is 0,0
2,2
[[1 1 2]
 [1 1 0]
 [2 0 2]]
Player 2, what is your move?
Format as 'x,y' where the Top-left is 0,0
1,2


2

In [None]:
for i in game_states.keys():
    if game_states[i] == 2:
        print i.encode('unicode_escape')

In [None]:
# Raw string: the backslash escapes are printed literally, not interpreted.
print(r'\x02\x02\x00\x02\x00\x00\x02\x00\x00')

In [None]:
# The constructor's keyword is `board`; `boards` raises a TypeError.
Game(board = np.zeros((3,3)))

In [None]:
g.board

In [None]:
np.array_equal(g.board.reshape(1,-1)[0],( 0, 0, 0, 0, 0, 0, 0, 0, 0))

In [None]:
import itertools

def perm(n, seq):
    """Return every length-``n`` tuple over ``seq`` with each element cast to int.

    This is the n-fold Cartesian product of ``seq``, in the order produced by
    ``itertools.product``.  It repeats the helper defined at the top of the
    notebook.
    """
    L = []
    for p in itertools.product(seq, repeat=n):
        L.append(tuple(map(int,p)))
    return L

# Parenthesised so the line works as both the Python 2 statement and the
# Python 3 function.
print(perm(2, "012"))

In [None]:
board

In [None]:
res = tuple(map(int, x.split(','))) 

In [None]:
g = Game()
g.board

In [None]:
Game().board

In [None]:
Game().board

In [None]:
g.board[0][0]

In [None]:
g.board[x]

In [None]:
a = raw_input()
print(a)

In [None]:
np.ones((3,3))/9

In [None]:
g = Game()

In [None]:
g.board

In [None]:
Game().board

In [None]:
Game()

In [None]:
Game().board

In [None]:
Game().board

In [None]:
g1 = Game()
g2 = Game()

In [None]:
# make_move takes only (row, col); the mark placed is the game's current
# player_id, which is 1 for a freshly created Game.
g1.make_move(1,2)

g1.board

In [None]:
g1.board

In [None]:
g2.board

In [None]:
g1

In [None]:
g2

In [None]:
g1.board[2,2] = 3

In [None]:
g1.board

In [None]:
g2.board

In [None]:
g1.board is g2.board

In [None]:
id(g1.board)

In [None]:
id(g2.board)

In [None]:
g1.board
g2.board

In [None]:
g1.board

In [None]:
class test:
    """Probe class: each instance stores its own ``value`` (2 unless ``val`` is given)."""

    def __init__(self,val = 2):
        """Remember ``val`` on the instance."""
        self.value = val

In [None]:
t = test()

In [None]:
s = test()

In [None]:
s.value

In [None]:
t.value

In [None]:
t.value = 3

In [None]:
s.value

In [None]:
x = np.zeros((3,3))
x[0,0] = 1

In [None]:
np.zeros((3,3))

In [None]:
x

In [None]:
import numpy as np

class Game:
    """Experiment: instances that copy a shared default array stay independent.

    NOTE: running this cell rebinds the name ``Game``.
    """

    def __init__(self, board = np.zeros((3,3))):
        # The default array is built once, when the function is defined, so
        # every instance takes its own copy instead of aliasing it.
        self.board = board.copy()


g1, g2 = Game(), Game()
g1.board[0,0] = 1
# Displays g2's board; it is unaffected by the write to g1.
g2.board

In [None]:
import numpy as np

class Game:
    """Experiment: a slice copy of a nested default list still shares the rows.

    NOTE: running this cell rebinds the name ``Game``.
    """

    def __init__(self, board = [[0]]):
        # board[:] copies only the outer list; the inner row is the same
        # object as the one inside the default argument.
        self.board = board[:]

g1 = Game()
g2 = Game()
g1.board[0][0] = 1
# Prints [[1]]: the write through g1 is visible through g2's shared row.
# Parenthesised so the line works as both the Python 2 statement and the
# Python 3 function.
print(g2.board)

In [None]:
g2 = Game()

In [None]:
g1.board[0,0] = 1

In [None]:
g2.board

In [None]:
import numpy as np

class Game:
    """Experiment: a plain per-instance attribute with an immutable default.

    NOTE: running this cell rebinds the name ``Game``.
    """

    def __init__(self, size = 6):
        """Store ``size`` on the instance."""
        self.size = size

In [None]:
g1 = Game()
g2 = Game()
g1.size = 4
g2.size

In [None]:
# An array is never the None object, so nothing is printed here.
if np.zeros((3,3)) is None:
    print("hello")

In [None]:
b = None
b = np.zeros((3,3)) if b is None else b

In [None]:
# Two distinct arrays with equal contents serialise to equal bytes and so
# hash identically.  tobytes is used instead of its deprecated alias tostring,
# matching the rest of the notebook.
a = np.zeros((3,3))
b = np.zeros((3,3))
print(hash(a.tobytes()))
print(hash(b.tobytes()))

In [None]:
# perm is the module-level helper; AI_Player has no attribute of that name.
for i in perm(4, "012"):
    out = np.array(i).reshape(2,2)
    print(out)

In [None]:
import random
random.random()

In [None]:
np.zeros((3,3))[random.sample(range(3), 2)]

In [None]:
np.zeros((3,3))

In [None]:
x=random.sample(range(3), 2)
tuple(x)
np.zeros((3,3))[tuple(x)]

In [None]:
# tobytes replaces the deprecated alias tostring and returns the same bytes.
np.zeros((1,1)).tobytes()