In [0]:
from math import *
import random
import pandas as pd

In [0]:
class OXOState:
    """ A state of the game, i.e. the game board.
        Squares in the board are in this arrangement
        012
        345
        678
        where 0 = empty, 1 = player 1 (X), 2 = player 2 (O)
    """
    def __init__(self):
        self.playerJustMoved = 2 # At the root pretend the player just moved is p2 - p1 has the first move
        self.board = [0,0,0,0,0,0,0,0,0] # 0 = empty, 1 = player 1, 2 = player 2
        
    def Clone(self):
        """ Create a deep clone of this game state.
        """
        st = OXOState()
        st.playerJustMoved = self.playerJustMoved
        st.board = self.board[:]
        return st

    def DoMove(self, move):
        """ Update a state by carrying out the given move.
            Must update playerToMove.
        """
        assert move >= 0 and move <= 8 and move == int(move) and self.board[move] == 0
        self.playerJustMoved = 3 - self.playerJustMoved
        self.board[move] = self.playerJustMoved
        
    def GetMoves(self):
        """ Get all possible moves from this state.
        """
        return [i for i in range(9) if self.board[i] == 0]
    
    def GetResult(self, playerjm):
        """ Get the game result from the viewpoint of playerjm. 
        """
        for (x,y,z) in [(0,1,2),(3,4,5),(6,7,8),(0,3,6),(1,4,7),(2,5,8),(0,4,8),(2,4,6)]:
            if self.board[x] == self.board[y] == self.board[z]:
                if self.board[x] == playerjm:
                    return 1.0
                else:
                    return 0.0
        if self.GetMoves() == []: return 0.5 # draw
        return False # Should not be possible to get here

    def __repr__(self):
        s= ""
        for i in range(9): 
            s += ".XO"[self.board[i]]
            if i % 3 == 2: s += "\n"
        return s


In [0]:
class Node:
    """ A node in the game tree. Note wins is always from the viewpoint of playerJustMoved.
        Crashes if state not specified.
    """
    def __init__(self, move = None, parent = None, state = None):
        self.move = move # the move that got us to this node - "None" for the root node
        self.parentNode = parent # "None" for the root node
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        self.untriedMoves = state.GetMoves() # future child nodes
        self.playerJustMoved = state.playerJustMoved # the only part of the state that the Node needs later
        
    def UCTSelectChild(self):
        """ Use the UCB1 formula to select a child node. Often a constant UCTK is applied so we have
            lambda c: c.wins/c.visits + UCTK * sqrt(2*log(self.visits)/c.visits to vary the amount of
            exploration versus exploitation.
        """
        s = sorted(self.childNodes, key = lambda c: c.wins/c.visits + sqrt(2*log(self.visits)/c.visits))[-1]
        return s
    
    def AddChild(self, m, s):
        """ Remove m from untriedMoves and add a new child node for this move.
            Return the added child node
        """
        n = Node(move = m, parent = self, state = s)
        self.untriedMoves.remove(m)
        self.childNodes.append(n)
        return n
    
    def Update(self, result):
        """ Update this node - one additional visit and result additional wins. result must be from the viewpoint of playerJustmoved.
        """
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return "[M:" + str(self.move) + " W/V:" + str(self.wins) + "/" + str(self.visits) + " U:" + str(self.untriedMoves) + "]"

    def TreeToString(self, indent):
        s = self.IndentString(indent) + str(self)
        for c in self.childNodes:
             s += c.TreeToString(indent+1)
        return s

    def IndentString(self,indent):
        s = "\n"
        for i in range (1,indent+1):
            s += "| "
        return s

    def ChildrenToString(self):
        s = ""
        for c in self.childNodes:
             s += str(c) + "\n"
        return s


def UCT(rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    if verbose: print(rootnode.TreeToString(0))
    else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame():
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState()

    df_t=[[0,0,0,0,0,0,0,0,0]] #Creating the default row (first row) (code line added)
    bm_list=[] # creating a best moves empty list (code line added)
    while state.GetMoves() != []:
        print(str(state))
        if state.playerJustMoved == 1:
            m = UCT(rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(rootstate=state, itermax=100, verbose=False) #itermax decides the total value of visits.
        print("Best Move: " + str(m) + "\n")
        state.DoMove(m)
        bm_list.append(m) # append the best moves to the list
        df_t.append(list(state.board))  #append all the positions list 
        if state.GetResult(state.playerJustMoved) != False:
            print(str(state))            
            break

    if state.GetResult(state.playerJustMoved) == 1.0:
        print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        print("Player " + str(3 - state.playerJustMoved) + " wins!")
    else: print("Nobody wins!")

    bm_list.append('NaN')  # Append NaN values ti the last state of each game because there are no more moves to choose. (code line added)
    
    
    #Join the postitions board with the correcponding best move
    for i in range(len(df_t)): # (code line added)
      df_t[i].append(bm_list[i]) # (code line added)
    return df_t # (code line added)

if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    #Generate the game for N times (for purpose of the assigment it will be generated 2000 games)
    df =[] # LINE ADDED
    for i in range(2000): # Runing the game for as many times the value in range is written (code line added)
      ds=UCTPlayGame() # Calling the game to play (code line added)
      df.append(ds) # Append the games into a dataframe (code line added)

    #Change from a list of listo into one list
    fdf=[] # (code line added)
    for i in df:  # (code line added)
      for j in i:  # (code line added)
        fdf.append(j)  # (code line added)

     
    #Formating the data frame
    data_df=pd.DataFrame(fdf, columns=['pos: 0', 'pos: 1', 'pos: 2', 'pos: 3', 'pos: 4',
                                       'pos: 5', 'pos: 6', 'pos: 7', 'pos: 8', 'Best Move']) #(code line added)
    #Deleting the rows that have NaN values 
    data_df=data_df.ix[data_df.iloc[:,-1]!='NaN'] #(code line added)

    #Generate the correct indexing for the rows (because it change after removing NaN values)
    data_df.reset_index(drop=True, inplace=True) #(code line added)
     
    #Printing the final Data Frame
    print("Final Dataset : ") # (code line added)
    print(data_df) # (code line added)

[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m

[M:5 W/V:4.0/18 U:[]]
[M:2 W/V:2.5/16 U:[]]
[M:8 W/V:9.5/27 U:[]]
[M:6 W/V:6.0/21 U:[]]
[M:3 W/V:4.0/18 U:[]]

Best Move: 8

OX.
.O.
.XX

[M:6 W/V:424.5/443 U:[]]
[M:5 W/V:23.0/42 U:[]]
[M:3 W/V:479.0/495 U:[]]
[M:2 W/V:6.0/20 U:[]]

Best Move: 3

OX.
OO.
.XX

[M:5 W/V:4.0/37 U:[]]
[M:2 W/V:0.0/25 U:[]]
[M:6 W/V:4.0/38 U:[]]

Best Move: 6

OX.
OO.
XXX

Player 1 wins!
...
...
...

[M:6 W/V:10.5/13 U:[]]
[M:2 W/V:2.0/6 U:[5, 6, 8]]
[M:4 W/V:17.0/19 U:[]]
[M:5 W/V:5.0/9 U:[]]
[M:8 W/V:8.0/11 U:[]]
[M:1 W/V:3.5/7 U:[0, 2]]
[M:0 W/V:7.0/10 U:[]]
[M:7 W/V:7.0/10 U:[]]
[M:3 W/V:13.0/15 U:[]]

Best Move: 4

...
.X.
...

[M:5 W/V:10.5/81 U:[]]
[M:1 W/V:21.5/112 U:[]]
[M:0 W/V:106.0/316 U:[]]
[M:2 W/V:30.0/135 U:[]]
[M:3 W/V:30.5/136 U:[]]
[M:8 W/V:10.0/79 U:[]]
[M:6 W/V:12.0/85 U:[]]
[M:7 W/V:2.5/56 U:[]]

Best Move: 0

O..
.X.
...

[M:7 W/V:3.5/9 U:[]]
[M:3 W/V:5.5/11 U:[]]
[M:5 W/V:9.0/14 U:[]]
[M:8 W/V:9.0/14 U:[]]
[

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#ix-indexer-is-deprecated


In [0]:
#Exportin the dataset to a csv file
from google.colab import files

data_df.to_csv('dataset_assigment1.csv') 
files.download('dataset_assigment1.csv')