In [48]:
import numpy as np
import random
import copy
from collections import Counter
# Note: To use this import: pip install ipynb
from ipynb.fs.full.mahjong_rules import Tile, Draw_Pile, Discard_Pile, Player, score_hand, display_tiles

### Initialize Mahjong Game

In [37]:
def initialize_game_state():
    """Create the objects for a new single-player game and wrap them in a Game_State.

    Builds a ``Draw_Pile`` and shuffles its ``deck`` in place, deals 14 tiles
    from it into a new ``Player``'s ``hand``, creates a ``Discard_Pile`` and
    returns ``Game_State(player1, discard_pile, draw_pile)``.
    """
    # The wall of tiles lives in ``wall.deck``; randomise its order before dealing.
    wall = Draw_Pile()
    random.shuffle(wall.deck)

    # Opening hand: 13 tiles + 1, dealt one at a time into ``solo_player.hand``.
    solo_player = Player()
    for _ in range(14):
        dealt_tile = wall.deal()
        solo_player.hand.append(dealt_tile)

    discards = Discard_Pile()

    return Game_State(draw_pile=wall, discard_pile=discards, player1=solo_player)


### Game State: Discard and Draw Tiles

In [15]:
# A game state object: consists of player hand, discard pile and draw pile
class Game_State(object):
    """Container bundling the three pieces of a single-player game.

    Attributes:
        player1: the player object; its ``hand`` attribute holds the tiles.
        discard_pile: the discard pile object.
        draw_pile: the draw pile object.
    """

    def __init__(self, player1, discard_pile, draw_pile):
        self.player1 = player1
        self.discard_pile = discard_pile
        self.draw_pile = draw_pile

    def get_possible_actions(self):
        """Return the player's hand (the same object, not a copy).

        The method previously lacked ``self``, so calling it on an instance
        raised ``TypeError``.
        """
        return self.player1.hand

def update_state(state,discard_tile):
    """Play one turn in place: discard ``discard_tile``, then draw a replacement.

    The value returned by ``state.player1.discard(discard_tile)`` is appended
    to ``state.discard_pile.deck``; the value returned by
    ``state.draw_pile.deal()`` is appended to ``state.player1.hand``.

    Returns the same ``state`` object that was passed in (mutated, not copied).
    """
    active_player = state.player1
    # Whatever Player.discard hands back is what lands on the discard pile.
    thrown_tile = active_player.discard(discard_tile)
    state.discard_pile.deck.append(thrown_tile)
    # Refill the hand with the next tile dealt from the draw pile.
    drawn_tile = state.draw_pile.deal()
    active_player.hand.append(drawn_tile)
    return state
        
# Use this function to output the entire state to see
# Displays player hand & discard pile
# Does not display draw pile (only shows how many tiles remaining)
def display_state(state):
    """Print the player's hand, the discard pile and the draw pile's tile count.

    Args:
        state: object exposing ``player1.hand``, ``discard_pile.deck`` and
            ``draw_pile.deck``.
    """
    print('~Player Hand~')
    display_tiles(state.player1.hand)
    print('--------------')
    print('~Discard Pile~')
    display_tiles(state.discard_pile.deck)
    print('--------------')
    print('~Draw Pile~')
    # Count the tiles through ``.deck`` (the attribute the rest of this
    # notebook uses for the draw pile) instead of len(draw_pile), which only
    # works if Draw_Pile happens to implement __len__.
    print('tile count =',len(state.draw_pile.deck))

In [50]:
# Build the game state in this cell: no other cell in the notebook defines
# `game_state`, so relying on a leftover kernel variable breaks
# Restart Kernel -> Run All.
game_state = initialize_game_state()

# Shuffle and display a deep copy so `game_state` itself stays untouched.
state1 = copy.deepcopy(game_state)
random.shuffle(state1.player1.hand)
display_tiles(state1.player1.hand)


tile count = 14
3 Tong
9 Wan
2 Suo
0 Bai_Ban
4 Wan
1 Wan
6 Wan
2 Wan
0 Xi
2 Wan
1 Tong
7 Suo
6 Wan
2 Wan


In [53]:
# Pick 14 distinct (tile, copy) slots out of the 33*4 available.
# random.sample draws WITHOUT replacement, so exactly 14 cells get set;
# random.choices could repeat a slot and leave the hand with fewer tiles.
random_seed = random.sample(range(33*4), k=14)
state = np.zeros([33,4])
for i in random_seed:
    # i // 4 gives tile no., i % 4 gives copy no.
    state[i // 4, i % 4] = 1
# Hash of the array's raw bytes, used as a dictionary key for the state.
# Python salts bytes hashes per process, so the value is only stable within
# one interpreter session unless PYTHONHASHSEED is fixed.
s_hash = hash(state.tobytes())
s_hash

7947700045660605086

In [None]:
class Policy(object):
    """Bookkeeping tables shared by the Monte Carlo tree search functions.

    Args:
        n_tiles: number of discrete actions; ``simulate`` allocates one
            length-``n_tiles`` array per state for both ``N`` and ``Q``.
    """

    def __init__(self,n_tiles):
        # N[s] / Q[s]: per-state arrays of visit counts / action-value
        # estimates, appended by `simulate` when a state is first seen.
        self.N = []
        # `simulate` appends to policy.Q, so the list must exist up front.
        self.Q = []
        # state index -> state
        self.state_dict = {}
        # state hash -> state index (the position used in N and Q)
        self.N_dict = {}
        self.n_tiles = n_tiles
        self.exploration_parameter = 1
        self.depth = 5
    
def MonteCarloTreeSearch(state,m_simulations):
    """Run ``m_simulations`` simulations from ``state`` and return the best action.

    Args:
        state: 2D array representing the mahjong game state (e.g. a
            ``np.array`` of shape ``[33, 4]``); it must support ``tobytes()``.
        m_simulations: number of calls to ``simulate`` from the root state.

    Returns:
        The element of ``get_possible_actions(state)`` with the highest
        estimated Q value at the root.

    Raises:
        ValueError: if the root state was never added to the search tables
            (for example ``m_simulations`` is 0, or ``simulate`` returned
            before registering it).
    """
    # define policy globally so that dont need to keep passing it around
    global policy
    policy = Policy(n_tiles = len(state)*len(state[0]))

    # The search horizon lives on the policy object; a bare `depth` name is
    # not defined at this point.
    for _ in range(m_simulations):
        simulate(state, policy.depth)

    # s_hash = state converted to bytes and hashed; s = its index in N / Q
    s_hash = hash(state.tobytes())
    if s_hash not in policy.N_dict:
        raise ValueError("Root state was never expanded; no Q estimates available")
    s = policy.N_dict[s_hash]

    # Return a list of numbers between 0 and 33*4 that I can select from
    possible_a = np.asarray(get_possible_actions(state), dtype=int)

    # policy.Q[s] is updated globally in the "simulate" function. Q is a list
    # of arrays, so index the row first and then the candidate actions.
    root_q = np.asarray(policy.Q[s])[possible_a]

    # argmax is a position inside possible_a; map it back to the action id.
    optimal_action = possible_a[int(np.argmax(root_q))]

    return optimal_action

def simulate(state, depth, gamma=1.0):
    """Run one recursive MCTS simulation from ``state`` and return its sampled return.

    Uses the module-level ``policy`` object created by ``MonteCarloTreeSearch``
    and updates its ``N`` (visit counts) and ``Q`` (running-mean value) tables.

    Args:
        state: array game state supporting ``tobytes()``.
        depth: remaining search depth; the recursion stops when it reaches 0.
        gamma: discount factor applied to the return of the next state.
            Defaults to 1.0 (undiscounted) because no ``gamma`` is defined in
            the visible notebook -- NOTE(review): confirm the intended value.

    Returns:
        The reward of a winning state, 0.0 at the depth limit, otherwise
        ``reward + gamma * simulate(next_state, depth - 1)``.
    """
    if is_winning_hand(state): # NEED FUNCTION #
        return get_reward(state) # NEED FUNCTION #

    if depth <= 0:
        # Horizon reached without a win. There is no value estimate to fall
        # back on here (`q` is not assigned yet), so contribute nothing.
        return 0.0

    # Return a list of numbers between 0 and 33*4 that I can select from
    possible_actions = get_possible_actions(state) # NEED FUNCTION #

    s_hash = hash(state.tobytes())
    if s_hash not in policy.N_dict:
        # First visit: the next free row of N / Q becomes this state's index.
        # len() also works for the very first state, where max() of an empty
        # dict would raise.
        new_index = len(policy.N)
        policy.state_dict[new_index] = state
        policy.N_dict[s_hash] = new_index
        policy.N.append(np.zeros(policy.n_tiles))
        policy.Q.append(np.zeros(policy.n_tiles))
    s = policy.N_dict[s_hash]

    # explore() takes the state index and the candidate actions.
    a = explore(s, possible_actions)

    # Return a new state - discard tile and draw tile. Also return the reward for that state
    # NOTE(review): the update_state defined earlier in this notebook takes a
    # Game_State and returns only the state; this call expects an array-state
    # version returning (next_state, reward).
    next_state, reward = update_state(state, a) # NEED FUNCTION #

    q = reward + gamma*simulate(next_state, depth-1, gamma)
    # N and Q are lists of per-state arrays: index the row, then the action.
    policy.N[s][a] += 1
    policy.Q[s][a] += (q-policy.Q[s][a])/policy.N[s][a]
    return q

# UCB1 exploration term: sqrt(log N(s) / N(s,a)), infinite for untried actions
def exploration_bonus(Nsa,Ns):
    """Return the UCB1 exploration bonus for visit count(s) ``Nsa``.

    Args:
        Nsa: visit count of an action, or an array of counts.
        Ns: total visit count of the state.

    Returns:
        ``sqrt(log(Ns) / Nsa)`` where ``Nsa > 0`` and ``inf`` where
        ``Nsa == 0``; a scalar for scalar input, an array for array input.

    The earlier form ``np.inf*(Nsa == 0) + ...`` evaluated ``inf * 0 = nan``
    for every visited action and divided by zero for unvisited ones.
    """
    counts = np.asarray(Nsa, dtype=float)
    tried = counts > 0
    # Clamp so Ns == 0 gives log(1) = 0 instead of log(0) = -inf.
    log_total = np.log(np.maximum(Ns, 1))
    # Use a placeholder divisor of 1 for untried actions; np.where below
    # replaces those entries with inf.
    safe_counts = np.where(tried, counts, 1.0)
    bonus = np.where(tried, np.sqrt(log_total / safe_counts), np.inf)
    # [()] unwraps a 0-d result to a scalar and leaves real arrays unchanged.
    return bonus[()]

# equation (9.1) - Q(s,a)+c*sqrt(logN(s)/N(s,a))
def explore(s,possible_a):
    """Choose the action in ``possible_a`` with the highest UCB1 score.

    Reads the module-level ``policy`` tables without modifying them.

    Args:
        s: index of the state in ``policy.N`` / ``policy.Q``.
        possible_a: iterable of integer action ids available in that state.

    Returns:
        The action id (an element of ``possible_a``) maximising
        ``Q(s,a) + c * exploration_bonus(N(s,a), N(s))``.
    """
    actions = np.asarray(possible_a, dtype=int)
    # N and Q are lists of per-state arrays: take the row, then the actions.
    counts = np.asarray(policy.N[s])[actions]
    values = np.asarray(policy.Q[s])[actions]
    Ns = np.sum(counts)
    # Score = value estimate + scaled bonus. The stored Q values must not be
    # overwritten with the bonus, or the learned estimates are lost.
    scores = values + policy.exploration_parameter * exploration_bonus(counts, Ns)
    # argmax is a position inside `actions`; map it back to the action id.
    return actions[int(np.argmax(scores))]

def random_policy(state):
    """Apply ``choose_random_action`` repeatedly until the hand wins, then score it.

    Returns ``get_reward`` of the first state for which ``is_winning_hand`` is
    true. An ``IndexError`` raised by ``choose_random_action`` is re-raised as
    a plain ``Exception`` naming the offending state.
    """
    current = state
    while True:
        # Terminal check comes first, so an already-winning input is scored as-is.
        if is_winning_hand(current):
            return get_reward(current)
        try:
            current = choose_random_action(current)
        except IndexError:
            raise Exception("Non-terminal state has no possible actions: " + str(current))