In [53]:
import numpy as np
import random
import copy
from collections import Counter
# Note: To use this import: pip install ipynb
from ipynb.fs.full.mahjong_rules import Tile, Draw_Pile, Discard_Pile, Player, score_hand, display_tiles

### Hyperparameters

In [4]:
# Depth budget for the tree search: simulate(state, depth) stops recursing once depth <= 0.
# NOTE(review): presumably meant to be the initial `depth` passed to simulate() — confirm.
max_depth = 10
# Weight applied to the exploration bonus when explore() scores candidate actions.
exploration_parameter = 100


### Initialize Mahjong Game

In [37]:
def initialize_game_state():
    """Build a fresh single-player game and return it as a Game_State.

    The draw pile is shuffled in place, 14 tiles (13 + 1) are dealt from it
    into the player's hand, and the discard pile starts out as a new
    Discard_Pile instance.
    """
    # The tiles of the draw pile live in draw_pile.deck
    draw_pile = Draw_Pile()
    random.shuffle(draw_pile.deck)

    # The player's tiles live in player1.hand
    player1 = Player()
    for _ in range(14):
        dealt_tile = draw_pile.deal()
        player1.hand.append(dealt_tile)

    discard_pile = Discard_Pile()

    new_state = Game_State(player1=player1, discard_pile=discard_pile, draw_pile=draw_pile)
    return new_state


### Discard Tile

In [7]:
# Scratch cell: pass `tile` to player1.discard() and push whatever it returns onto the discard pile.
# NOTE(review): in the visible cells `player1` and `discard_pile` only exist as locals inside
# initialize_game_state(), so this cell presumably relied on leftover kernel state — confirm or remove.
# NOTE(review): whether `tile` is an index into the hand or a tile value depends on Player.discard — confirm.
tile = 2
discard_pile.deck.append(player1.discard(tile))

In [47]:
def compare(array1, array2, key=None):
    """Return True when both iterables hold the same items with the same counts, ignoring order.

    Parameters
    ----------
    array1, array2 : iterable
        The collections to compare.
    key : callable, optional
        Function mapping each item to a hashable value that identifies it.
        Items are counted by that value instead of by the item itself.
        Use it for items that are unhashable, or that hash by identity
        (the Python default for classes without __eq__/__hash__), where a
        copied object would otherwise never match its original.
        NOTE(review): the notebook's recorded result for a shuffled deep copy
        of a hand is False, presumably for this reason — confirm against the
        Tile class in mahjong_rules.

    Returns
    -------
    bool
    """
    if key is not None:
        array1 = map(key, array1)
        array2 = map(key, array2)
    return Counter(array1) == Counter(array2)

In [49]:
# Sanity check for compare(): the same elements in a different order.
a = [1,2]
b = [2,1]
compare(a,b)

True

In [15]:
# Initialize a game state object, consists of player hand, discard pile and draw pile
class Game_State(object):
    """Container for one game position: the player, the discard pile and the draw pile."""

    def __init__(self, player1, discard_pile, draw_pile):
        """Store the three components unchanged (no copies are made)."""
        self.player1 = player1
        self.discard_pile = discard_pile
        self.draw_pile = draw_pile

    def get_possible_actions(self):
        """Return the player's hand; each tile in it is a candidate discard.

        The returned object is the live hand, not a copy.
        """
        # `self` was missing from the signature, so calling this on an instance raised TypeError.
        return self.player1.hand

def update_state(state,discard_tile):
    """Play one turn on `state` in place: discard, then draw.

    Whatever state.player1.discard(discard_tile) returns is appended to the
    discard pile's deck, then one tile dealt from the draw pile is appended
    to the player's hand. The same (mutated) state object is returned.
    """
    player = state.player1
    discard_deck = state.discard_pile.deck
    # Move the discarded tile from the hand to the discard pile
    discard_deck.append(player.discard(discard_tile))
    # Replace it with a fresh tile from the draw pile
    player.hand.append(state.draw_pile.deal())
    return state
        
def display_state(state):
    """Print a readable dump of `state`.

    The player hand and the discard pile are listed through display_tiles();
    for the draw pile only the number of remaining tiles is printed
    (taken with len(), so the draw pile object must support len()).
    """
    divider = '--------------'

    print('~Player Hand~')
    display_tiles(state.player1.hand)
    print(divider)

    print('~Discard Pile~')
    display_tiles(state.discard_pile.deck)
    print(divider)

    print('~Draw Pile~')
    print('tile count =',len(state.draw_pile))

In [25]:
# Inspect the raw hand list of the current game.
# NOTE(review): `game_state` is assigned in a cell that appears below this one — the notebook
# was executed out of order.
game_state.player1.hand

In [38]:
# Create the game state used by the following cells: a Game_State holding the player
# (with a freshly dealt 14-tile hand), an empty discard pile and the shuffled draw pile.
game_state = initialize_game_state()

In [48]:
# Order-insensitive comparison of the original hand with the hand stored in `state1`
# (`state1` is created in a cell further down as a shuffled deep copy of game_state).
# NOTE(review): the recorded result is False although both hands list the same tiles —
# presumably the copied Tile objects hash/compare by identity; confirm in mahjong_rules.
compare(game_state.player1.hand,state1.player1.hand)

False

In [51]:
# Print the tiles currently in the player's hand.
display_tiles(game_state.player1.hand)

tile count = 14
1 Wan
4 Wan
2 Wan
0 Bai_Ban
6 Wan
2 Wan
7 Suo
2 Wan
1 Tong
6 Wan
3 Tong
0 Xi
2 Suo
9 Wan


In [50]:
# Make an independent copy of the game and shuffle only the copy's hand in place,
# so `state1` holds the same tiles as `game_state` in a different order.
state1 = copy.deepcopy(game_state)
random.shuffle(state1.player1.hand)
display_tiles(state1.player1.hand)


tile count = 14
3 Tong
9 Wan
2 Suo
0 Bai_Ban
4 Wan
1 Wan
6 Wan
2 Wan
0 Xi
2 Wan
1 Tong
7 Suo
6 Wan
2 Wan


In [47]:
# Play one turn: discard using `discard_tile`, draw a replacement, then print the result.
# update_state() mutates game_state in place, so every re-run of this cell advances the
# game by another turn and the printed output depends on how often the cell has been run.
discard_tile = 0
update_state(game_state,discard_tile)
display_state(game_state)


~Player Hand~
tile count = 13
8 Tong
4 Tong
6 Wan
7 Tong
1 Tong
8 Wan
0 Bei
0 Fa_Cai
5 Wan
0 Bai_Ban
7 Suo
9 Wan
0 Fa_Cai
--------------
~Discard Pile~
tile count = 12
5 Tong
6 Tong
8 Tong
3 Wan
5 Tong
1 Tong
3 Suo
6 Tong
9 Suo
0 Bai_Ban
7 Wan
5 Wan
--------------
~Draw Pile~
tile count = 111


In [52]:
# Scratch dictionary used further down to look up state hashes.
N_dict = {}
# FIXME(review): the assignment below has no right-hand side, so this cell is a SyntaxError
# as written. A later recorded output shows N_dict mapping 0 to a state hash and that hash
# back to 0 — presumably the intended value was hash(state.tobytes()); confirm.
N_dict[0] = 

In [94]:
# Build a random 14-tile hand encoded as a 33 x 4 matrix:
# row = tile no., column = copy no., 1 = that tile copy is in the hand.
# random.sample draws WITHOUT replacement, so the 14 ids are distinct and the matrix
# always holds exactly 14 ones (random.choices could draw the same id twice).
random_seed = random.sample(range(33*4), k=14)
state = np.zeros([33,4])
for i in random_seed:
    # i // 4 gives tile no., i % 4 gives copy no.
    tile_no, copy_no = divmod(i, 4)
    state[tile_no, copy_no] = 1
# Hash of the raw bytes, used as a dictionary key for the state.
# NOTE: Python salts hash() of bytes per interpreter process unless PYTHONHASHSEED is set,
# so this value is only stable within one kernel session.
hash(state.tobytes())

3275329101398170520

In [77]:
# Hash of the copied array; `state2` comes from `state2 = state.copy()` in a cell further down.
hash(state2.tobytes())

-2730353257153446200

In [76]:
# Hash of the original array, for comparison with the hash of its copy `state2`.
hash(state.tobytes())

-2730353257153446200

In [81]:
# Check whether the copy's hash is already a key of N_dict.
# NOTE(review): the cell that filled N_dict with a hash is not among the visible cells.
hash(state2.tobytes()) in N_dict.keys()

True

In [56]:
# Independent copy of the state array (same contents, separate buffer).
state2 = state.copy()

In [82]:
# Show the current contents of the scratch dictionary.
N_dict

{0: -2730353257153446200, -2730353257153446200: 0}

In [None]:
class Policy(object):
    """Bookkeeping tables for the Monte Carlo tree search.

    Attributes
    ----------
    N : list
        One array of length n_tiles per known state, indexed by state index;
        simulate() appends a zero array for each new state and increments
        N[s][a] as the visit count of action a in state s.
    Q : list
        Same layout as N; simulate() keeps the running mean return of each
        (state, action) pair in Q[s][a].
    state_dict : dict
        state index -> state object.
    N_dict : dict
        state hash -> state index.
    n_tiles : int
        Length of each per-state array (size of the action space).

    NOTE(review): simulate() also reads `policy.U`, which is not created here.
    """

    def __init__(self,n_tiles):
        # The body was not indented under __init__, which is an IndentationError.
        self.N = []
        # simulate() appends to Q alongside N, so it has to exist from the start.
        self.Q = []
        self.state_dict = {}
        self.N_dict = {}
        self.n_tiles = n_tiles
    
# Module-level Policy instance shared by the search functions below: simulate() and
# explore() read and update its tables through this global name.
# n_tiles = 33*4 matches the 33 x 4 (tile no. x copy no.) state matrix built earlier.
policy = Policy(n_tiles = 33*4)

def MonteCarloTreeSearch(state,m_simulations):
    """Run `m_simulations` simulations from `state` and return the best root action.

    Parameters
    ----------
    state :
        Root state. It must offer `.tobytes()`, because states are keyed by
        hash(state.tobytes()) in policy.N_dict.
        NOTE(review): the original comment described game_state as
        [state.player1.hand, state.discard_pile.deck, state.draw_pile.deck],
        which does not match the array encoding used by simulate() — confirm
        which representation is intended.
    m_simulations : int
        Number of calls to simulate().

    Returns
    -------
    int
        Index of the action with the highest estimated value Q at the root.

    Raises
    ------
    ValueError
        If the root was never added to the search tables (for example
        m_simulations == 0), so there is nothing to choose from.
    """
    for _ in range(m_simulations):
        # simulate() requires a depth; the search starts with the full budget.
        simulate(state, max_depth)

    root_hash = hash(state.tobytes())
    if root_hash not in policy.N_dict:
        raise ValueError("root state was never simulated; cannot pick an action")

    s = policy.N_dict[root_hash]
    # Greedy choice over the learned action values (previously an undefined name was returned).
    # NOTE(review): untried actions keep Q == 0 and are not masked out here — confirm
    # that this is acceptable for the reward scale in use.
    optimal_action = int(np.argmax(policy.Q[s]))
    return optimal_action

def simulate(state, depth):
    """Run one Monte Carlo tree search simulation from `state`; return the sampled return q.

    Steps:
      1. A winning hand is terminal: print and return its reward.
      2. Look up (or create) the state's row index `s` in policy.N / policy.Q.
      3. On a newly created row, or when the depth budget is used up, return
         the value estimate policy.U[s].
      4. Otherwise pick an action with explore(), apply it, recurse on the
         successor and fold the discounted return into the running mean
         Q[s][a] (incrementing the visit count N[s][a]).

    Parameters
    ----------
    state :
        Must offer `.tobytes()`; states are keyed by hash(state.tobytes()).
    depth : int
        Remaining search depth.

    NOTE(review): open points that cannot be resolved from the visible cells:
      * `policy.U` is never created by Policy and nothing populates it.
      * `gamma` (discount factor) is not defined in the hyperparameter cell.
      * update_state() in this notebook takes a Game_State, mutates it in
        place and returns only the state, while this function hashes an array
        and expects a (next_state, reward) pair from the transition.
    """
    if is_winning_hand(state):
        # `break` is a SyntaxError outside a loop; a terminal state ends the simulation
        # by returning its reward. get_reward() is the helper random_policy() uses
        # for the same purpose.
        score = get_reward(state)
        print('winning score:',score)
        return score

    # Resolve the state's row index first: every later step needs `s`.
    s_hash = hash(state.tobytes())
    first_visit = s_hash not in policy.N_dict
    if first_visit:
        # Rows are appended in order, so the next free index is the current row count
        # (max() over the values fails on an empty dict and read the wrong, global, N_dict).
        s = len(policy.N)
        policy.state_dict[s] = state
        policy.N_dict[s_hash] = s
        policy.N.append(np.zeros(policy.n_tiles))
        policy.Q.append(np.zeros(policy.n_tiles))
    else:
        s = policy.N_dict[s_hash]

    # Leaf of this simulation: fall back to the value estimate instead of expanding further.
    if depth <= 0 or first_visit:
        return policy.U[s]

    a = explore(state)

    next_state, reward = update_state(state, a)

    q = reward + gamma*simulate(next_state, depth-1)
    # policy.N / policy.Q are lists of arrays, so index row then column
    # (a list indexed with the tuple [s, a] raises TypeError).
    policy.N[s][a] += 1
    policy.Q[s][a] += (q-policy.Q[s][a])/policy.N[s][a]
    return q

# action = explore(policy, state)

def exploration_bonus(Nsa,Ns):
    """Exploration term sqrt(log N(s) / N(s,a)); infinite for unvisited actions.

    Parameters
    ----------
    Nsa : number or array-like
        Visit count(s) N(s, a) of the action(s).
    Ns : number
        Total visit count N(s) of the state.

    Returns
    -------
    numpy scalar for scalar `Nsa`, otherwise an array of the same shape:
    np.inf where Nsa == 0, sqrt(log(Ns) / Nsa) elsewhere.

    The previous one-liner `np.inf*(Nsa == 0) + ...` evaluated to nan for every
    visited action, because inf * 0 is nan, and divided by zero for unvisited ones.
    """
    counts = np.asarray(Nsa, dtype=float)
    unvisited = counts == 0
    # Swap zeros for a dummy 1 so the division is defined; those entries are
    # overwritten with inf below.
    safe_counts = np.where(unvisited, 1.0, counts)
    # log(0) can still occur when Ns == 0; silence it, the result is masked or reported as nan.
    with np.errstate(divide='ignore', invalid='ignore'):
        bonus = np.sqrt(np.log(Ns)/safe_counts)
    # [()] turns a 0-d result back into a scalar and leaves real arrays untouched.
    return np.where(unvisited, np.inf, bonus)[()]

# equation (9.1) - Q(s,a)+c*sqrt(logN(s)/N(s,a))
def explore(state):
    """Pick the action index that maximises equation (9.1) for `state`.

    score(a) = Q(s,a) + exploration_parameter * exploration_bonus(N(s,a), N(s))

    Parameters
    ----------
    state :
        Must offer `.tobytes()` and must already be registered in
        policy.N_dict (simulate() registers a state before calling this);
        an unknown state raises KeyError.

    Returns
    -------
    Index (into the n_tiles-long action arrays) of the best-scoring action.

    NOTE(review): all n_tiles indices compete here; actions that are not
    legal in `state` are not masked out — confirm whether that is intended.
    """
    s = policy.N_dict[hash(state.tobytes())]
    visits = policy.N[s]
    Ns = np.sum(visits)
    # The original had `=` between the two terms (a SyntaxError); the formula adds them.
    scores = policy.Q[s] + exploration_parameter * exploration_bonus(visits, Ns)
    return np.argmax(scores)

def random_policy(state):
    """Keep applying choose_random_action() until the hand is winning; return its reward.

    Each step replaces `state` with the result of choose_random_action(state).
    An IndexError raised by that call is re-raised as a generic Exception
    whose message includes str(state).
    """
    while True:
        if is_winning_hand(state):
            return get_reward(state)
        try:
            state = choose_random_action(state)
        except IndexError:
            message = "Non-terminal state has no possible actions: " + str(state)
            raise Exception(message)