# Predict

In [38]:
def generate_input(hand_cards, table_card=None, own_tricks=0, opponent_tricks=0, opponent_hand_cards=None):
    """Encode a game position as the two-array observation passed to the model.

    Returns a list ``[cards, tricks]``:

    * ``cards`` is a ``4 x 8 x 2`` array indexed by ``color.value`` and
      ``value.value``.  Channel 0 marks every card in ``hand_cards``,
      channel 1 marks ``table_card`` when one is given.
    * ``tricks`` holds four flags: the low and high bit of ``own_tricks``
      followed by the low and high bit of ``opponent_tricks``.  Only the
      counts 1, 2 and 3 set any flag; every other count encodes as zeros.

    ``opponent_hand_cards`` is accepted but not used by the encoding.
    """
    card_planes = np.zeros([4, 8, 2])
    trick_bits = np.zeros([4])

    for held in hand_cards:
        card_planes[held.color.value, held.value.value, 0] = 1

    if table_card is not None:
        card_planes[table_card.color.value, table_card.value.value, 1] = 1

    # Two flags per player: "count is odd (1 or 3)" and "count is 2 or 3".
    for offset, tricks in ((0, own_tricks), (2, opponent_tricks)):
        trick_bits[offset] = tricks in (1, 3)
        trick_bits[offset + 1] = tricks in (2, 3)

    return [card_planes, trick_bits]

In [14]:
def card(color, value):
    """Return the first card in ``env.cards`` with this color and value.

    Returns ``None`` when no card matches.
    """
    matching = (
        candidate
        for candidate in env.cards
        if candidate.value == value and candidate.color == color
    )
    return next(matching, None)

In [168]:
def set_env(hand_cards, table_card=None, own_tricks=0, opponent_tricks=0, opponent_hand_cards=None):
    """Reset the global ``env`` and install the given position in it.

    Args:
        hand_cards: cards for player 0; a copy is stored on the player.
        table_card: card currently on the table, or ``None``.
        own_tricks: trick count stored on player 0.
        opponent_tricks: trick count stored on player 1.
        opponent_hand_cards: cards for player 1.  When ``None`` the opponent
            is dealt random cards from the remaining deck: as many as
            ``hand_cards`` holds, minus one if a table card is given.

    Raises:
        ValueError: if a hand card (or a given opponent card) is not in the
            remaining deck, e.g. because it was passed twice.
    """
    env.reset()
    env.cards_left = env.cards[:]
    random.shuffle(env.cards_left)

    for held in hand_cards:
        env.cards_left.remove(held)
    env.players[0].hand_cards = hand_cards[:]

    # The card on the table is no longer in the deck.  Without this removal a
    # randomly dealt opponent hand could contain the very card that is already
    # lying on the table.  The membership check keeps callers that (wrongly)
    # also list the table card in a hand from raising where they did not before.
    if table_card is not None and table_card in env.cards_left:
        env.cards_left.remove(table_card)

    if opponent_hand_cards is None:
        opponent_count = len(hand_cards) - (1 if table_card is not None else 0)
        env.players[1].hand_cards = [env.cards_left.pop() for _ in range(opponent_count)]
    else:
        for held in opponent_hand_cards:
            env.cards_left.remove(held)
        env.players[1].hand_cards = opponent_hand_cards[:]

    env.players[0].tricks = own_tricks
    env.players[1].tricks = opponent_tricks
    env.table_card = table_card
    

def calc_correct_output_sample(hand_cards, table_card=None, own_tricks=0, opponent_tricks=0, opponent_hand_cards=None):
    """Install the position in ``env`` and return one search result for it.

    The position is set up with ``set_env`` and the element at index 1 of
    ``search(env.regenerate_obs())`` is returned.
    """
    # Earlier experiments in this cell (building a ``State`` root, running
    # ``mcts_game_step``, drawing the tree, stepping/rendering the env and
    # collecting ``sample_outputs``) are disabled; only the search call runs.
    set_env(hand_cards, table_card, own_tricks, opponent_tricks, opponent_hand_cards)

    search_result = search(env.regenerate_obs())
    return search_result[1]

def show_mcts(hand_cards, table_card=None, own_tricks=0, opponent_tricks=0, opponent_hand_cards=None, tree_depth=5, steps=100):
    """Run ``mcts_game_step`` from the given position and return its second result.

    The position is installed with ``set_env``, a ``State`` root is built from
    the environment state and current player, and ``steps`` is forwarded to
    ``mcts_game_step``.  ``tree_depth`` is currently unused because the tree
    drawing call is disabled.
    """
    set_env(hand_cards, table_card, own_tricks, opponent_tricks, opponent_hand_cards)

    search_root = State(1, env.get_state(), env.current_player)
    _action, policy = mcts_game_step(search_root, steps)

    # Put the environment back to the state stored on the root.
    env.set_state(search_root.env_state)
    return policy

In [16]:
def calc_correct_output(hand_cards, table_card=None, own_tricks=0, opponent_tricks=0, samples=100):
    """Average ``calc_correct_output_sample`` over several runs of one position.

    Args:
        hand_cards, table_card, own_tricks, opponent_tricks: forwarded
            unchanged to ``calc_correct_output_sample`` on every run.
        samples: number of runs to average (default 100, the previously
            hard-coded value).

    Returns:
        The element-wise mean of the sampled outputs as a NumPy array.

    Raises:
        ValueError: if ``samples`` is smaller than 1.
    """
    if samples < 1:
        raise ValueError("samples must be at least 1")

    total = None
    for _ in range(samples):
        sample = np.asarray(
            calc_correct_output_sample(hand_cards, table_card, own_tricks, opponent_tricks)
        )
        if total is None:
            # Copy the first sample: accumulating into it in place would
            # modify an array that the search code may still hold on to.
            total = np.array(sample)
        else:
            # Out-of-place add, so a list result is summed element-wise
            # instead of being extended.
            total = total + sample

    return total / samples

In [17]:
def predict(hand_cards, table_card=None, own_tricks=0, opponent_tricks=0):
    """Print the model output next to one search sample for a position.

    First prints the raw model output and the raw search sample, then one
    line per hand card: the model value at index ``card.id`` and, in
    parentheses, the search value at the card's position in ``hand_cards``.
    """
    encoded = generate_input(hand_cards, table_card, own_tricks, opponent_tricks)
    model_scores = model.predict_single(encoded)[0]
    search_scores = calc_correct_output_sample(hand_cards, table_card, own_tricks, opponent_tricks)
    print(model_scores, search_scores)

    # ``!s`` forces str() on every field, matching explicit str() concatenation.
    for slot, held in enumerate(hand_cards):
        print(f"{held.color!s} {held.value!s} => {model_scores[held.id]!s} ({search_scores[slot]!s})")

In [253]:
def generate_random_game():
    """Draw a random position from ``env.cards``.

    Each player's trick count is drawn from 0..2 and a coin flip decides
    whether a table card exists.  ``(5 - own_tricks - opponent_tricks) * 2``
    distinct cards are sampled; one is taken as the table card if enabled,
    then half of the remainder (rounded down) goes to the opponent and the
    rest stays in the own hand.

    Returns:
        ``(hand_cards, table_card, own_tricks, opponent_tricks,
        opponent_hand_cards)``.
    """
    own_tricks = random.randint(0, 2)
    opponent_tricks = random.randint(0, 2)
    with_table_card = random.randint(0, 1)

    cards_per_player = 5 - own_tricks - opponent_tricks
    dealt = random.sample(env.cards, cards_per_player * 2)

    table_card = None
    if with_table_card:
        table_card = dealt.pop()

    # The range is fixed before the first pop, so exactly half (rounded down)
    # of the cards present at this point move to the opponent.
    opponent_hand_cards = [dealt.pop() for _ in range(len(dealt) // 2)]

    return dealt, table_card, own_tricks, opponent_tricks, opponent_hand_cards

def generate_game_from_obs(obs):
    """Rebuild a position from an observation, dealing the opponent at random.

    ``obs[1]`` is read as two flags per player (values 1 and 2) that are added
    up into the trick counts.  ``obs[0]`` is scanned over every ``Color`` and
    ``Value``: channel 0 marks own hand cards, channel 1 marks the table card.
    The opponent receives ``5 - own_tricks - opponent_tricks`` cards from the
    shuffled remainder of ``env.cards``, one fewer if a table card exists.

    Returns:
        ``(hand_cards, table_card, own_tricks, opponent_tricks,
        opponent_hand_cards)``.
    """
    trick_flags = obs[1]
    own_tricks = (1 if trick_flags[0] else 0) + (2 if trick_flags[1] else 0)
    opponent_tricks = (1 if trick_flags[2] else 0) + (2 if trick_flags[3] else 0)

    deck = env.cards[:]
    random.shuffle(deck)

    hand_cards = []
    table_card = None
    for c in Color:
        for v in Value:
            cell = obs[0][c.value][v.value]
            if cell[0]:
                held = card(c, v)
                hand_cards.append(held)
                deck.remove(held)
            if cell[1]:
                table_card = card(c, v)
                deck.remove(table_card)

    opponent_count = 5 - own_tricks - opponent_tricks
    if table_card is not None:
        opponent_count -= 1
    opponent_hand_cards = [deck.pop() for _ in range(opponent_count)]

    return hand_cards, table_card, own_tricks, opponent_tricks, opponent_hand_cards

def create_eval_cache(n_games=1000):
    """Build an evaluation set of random positions and their target outputs.

    For each of ``n_games`` positions from ``generate_random_game`` the
    encoded model input is stored together with a length-32 target vector.
    The vector holds the averaged result of ``calc_correct_output`` at index
    ``card.id`` for every hand card and zeros elsewhere.

    The opponent hand returned by ``generate_random_game`` is not forwarded:
    ``calc_correct_output`` does not take one.

    Args:
        n_games: number of positions to generate (default 1000, the
            previously hard-coded value).

    Returns:
        ``(eval_cache_input, eval_cache_output)``, two lists of equal length.
    """
    eval_cache_input = []
    eval_cache_output = []
    for _ in range(n_games):
        # Unpack the game tuple.  Passing the tuple itself would make the
        # helpers treat the whole tuple as ``hand_cards``.
        hand_cards, table_card, own_tricks, opponent_tricks, _opponent_hand_cards = generate_random_game()

        eval_cache_input.append(generate_input(hand_cards, table_card, own_tricks, opponent_tricks))
        output = calc_correct_output(hand_cards, table_card, own_tricks, opponent_tricks)

        # ``output`` is ordered like ``hand_cards``; scatter it to card ids.
        outputs_per_card = np.zeros((32,))
        for slot, held in enumerate(hand_cards):
            outputs_per_card[held.id] = output[slot]

        eval_cache_output.append(outputs_per_card)

    return eval_cache_input, eval_cache_output
        
def eval(model, eval_cache_input, eval_cache_output):
    """Return the fraction of cached positions the model answers optimally.

    For each position the index of the model's highest output (first element
    of ``model.predict_single``) is looked up in the cached target vector.
    The position counts as correct when that target value equals the maximum
    target value, so ties between equally good cards count as correct.

    Raises:
        ZeroDivisionError: if ``eval_cache_input`` is empty.
    """
    hits = 0
    for idx, cached_input in enumerate(eval_cache_input):
        prediction = model.predict_single(cached_input)[0]
        target = eval_cache_output[idx]

        chosen_score = target[np.argmax(prediction)]
        best_score = target[np.argmax(target)]
        # A disabled debug print here used to list idx and both scores for
        # every miss.
        hits += chosen_score == best_score

    return hits / len(eval_cache_input)


In [42]:
# Build the evaluation set: create_eval_cache() returns the list of encoded
# inputs and the matching list of per-card target vectors.
eval_cache_input, eval_cache_output = create_eval_cache()

In [18]:
# Persist both cache lists to the file 'eval_cache' as one pickled
# two-element list.
with open('eval_cache', 'wb') as fp:
    pickle.dump([eval_cache_input, eval_cache_output], fp)

In [22]:
# Reload the evaluation set from the file 'eval_cache'.
# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so only load an 'eval_cache' file that was produced locally.
with open ('eval_cache', 'rb') as fp:
    eval_cache_input, eval_cache_output = pickle.load(fp)

In [263]:
# Score the model on the cached positions: eval() returns the fraction of
# positions where the card the model rates highest has the same target value
# as the best target card.
eval(model, eval_cache_input, eval_cache_output)

3 0.57 0.59
13 0.55 0.57
14 0.21 0.39
27 0.37 0.4
28 0.35 0.44
54 0.17 0.38
97 0.01 0.02


0.93

In [267]:
# Show the encoded input of cached position 14 (card array plus trick flags).
# NOTE(review): index 14 presumably comes from the mismatch lines printed by
# eval's debug output; confirm.
eval_cache_input[14]

[array([[[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],
 
        [[1., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [1., 0.],
         [0., 0.],
         [0., 0.]],
 
        [[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],
 
        [[0., 0.],
         [0., 0.],
         [1., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]]]), array([0., 1., 0., 0.])]

In [265]:
# Show the length-32 per-card target vector stored for cached position 14.
eval_cache_output[14]

array([0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.21,
       0.  , 0.  , 0.  , 0.  , 0.16, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.39, 0.  , 0.  ])

In [303]:
# Take ten search samples for the same three-card hand (no table card,
# own_tricks=2, opponent_tricks=0) and average them per hand position.
val = []
for i in range(10):
    val.append(calc_correct_output_sample([
            card(Color.GRUEN, Value.SIEBEN), 
            card(Color.GRUEN, Value.OBER), 
            card(Color.SCHELLN, Value.NEUN)
        ], None, 2, 0))
# Mean over axis 0, i.e. across the ten samples.
np.mean(val, 0)


array([0.29233535, 0.30318181, 0.40448284])

In [113]:
# Load a previously captured state into env and render it.
# NOTE(review): `my_state` is not defined in this part of the notebook;
# it has to exist from an earlier cell.
env.set_state(my_state)
env.render('human')

True

In [285]:
# One search sample for a three-card hand with no table card,
# own_tricks=2 and opponent_tricks=0.
calc_correct_output_sample([
            card(Color.GRUEN, Value.SIEBEN), 
            card(Color.GRUEN, Value.OBER), 
            card(Color.SCHELLN, Value.NEUN)
        ], None, 2, 0)

[0.23458585 0.21142988 0.55398427]




array([0., 0., 1.])

In [255]:
# Compare model output and one search sample for a three-card hand with
# EICHEL ZEHN on the table, own_tricks=1 and opponent_tricks=1.
predict([
            card(Color.GRUEN, Value.SIEBEN), 
            card(Color.GRUEN, Value.KOENIG),
            card(Color.HERZ, Value.KOENIG)
        ], card(Color.EICHEL, Value.ZEHN), 1, 1)

[0.39571235 0.33549584 0.26879181]




[0.00219368 0.00295891 0.00492629 0.00228125 0.00067742 0.0019176
 0.00160495 0.00306738 0.00170262 0.2736106  0.00104753 0.00065809
 0.00124946 0.00122479 0.00083221 0.34336203 0.00157421 0.35396522
 0.0022292  0.00107221 0.0017584  0.00199055 0.0022874  0.00158446
 0.00120892 0.00238398 0.00235519 0.00170699 0.0021641  0.00231255
 0.00093804 0.00114301] [0. 0. 0.]
Color.GRUEN Value.SIEBEN => 0.34336203 (0.0)
Color.GRUEN Value.KOENIG => 0.2736106 (0.0)
Color.HERZ Value.KOENIG => 0.35396522 (0.0)


In [257]:
# Compare model output and one search sample for a full five-card hand with
# GRUEN OBER on the table and no tricks taken by either side.
predict([
    card(Color.EICHEL, Value.UNTER), 
    card(Color.GRUEN, Value.KOENIG),
    card(Color.EICHEL, Value.KOENIG),
    card(Color.HERZ, Value.SIEBEN),
    card(Color.GRUEN, Value.ZEHN),
    ],card(Color.GRUEN, Value.OBER), 0, 0)

[0.16209546 0.29126847 0.14643211 0.25181529 0.14838867]




[0.00225554 0.12882802 0.00333104 0.16498454 0.00407682 0.00374047
 0.00184983 0.01050379 0.00643561 0.36693758 0.00278587 0.00835195
 0.22336954 0.00493929 0.00303816 0.00170397 0.00097518 0.0054345
 0.00415214 0.00438484 0.00151944 0.00706366 0.00218979 0.21273391
 0.0010833  0.00248363 0.00400397 0.00299505 0.00209963 0.00448016
 0.00377667 0.00315689] [0. 1. 0. 1. 1.]
Color.EICHEL Value.UNTER => 0.16498454 (0.0)
Color.GRUEN Value.KOENIG => 0.36693758 (1.0)
Color.EICHEL Value.KOENIG => 0.12882802 (0.0)
Color.HERZ Value.SIEBEN => 0.21273391 (1.0)
Color.GRUEN Value.ZEHN => 0.22336954 (1.0)


In [261]:
# Compare model output and one search sample for a full five-card hand with
# SCHELLN OBER on the table and no tricks taken by either side.
predict([
    card(Color.HERZ, Value.SIEBEN), 
    card(Color.GRUEN, Value.KOENIG),
    card(Color.HERZ, Value.ACHT),
    card(Color.EICHEL, Value.SAU),
    card(Color.GRUEN, Value.UNTER),
    ],card(Color.SCHELLN, Value.OBER), 0, 0)

[0.28631919 0.15695597 0.25385176 0.14562401 0.15724907]




[0.21805565 0.00234719 0.00126939 0.00292074 0.00098915 0.00168285
 0.00176553 0.00741228 0.00306193 0.20693102 0.00235981 0.17444488
 0.00192268 0.00234325 0.00263577 0.00079589 0.00064911 0.00227424
 0.00357403 0.00439737 0.00371622 0.00286246 0.19036686 0.19911051
 0.00340857 0.0075426  0.0011064  0.00157739 0.00197756 0.00082587
 0.00150106 0.00150635] [0. 0. 0. 0. 0.]
Color.HERZ Value.SIEBEN => 0.19911051 (0.0)
Color.GRUEN Value.KOENIG => 0.20693102 (0.0)
Color.HERZ Value.ACHT => 0.19036686 (0.0)
Color.EICHEL Value.SAU => 0.21805565 (0.0)
Color.GRUEN Value.UNTER => 0.17444488 (0.0)


In [395]:
# Encode a four-card hand with no table card (own_tricks=1, opponent_tricks=0)
# and query the model directly.  GRUEN KOENIG is commented out of the hand.
hard_input = generate_input([
    card(Color.EICHEL, Value.UNTER), 
    #card(Color.GRUEN, Value.KOENIG),
    card(Color.EICHEL, Value.KOENIG),
    card(Color.HERZ, Value.SIEBEN),
    card(Color.GRUEN, Value.ZEHN),
    ],None, 1, 0)
# Displays the full result of predict_single (not just its first element).
model.predict_single(hard_input)

[array([3.2768894e-03, 1.9878964e-01, 1.3820220e-02, 4.6424335e-01,
        7.1053780e-03, 2.9274071e-02, 8.1222923e-03, 5.4989127e-04,
        8.0019347e-03, 4.3923180e-03, 4.0565468e-03, 3.2476720e-03,
        2.0850727e-01, 4.3509626e-03, 5.9748022e-03, 6.1062318e-03,
        1.3822509e-03, 2.1725877e-04, 1.3098384e-03, 7.1340741e-04,
        2.0487478e-03, 3.3812402e-03, 1.0878793e-04, 7.3019944e-02,
        3.4260689e-03, 2.7179192e-03, 1.3691070e-03, 6.1381124e-03,
        4.5132274e-03, 4.4453251e-03, 2.2470709e-03, 4.3023235e-04],
       dtype=float32), array([0.5526783], dtype=float32)]

In [None]:
# Print "test" once for every entry of sample_inputs[0] that equals the card
# array of hard_input.
# NOTE(review): sample_inputs is defined elsewhere; presumably the collected
# training inputs. Confirm.
for sample_input in sample_inputs[0]:
    if np.array_equal(sample_input,hard_input[0]):
        print("test")

In [80]:
# NOTE(review): compare() is defined elsewhere; presumably it plays the model
# against a default opponent (None) and reports a score. Confirm.
compare(model, None)

0.48

In [72]:
# NOTE(review): match() is defined elsewhere; presumably it plays one game
# between the two listed players, with True enabling verbose output. Confirm.
match([model, None], True)

[0. 0. 1. 0. 0.]
[1. 1. 0. 1.]
[1. 1. 0.]
[1. 0.]
[1.]


1