# Train

In [3]:
fig, ax = plt.subplots()

In [2]:
%%cython -f -I . -L . -L /home/domin/.local/lib/python3.6/site-packages -I /home/domin/.local/lib/python3.6/site-packages  --cplus 

from src.Storage cimport Storage
from src.MCTS cimport MCTS
from src.Model cimport Model
from src.ModelRating cimport ModelRating
from src.Game cimport Game
from gym_watten.envs.watten_env cimport WattenEnv
from IPython.display import clear_output
from libcpp.vector cimport vector
import tensorflow as tf
from libc.stdlib cimport srand
from libc.time cimport time
import sys

cpdef float train(WattenEnv env, Model model, Model trained_model, Model best_model, Storage storage, MCTS mcts, Game game, ModelRating rating, int train_sample_size, object summary_writer):
    cdef int g
    cdef vector[float] eval_scores 
    cdef float rating_value, exploitability
    
    srand(time(NULL))
    
    for g in range(300):  
        mcts.mcts_generate(env, model, storage)
    
        trained_model.memorize_storage(storage, train_sample_size == 0, 1, train_sample_size)
        #clear_output()
        
        
        """if hasattr(model, 'fit'):
            p = np.random.permutation(int(number_of_samples))
            if len(p) > BATCH_SIZE * TRAINING_LOOPS:
                p = p[:BATCH_SIZE * TRAINING_LOOPS]
            train_outputs = []
            train_inputs = []
            for i in range(len(sample_outputs)):
                train_outputs.append(sample_outputs[i][p])
            for i in range(len(sample_inputs)):
                train_inputs.append(sample_inputs[i][p])

            trained_model.fit(train_inputs, train_outputs, epochs=1, batch_size=BATCH_SIZE)   """

        if g % 1 == 0:
            model.copy_weights_from(trained_model)     
            rating_value = game.compare_given_games(model, best_model, rating)
            print("Match: " + str(rating_value))
            sys.stdout.flush()
            #print("Last scores: " + str(eval_scores) + "!")
            if rating_value > 0.5:
                best_model.copy_weights_from(model)  
                s = tf.Summary(value=[tf.Summary.Value(tag="exploitability", simple_value=rating.calc_exploitability(best_model))])
                summary_writer.add_summary(s, g)
                summary_writer.flush()
                #exploitability = rating.calc_exploitability(best_model)
                #eval_scores.push_back(exploitability)
                #print("Model score: " + str(eval_scores.back()) + "!")
    
    s = tf.Summary(value=[tf.Summary.Value(tag="exploitability", simple_value=rating.calc_exploitability(best_model))])
    summary_writer.add_summary(s, g)
    summary_writer.flush()
    
    return rating.calc_exploitability(best_model)
    #plt.plot(eval_scores, label='6x75')
    #ax.legend(shadow=True)
    #plt.show()


In [None]:
for i in range(10):
    print(train(env, model, best_model, storage, mcts, game, rating))

In [None]:
summary_writer = tf.summary.FileWriter("./results/" + str("test"))
train(env, model, best_model, storage, mcts, game, rating, summary_writer)

In [18]:
rating.calc_exploitability(model)

0.03214285895228386

In [284]:
game.compare_given_games(model, best_model, rating)

0.5

In [3]:
import itertools
def create_grid_search(variables):
    combinations = itertools.product(*variables.values())
    labeled_combinations = []
    for combination in combinations:
        labeled_combinations.append(dict(zip(variables.keys(), combination)))
    return labeled_combinations

In [4]:
variables= {"episodes": [75], "mcts_sims": [40], "objective_opponent": [False], "storage_size": [0], "sample_size": [0], "hidden_neurons": [128], "model": ["Keras"]}

results = []
n = 1
best_models = []

for combination in tqdm(create_grid_search(variables)):
    summary_writer = tf.summary.FileWriter("./results/nn-keras-" + str(combination) + " (try 0)")
    score = 0
    for i in range(n):
        env = WattenEnv()
        if combination["model"] is "Keras":
            model = KerasModel(combination["hidden_neurons"])#LookUp()
            best_model = KerasModel(combination["hidden_neurons"])#LookUp()
            train_model = KerasModel(combination["hidden_neurons"])#LookUp()
        else:
            model = TinyDnnModel(combination["hidden_neurons"])#LookUp()
            best_model = TinyDnnModel(combination["hidden_neurons"])#LookUp()
            train_model = TinyDnnModel(combination["hidden_neurons"])#LookUp()
        storage = Storage(combination["storage_size"])
        mcts = MCTS(combination["episodes"], combination["mcts_sims"], combination["objective_opponent"])
        rating = ModelRating(env)
        game = Game(env)
        score += train(env, model, train_model, best_model, storage, mcts, game, rating, combination["sample_size"], summary_writer)
    results.append([combination, score / n])
    best_models.append(best_model)
for result in results:
    print(result[0], result[1])

  0%|          | 0/1 [00:00<?, ?it/s]

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Epoch 1/1
Match: 0.4526785612106323
Epoch 1/1
Match: 0.4571428596973419
Epoch 1/1
Match: 0.45892858505249023
Epoch 1/1
Match: 0.4580357074737549
Epoch 1/1
Match: 0.4625000059604645
Epoch 1/1
Match: 0.4616071283817291
Epoch 1/1
Match: 0.4642857015132904
Epoch 1/1
Match: 0.46875
Epoch 1/1
Match: 0.46964284777641296
Epoch 1/1
Match: 0.4732142984867096
Epoch 1/1
Match: 0.47232142090797424
Epoch 1/1
Match: 0.46964284777641296
Epoch 1/1
Match: 0.46964284777641296
Epoch 1/1
Match: 0.46964284777641296
Epoch 1/1
Match: 0.4732142984867096
Epoch 1/1
Match: 0.4732142984867096
Epoch 1/1
Match: 0.4732142984867096
Epoch 1/1
Match: 0.47678571939468384
Epoch 1/1
Match: 0.4839285612106323
Epoch 1/1
Match: 0.4821428656578064
Epoch 1/1
Match: 0.4821428656578064
Epoch 1/1
Match: 0.4821428656578064
Epoch 1/1
Match: 0.4866071343421936
Epoch 1/1
Match: 0.48035714030

100%|██████████| 1/1 [21:33<00:00, 1293.75s/it]

{'episodes': 75, 'mcts_sims': 40, 'objective_opponent': False, 'storage_size': 0, 'sample_size': 0, 'hidden_neurons': 128, 'model': 'Keras'} 0.512499988079071





In [14]:
for i in range(len(best_models)):
    wins = 0
    for j in range(len(best_models)):
        if game.compare_given_games(best_models[i], best_models[j], rating) > 0.5:
            wins += 1
    print(i, wins)

0 4
1 1
2 8
3 13
4 9
5 20
6 10
7 17
8 1
9 1
10 11
11 10
12 11
13 19
14 12
15 19
16 0
17 5
18 12
19 17
20 12
21 21
22 6
23 23


In [11]:
%%cython -f -I . -L . -L /home/domin/.local/lib/python3.6/site-packages -I /home/domin/.local/lib/python3.6/site-packages  --cplus 

from src.MCTS cimport MCTS, Storage, MCTSState
from src.LookUp cimport LookUp, ModelOutput
from src.ModelRating cimport ModelRating
from src.Game cimport Game
from gym_watten.envs.watten_env cimport WattenEnv, Observation
from IPython.display import clear_output
from libcpp.vector cimport vector

cpdef void show_flaws(WattenEnv env, LookUp model, LookUp better_model, ModelRating rating, int start=0):
    cdef Observation obs
    cdef ModelOutput output, better_output
    cdef int step, better_step
    
    for g in range(start, rating.eval_games.size()):  
        env.reset()
        env.set_state(&rating.eval_games[g])
        env.regenerate_obs(&obs)
    
        model.predict_single(&obs, &output)
        step = model.valid_step(output.p, &env.players[env.current_player].hand_cards)
        
        better_model.predict_single(&obs, &better_output)
        better_step = better_model.valid_step(better_output.p, &env.players[env.current_player].hand_cards)
        
        if better_step != step:
            env.render('human')
            print(g, step, better_step, output.p, better_output.p)
            break
            
cpdef void analyse(WattenEnv env, LookUp model, MCTS mcts, ModelRating rating, int game_id, draw=True):
    env.reset()
    env.set_state(&rating.eval_games[game_id])
    #env.step(0)
    #env.step(4)
    #env.step(2)
    #env.step(1)
    env.render('human')
    #print(env.is_done())
    mcts.objective_opponent = True
    
    cdef MCTSState root = mcts.create_root_state(env)
    cdef vector[float] p
    mcts.mcts_game_step(env, &root, model, &p, 40)
    print(p)
    
    if draw:
        mcts.draw_tree(&root, 6)

cpdef void run(WattenEnv env, LookUp model, ModelRating rating, int game_id):
    cdef Observation obs
    cdef ModelOutput output
    
    env.reset()
    env.set_state(&rating.eval_games[game_id])
    env.regenerate_obs(&obs)
    
    model.predict_single(&obs, &output)
    print(output.p)

In [32]:
show_flaws(env, best_models[10], best_models[11], rating, 20)

60 3 2 [0.30431854724884033, 0.0, 0.28242039680480957, 0.41326066851615906, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] [0.302650511264801, 0.0, 0.41321861743927, 0.28413063287734985, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [12]:
for i in range(60, 70):
    analyse(env, best_models[2], mcts, rating, i, False)

[0.42824968695640564, 0.32129472494125366, 0.2504556179046631]
[0.31789714097976685, 0.37508949637413025, 0.3070133626461029]
[0.4029049277305603, 0.2813130021095276, 0.3157820701599121]
[0.4055160582065582, 0.3133985996246338, 0.281085342168808]
[0.3218088448047638, 0.3863930106163025, 0.29179811477661133]
[0.40328505635261536, 0.25039905309677124, 0.3463159203529358]
[0.40066438913345337, 0.16118615865707397, 0.43814942240715027]
[0.4009774327278137, 0.16110198199748993, 0.43792060017585754]
[0.24658171832561493, 0.2084484100341797, 0.5449698567390442]
[0.22409316897392273, 0.21404504776000977, 0.5618617534637451]


In [13]:
analyse(env, best_models[2], mcts, rating, 61)

[0.2822434902191162, 0.4731760025024414, 0.24458058178424835]


In [23]:
run(env, best_models[7], rating, 69)

[0.2860535979270935, 0.0, 0.3563016653060913, 0.35764503479003906, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [None]:
env = WattenEnv()
model = LookUp()
best_model = LookUp()
storage = Storage()
mcts = MCTS()
rating = ModelRating(env)
game = Game(env)

mcts.mcts_generate(env, model, storage)
model.memorize_storage(storage)
game.compare_given_games(model, best_model, rating)
rating.calc_exploitability(model)

In [4]:
for i in range(10):
    print(game.compare_given_games(best_models[0], best_models[i], rating, False))

NameError: name 'game' is not defined

In [25]:
rating.calc_exploitability(best_model)

0.06607142835855484

In [4]:
%lprun -f game.match game.compare_given_games(model, best_model, rating)

Timer unit: 1e-06 s

Total time: 0.020369 s
File: src/Game.pyx
Function: match at line 16

Line #      Hits         Time  Per Hit   % Time  Line Contents
    16                                               cpdef int match(self, LookUp agent1, LookUp agent2, bool render=False, bool reset=True):
    17                                                   cdef Observation obs
    18                                                   cdef ModelOutput output
    19                                                   cdef int a
    20      1120        696.0      0.6      3.4          if reset:
    21                                                       self.env.reset(&obs)
    22                                                   else:
    23      1120        623.0      0.6      3.1              self.env.regenerate_obs(&obs)
    24                                           
    25      1120        560.0      0.5      2.7          while not self.env.is_done():
    26      4720       2263.0      0.

In [4]:
import pstats, cProfile

cProfile.runctx("game.compare_given_games(model, best_model, rating)", globals(), locals(), "Profile.prof")

s = pstats.Stats("Profile.prof")
s.print_stats()

Sat Mar 24 00:38:45 2018    Profile.prof

         4 function calls in 0.007 seconds

   Random listing order was used

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.007    0.007 {built-in method builtins.exec}
        1    0.007    0.007    0.007    0.007 {method 'compare_given_games' of 'src.Game.Game' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.007    0.007 <string>:1(<module>)




<pstats.Stats at 0x7f680c15bc88>

In [5]:
%lprun -f "mcts.mcts_sample" train(env, model, storage, mcts)

  profile = LineProfiler(*funcs)


NameError: name 'train' is not defined

In [62]:
model.set_weights(trained_model.get_weights())

In [92]:
eval(models[1], eval_games)

0.690327380952381

In [272]:
all_eval_scores

[0.02589285714285713]

In [251]:
sample_outputs[1][9]

array([1.])

In [209]:
compare_given_games(model, first_model, eval_games)

0.5410714285714285

In [200]:
model.predict_single([train_inputs[0][next_index - 2], train_inputs[1][next_index - 2]])

NameError: name 'train_inputs' is not defined

In [45]:
train_outputs[0][next_index - 2]

array([0.258651  , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.27687292, 0.        , 0.        , 0.        ,
       0.46447608, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        ])

In [46]:
train_inputs[0][next_index - 2]

array([[[0, 1],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [1, 0]],

       [[1, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [1, 0],
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0]],

       [[0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0],
        [0, 0]]])

In [111]:
test_model = build_model()

In [72]:
K.set_value(test_model.optimizer.lr, 0.0001)

In [132]:
test_model.fit([sample_inputs[0][:number_of_samples], sample_inputs[1][:number_of_samples]], [sample_outputs[0][:number_of_samples], sample_outputs[1][:number_of_samples]], epochs=10, batch_size=BATCH_SIZE)  

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f44962f95f8>

In [116]:
test_model.predict_single([sample_inputs[0][5], sample_inputs[1][5]])

[array([0.0053017 , 0.0045248 , 0.00548624, 0.00582314, 0.0070843 ,
        0.00583717, 0.00645116, 0.00290738, 0.00513383, 0.0056636 ,
        0.331735  , 0.00607965, 0.0053452 , 0.00601907, 0.33392397,
        0.0070668 , 0.00368037, 0.01111693, 0.00290891, 0.00334794,
        0.00270144, 0.00335149, 0.00295761, 0.00356095, 0.00431994,
        0.00539099, 0.00628497, 0.00557014, 0.3452927 , 0.00649954,
        0.00652416, 0.00288056], dtype=float32),
 array([-0.81022185], dtype=float32)]

In [110]:
sample_outputs[0][10]

37764

In [152]:
sample_outputs[0][10638]

array([0.        , 0.23038077, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.15442885, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.23038077,
       0.        , 0.        , 0.        , 0.23038077, 0.        ,
       0.        , 0.        , 0.        , 0.15442885, 0.        ,
       0.        , 0.        ])

In [97]:
eval(model, eval_cache_input, eval_cache_output)

0.127

In [133]:
model_output = test_model.predict([sample_inputs[0][:number_of_samples], sample_inputs[1][:number_of_samples]])[0]
a = np.argmax(model_output, axis=-1)
k = np.equal(np.take(sample_outputs[0][:number_of_samples], np.argmax(sample_outputs[0][:number_of_samples], axis=-1)), np.take(sample_outputs[0][:number_of_samples], a)).sum()
k / number_of_samples

0.8743247537337147

In [102]:
model.save('modelDense3.h5')

AttributeError: 'LookUp' object has no attribute 'save'

In [82]:
model.load_weights('modelDense2.h5')

In [97]:
compare_given_games(best_model, models[2], eval_games)

IndexError: list index out of range

In [66]:
models.append(best_model)

In [57]:
models = []

In [274]:
first_nn_model = model

In [74]:
eval(best_model, eval_games)

0.690327380952381

In [25]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 4, 8, 2)      0                                            
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 64)           0           input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 4)            0                                            
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 68)           0           flatten_1[0][0]                  
                                                                 input_2[0][0]                    
__________