In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter
from time import time
import pprint
pp = pprint.PrettyPrinter(indent=4)
%matplotlib inline

from games import x2048
from agents import DQNagent
plt.rcParams['figure.figsize'] = [15, 10]

Using TensorFlow backend.


In [2]:
class paramgen():
    def __init__(self, hyp_ranges):
        self.range_list = hyp_ranges
    def __iter__(self):
        return self
    def __next__(self):
        randset = []
        for rang in self.range_list:
            if rang[1]:
                randset.append(np.random.normal(loc=rang[0], scale=rang[1]))
            else:
                randset.append(rang[0])
        return randset

In [3]:
dim = 2
n_sets = 128
episodes = 3000

set_decay = 0.5
sigma_decay = 0.66

hyp_ranges = [
    #'learning rate':
    (0.01, 0.02),
    #'batch size':
    (32, 0),
    #'eps settings':
    (1.0, 0.1), #eps0
    (0.01, .0001), #epsmin 
    (0.995, 0.2), #epsdecay
    #'gamma':
    (0.8, 0.1),
    #'memory length':
    (2000, 0)
]

game = x2048(dim)
gen = paramgen(hyp_ranges)

t_scorearr = []
m_scorearr = []
err_scorearr = []

arrs = []
f_arrs = []

In [4]:
j = 0
max_score = 0
max_params = None
c_sets = n_sets
c_hype_ranges = hyp_ranges

while np.floor(c_sets)>=1:
    time_0s = time()
    gen = paramgen(c_hype_ranges)
    
    for i, paramset in enumerate(gen):
        time0 = time()
        agent = DQNagent(game, paramset)
        agent.disableGPU()
        sarr = agent.train(episodes = episodes, savedir='data')[0]

        time1 = time()
        arrs.append(sarr)
        sarr_filtered = gaussian_filter(sarr, 100)
        f_arrs.append(sarr_filtered)
        i_max = np.argmax(sarr_filtered)
        l_file = 'data/eps%i.h5' % i_max
        print(l_file, ' loaded.')
        agent.model = agent.cModel(file=l_file)
        score = agent.eval(500)[0]

        time2 = time()
        print("%i/%i done, t-train:%i, t_eval:%i, t_total:%i" % (i+1, c_sets, time1-time0, time2-time1, time2-time0))
        if score > max_score:
            max_score = score
            max_params = paramset
        print('%sS: AVG: %f @ %i'  % ('New max score! ' if score==max_score else '', score, i_max))
        print(paramset)    
        print()
        if i+1 == c_sets: break
        
    c_hype_ranges = [(mean, hyp_ranges[i][1]) for i,mean in enumerate(max_params)]
    c_sets *= 0.5
    time_1s = time()
    print('Iteration %i done! S: %f T: %f' % (j,max_score, time_1s - time_0s))
    pp.pprint(c_hype_ranges)
    j+=1
    print('#'*33)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Training done!                    
data/eps1438.h5  loaded.
Evaluation done!                    
1/128 done, t-train:96, t_eval:2, t_total:98
New max score! S: AVG: 22.856000 @ 1438
[-0.0024052391239048794, 32, 0.9576670336060975, 0.009994771331769863, 1.1551029799036394, 0.7072397256867595, 2000]

Training done!                    
data/eps1897.h5  loaded.
Evaluation done!                    
2/128 done, t-train:102, t_eval:2, t_total:105
S: AVG: 21.712000 @ 1897
[-0.020997391493189452, 32, 1.0837465759363945, 0.00999762592067113, 1.0361179845647677, 0.8898052472403291, 2000]

Training done!                    
data/eps2404.h5  loaded.
Evaluation done!                    
3/128 done, t-train:107, t_eval:3, t_total:111
S: AVG: 21.824000 @ 2404
[-0.0035448824887519773, 32, 1.0777946997726184, 0.010025997264544175, 0.9991218105592153, 0.8039387545030509, 2000]

Training

Training done!                    
data/eps690.h5  loaded.
Evaluation done!                    
28/128 done, t-train:268, t_eval:12, t_total:280
S: AVG: 33.320000 @ 690
[-0.0033390858500344425, 32, 0.8046059580075673, 0.01006714664639432, 0.6165305152755456, 0.8327498406286177, 2000]

Training done!                    
data/eps2999.h5  loaded.
Evaluation done!                    
29/128 done, t-train:285, t_eval:15, t_total:300
S: AVG: 52.496000 @ 2999
[0.0037736162399090436, 32, 1.0364441422214057, 0.009861622262034044, 0.9869020325242598, 0.8113460006883476, 2000]

Training done!                    
data/eps869.h5  loaded.
Evaluation done!                    
30/128 done, t-train:273, t_eval:16, t_total:290
S: AVG: 59.848000 @ 869
[0.026617564881834754, 32, 1.12381215013149, 0.010018949945086354, 0.5979825644276148, 0.778464297081743, 2000]

Training done!                    
data/eps2270.h5  loaded.
Evaluation done!                    
31/128 done, t-train:272, t_eval:13, t_total:28

Training done!                    
data/eps2577.h5  loaded.
Evaluation done!                    
57/128 done, t-train:406, t_eval:30, t_total:436
S: AVG: 57.944000 @ 2577
[0.02419162647228167, 32, 0.9809967331663517, 0.009980880340075044, 1.1373527731076678, 0.7855834698923186, 2000]

Training done!                    
data/eps1091.h5  loaded.
Evaluation done!                    
58/128 done, t-train:414, t_eval:30, t_total:445
S: AVG: 54.576000 @ 1091
[0.030272136496181774, 32, 0.9914047117020274, 0.01004850390447653, 0.7761306091218687, 0.8979846911903164, 2000]

Training done!                    
data/eps2573.h5  loaded.
Evaluation done!                    
59/128 done, t-train:407, t_eval:28, t_total:435
S: AVG: 35.088000 @ 2573
[0.02569909745616606, 32, 1.0115673142863812, 0.010034246243619071, 1.5219553870118934, 0.7868701212256718, 2000]

Training done!                    
data/eps2999.h5  loaded.
Evaluation done!                    
60/128 done, t-train:418, t_eval:27, t_total:

KeyboardInterrupt: 

In [None]:
[0.013789482254543675, 32, 1.1827441119418947, 0.010043847676676102, 0.7447212942406423, 0.8348185066787255, 2000]


In [7]:
print('Iteration %i done! S: %f T: %f' % (j,max_score, time_0s))
pp.pprint(max_params)

Iteration 0 done! S: 67.392000 T: 1558227853.376808
[   0.013789482254543675,
    32,
    1.1827441119418947,
    0.010043847676676102,
    0.7447212942406423,
    0.8348185066787255,
    2000]
