http://neuro-educator.com/rl1/

In [26]:
# coding:utf-8
# [0]ライブラリのインポート
import gym  #倒立振子(cartpole)の実行環境
from gym import wrappers  #gymの画像保存
import numpy as np
import time


# [1]Q関数を離散化して定義する関数　------------
# 観測した状態を離散値にデジタル変換する
def bins(clip_min, clip_max, num):
    return np.linspace(clip_min, clip_max, num + 1)[1:-1]
# bins(1, 20, 5)
# array([ 4.8,  8.6, 12.4, 16.2])

# 各値を離散値に変換
def digitize_state(observation):
    cart_pos, cart_v, pole_angle, pole_v = observation
    digitized = [
        np.digitize(cart_pos, bins=bins(-2.4, 2.4, num_dizitized)),
        np.digitize(cart_v, bins=bins(-3.0, 3.0, num_dizitized)),
        np.digitize(pole_angle, bins=bins(-0.5, 0.5, num_dizitized)),
        np.digitize(pole_v, bins=bins(-2.0, 2.0, num_dizitized))
    ]
    return sum([x * (num_dizitized**i) for i, x in enumerate(digitized)])

In [27]:
np.digitize(1.5, bins=bins(-3.0, 3.0, 16))
# -3から3まで16分割して、その中て1.5が何番目のグループに入るか

12

In [28]:
# [2]行動a(t)を求める関数 -------------------------------------
def get_action(next_state, episode):
           #徐々に最適行動のみをとる、ε-greedy法
    epsilon = 0.5 * (1 / (episode + 1))
    if epsilon <= np.random.uniform(0, 1):
        next_action = np.argmax(q_table[next_state])
    else:
        next_action = np.random.choice([0, 1])
    return next_action


# [3]Qテーブルを更新する関数 -------------------------------------
def update_Qtable(q_table, state, action, reward, next_state):
    gamma = 0.99
    alpha = 0.5
    next_Max_Q=max(q_table[next_state][0],q_table[next_state][1] )
    q_table[state, action] = (1 - alpha) * q_table[state, action] +\
            alpha * (reward + gamma * next_Max_Q)
   
    return q_table

In [29]:
# [4]. メイン関数開始 パラメータ設定--------------------------------------------------------
env = gym.make('CartPole-v0')
max_number_of_steps = 200  #1試行のstep数
num_consecutive_iterations = 100  #学習完了評価に使用する平均試行回数
num_episodes = 2000  #総試行回数
goal_average_reward = 195  #この報酬を超えると学習終了（中心への制御なし）
# 状態を6分割^（4変数）にデジタル変換してQ関数（表）を作成
num_dizitized = 6  #分割数
q_table = np.random.uniform(
    low=-1, high=1, size=(num_dizitized**4, env.action_space.n))

total_reward_vec = np.zeros(num_consecutive_iterations)  #各試行の報酬を格納
final_x = np.zeros((num_episodes, 1))  #学習後、各試行のt=200でのｘの位置を格納
islearned = 0  #学習が終わったフラグ
isrender = 0  #描画フラグ

In [30]:
q_table.shape

(1296, 2)

In [None]:
# [5] メインルーチン--------------------------------------------------
for episode in range(num_episodes):  #試行数分繰り返す
    # 環境の初期化
    observation = env.reset()
    state = digitize_state(observation)
    action = np.argmax(q_table[state])
    episode_reward = 0

    for t in range(max_number_of_steps):  #1試行のループ
        if islearned == 1:  #学習終了したらcartPoleを描画する
            env.render()
            time.sleep(0.1)
            print (observation[0])  #カートのx位置を出力

        # 行動a_tの実行により、s_{t+1}, r_{t}などを計算する
        observation, reward, done, info = env.step(action)

        # 報酬を設定し与える
        if done:
            if t < 195:
                reward = -200  #こけたら罰則
            else:
                reward = 1  #立ったまま終了時は罰則はなし
        else:
            reward = 1  #各ステップで立ってたら報酬追加

        episode_reward += reward  #報酬を追加

        # 離散状態s_{t+1}を求め、Q関数を更新する
        next_state = digitize_state(observation)  #t+1での観測状態を、離散値に変換
        q_table = update_Qtable(q_table, state, action, reward, next_state)
        
        #  次の行動a_{t+1}を求める 
        action = get_action(next_state, episode)    # a_{t+1} 
        
        state = next_state
        
        #終了時の処理
        if done:
            print('%d Episode finished after %f time steps / mean %f' %
                  (episode, t + 1, total_reward_vec.mean()))
            total_reward_vec = np.hstack((total_reward_vec[1:],
                                          episode_reward))  #報酬を記録
            if islearned == 1:  #学習終わってたら最終のx座標を格納
                final_x[episode, 0] = observation[0]
            break

    if (total_reward_vec.mean() >=
            goal_average_reward):  # 直近の100エピソードが規定報酬以上であれば成功
        print('Episode %d train agent successfuly!' % episode)
        islearned = 1
        #np.savetxt('learned_Q_table.csv',q_table, delimiter=",") #Qtableの保存する場合
        if isrender == 0:
            #env = wrappers.Monitor(env, './movie/cartpole-experiment-1') #動画保存する場合
            isrender = 1
    #10エピソードだけでどんな挙動になるのか見たかったら、以下のコメントを外す
    #if episode>10:
    #    if isrender == 0:
    #        env = wrappers.Monitor(env, './movie/cartpole-experiment-1') #動画保存する場合
    #        isrender = 1
    #    islearned=1;

if islearned:
    np.savetxt('final_x.csv', final_x, delimiter=",")

0 Episode finished after 30.000000 time steps / mean 0.000000
1 Episode finished after 30.000000 time steps / mean -1.710000
2 Episode finished after 16.000000 time steps / mean -3.420000
3 Episode finished after 16.000000 time steps / mean -5.270000
4 Episode finished after 31.000000 time steps / mean -7.120000
5 Episode finished after 10.000000 time steps / mean -8.820000
6 Episode finished after 20.000000 time steps / mean -10.730000
7 Episode finished after 21.000000 time steps / mean -12.540000
8 Episode finished after 10.000000 time steps / mean -14.340000
9 Episode finished after 17.000000 time steps / mean -16.250000
10 Episode finished after 14.000000 time steps / mean -18.090000
11 Episode finished after 32.000000 time steps / mean -19.960000
12 Episode finished after 26.000000 time steps / mean -21.650000
13 Episode finished after 57.000000 time steps / mean -23.400000
14 Episode finished after 12.000000 time steps / mean -24.840000
15 Episode finished after 51.000000 time s

130 Episode finished after 126.000000 time steps / mean -27.280000
131 Episode finished after 195.000000 time steps / mean -27.480000
132 Episode finished after 114.000000 time steps / mean -25.630000
133 Episode finished after 82.000000 time steps / mean -24.590000
134 Episode finished after 148.000000 time steps / mean -23.880000
135 Episode finished after 188.000000 time steps / mean -23.980000
136 Episode finished after 200.000000 time steps / mean -22.420000
137 Episode finished after 200.000000 time steps / mean -22.420000
138 Episode finished after 200.000000 time steps / mean -19.230000
139 Episode finished after 200.000000 time steps / mean -16.690000
140 Episode finished after 200.000000 time steps / mean -13.300000
141 Episode finished after 200.000000 time steps / mean -10.570000
142 Episode finished after 167.000000 time steps / mean -7.580000
143 Episode finished after 139.000000 time steps / mean -7.300000
144 Episode finished after 200.000000 time steps / mean -7.290000

255 Episode finished after 200.000000 time steps / mean 65.810000
256 Episode finished after 200.000000 time steps / mean 68.430000
257 Episode finished after 177.000000 time steps / mean 68.430000
258 Episode finished after 200.000000 time steps / mean 66.190000
259 Episode finished after 180.000000 time steps / mean 68.710000
260 Episode finished after 173.000000 time steps / mean 70.050000
261 Episode finished after 140.000000 time steps / mean 71.520000
262 Episode finished after 200.000000 time steps / mean 72.310000
263 Episode finished after 200.000000 time steps / mean 75.660000
264 Episode finished after 178.000000 time steps / mean 79.450000
265 Episode finished after 200.000000 time steps / mean 80.430000
266 Episode finished after 200.000000 time steps / mean 84.320000
267 Episode finished after 167.000000 time steps / mean 87.370000
268 Episode finished after 173.000000 time steps / mean 88.930000
269 Episode finished after 169.000000 time steps / mean 90.160000
270 Episod

388 Episode finished after 200.000000 time steps / mean 77.270000
389 Episode finished after 200.000000 time steps / mean 77.270000
390 Episode finished after 200.000000 time steps / mean 77.270000
391 Episode finished after 200.000000 time steps / mean 80.050000
392 Episode finished after 189.000000 time steps / mean 82.710000
393 Episode finished after 200.000000 time steps / mean 80.590000
394 Episode finished after 52.000000 time steps / mean 83.500000
395 Episode finished after 111.000000 time steps / mean 82.670000
396 Episode finished after 200.000000 time steps / mean 82.010000
397 Episode finished after 200.000000 time steps / mean 84.540000
398 Episode finished after 200.000000 time steps / mean 86.980000
399 Episode finished after 200.000000 time steps / mean 89.260000
400 Episode finished after 200.000000 time steps / mean 91.470000
401 Episode finished after 47.000000 time steps / mean 91.470000
402 Episode finished after 200.000000 time steps / mean 90.270000
403 Episode 

518 Episode finished after 200.000000 time steps / mean 183.600000
519 Episode finished after 200.000000 time steps / mean 183.600000
520 Episode finished after 200.000000 time steps / mean 183.600000
521 Episode finished after 200.000000 time steps / mean 183.600000
522 Episode finished after 200.000000 time steps / mean 183.600000
523 Episode finished after 200.000000 time steps / mean 183.620000
524 Episode finished after 200.000000 time steps / mean 183.620000
525 Episode finished after 200.000000 time steps / mean 183.620000
526 Episode finished after 199.000000 time steps / mean 183.620000
527 Episode finished after 200.000000 time steps / mean 183.610000
528 Episode finished after 200.000000 time steps / mean 183.610000
529 Episode finished after 187.000000 time steps / mean 183.610000
530 Episode finished after 200.000000 time steps / mean 181.470000
531 Episode finished after 200.000000 time steps / mean 181.470000
532 Episode finished after 200.000000 time steps / mean 183.59

644 Episode finished after 200.000000 time steps / mean 168.970000
645 Episode finished after 200.000000 time steps / mean 168.970000
646 Episode finished after 177.000000 time steps / mean 168.970000
647 Episode finished after 200.000000 time steps / mean 166.730000
648 Episode finished after 184.000000 time steps / mean 166.730000
649 Episode finished after 200.000000 time steps / mean 164.560000
650 Episode finished after 200.000000 time steps / mean 166.970000
651 Episode finished after 142.000000 time steps / mean 166.970000
652 Episode finished after 200.000000 time steps / mean 164.380000
653 Episode finished after 200.000000 time steps / mean 164.380000
654 Episode finished after 200.000000 time steps / mean 164.380000
655 Episode finished after 161.000000 time steps / mean 167.240000
656 Episode finished after 200.000000 time steps / mean 164.840000
657 Episode finished after 170.000000 time steps / mean 164.840000
658 Episode finished after 200.000000 time steps / mean 162.53

769 Episode finished after 200.000000 time steps / mean 170.520000
770 Episode finished after 200.000000 time steps / mean 172.800000
771 Episode finished after 200.000000 time steps / mean 175.390000
772 Episode finished after 200.000000 time steps / mean 177.800000
773 Episode finished after 200.000000 time steps / mean 177.800000
774 Episode finished after 200.000000 time steps / mean 177.800000
775 Episode finished after 200.000000 time steps / mean 180.430000
776 Episode finished after 200.000000 time steps / mean 182.680000
777 Episode finished after 200.000000 time steps / mean 182.680000
778 Episode finished after 200.000000 time steps / mean 185.160000
779 Episode finished after 200.000000 time steps / mean 187.430000
780 Episode finished after 200.000000 time steps / mean 187.430000
781 Episode finished after 200.000000 time steps / mean 189.590000
782 Episode finished after 200.000000 time steps / mean 189.590000
783 Episode finished after 200.000000 time steps / mean 191.68

-0.16756342302513028
-0.16082227740310623
-0.15799675127100016
-0.1512821190605621
-0.14848269553402985
-0.14179383459778344
-0.1390200684893562
-0.13235688796937886
-0.1296089804973435
-0.12297203164627023
-0.12025082587295079
-0.11364130798061925
-0.11094829921538642
-0.10436808051845968
-0.1017054413263809
-0.09515709487821969
-0.0847249972000987
-0.0782118791238322
-0.06781350418477612
-0.0613331466948051
-0.05096651608178909
-0.044517296386405794
-0.03418123861524771
-0.027762329962790308
-0.017456445941030137
-0.011067787138138538
-0.0007924330553053388
0.005565284171507295
0.015808996706111605
0.029937199186127597
0.04795029842293139
0.06985055591382204
0.08783794235473262
0.10972125556683458
0.12769905532704756
0.14177599252187412
0.15195532148951235
0.16604323032024337
0.17623493404767143
0.1903365777769362
0.20054358407330938
0.20685775289490638
0.21708334210377714
0.22341444873558564
0.2336556393630333
0.24000095267536478
0.2502551912296091
0.2566123649647886
0.2668774574662

-0.23692760332709004
-0.23290538821517223
-0.2327983673621758
-0.22880170722380325
-0.22871978729233153
-0.22474784272254925
-0.2246904711335006
-0.22074302907247947
-0.22071027388134903
-0.21678773578691996
-0.21678027843852632
-0.2128836630265035
-0.21290280559755353
-0.20903376602489976
-0.20908145262383965
-0.20524230811114597
-0.19751818026332646
-0.19371198614999122
-0.1860191228856758
-0.18224305357303466
-0.17457908562190913
-0.17083110007060887
-0.16319439895105742
-0.15947318251877898
-0.15186281566529852
-0.14816773910241873
-0.1405834408958333
-0.13691453578353735
-0.12935669418305107
-0.12571464417080308
-0.1181843026976787
-0.11457045002351848
-0.10706932494969383
-0.10348569184259367
-0.0960162045307199
-0.08466276886480048
-0.07722812001684165
-0.06590792872973802
-0.05850548655710423
-0.04721639178661575
-0.03984435773934286
-0.028585004073624867
-0.021242359983458123
-0.010012145810814999
-0.0026986192126403664
0.0085023248871757
0.01578627789736048
0.0269570939204070

0.05684798021854607
0.059681527868171325
0.05862120066098187
0.061471912008655205
0.06042822921155149
0.06329523131124937
0.062267546109322794
0.06515038613867744
0.06413846037617561
0.06703708879900724
0.06604108246124277
0.06895584631523467
0.0679763168637518
0.07090796284876369
0.06994587429677257
0.07289556153059086
0.07195230332911987
0.07492162581072481
0.07399904178763266
0.07699006075246487
0.07609048849393331
0.07910577494466915
0.07823209611286625
0.07347140549661095
0.07262712676585523
0.06789437454644648
0.06707702249778909
0.06237013807742316
0.06157794207946292
0.05689552016458541
0.0561273609054633
0.05146862168864901
0.05072399547552923
0.04608875950248155
0.04536775757789316
0.04075643492446784
0.04005973718987227
0.03547333069084535
0.03480221339714064
0.030242335816958458
0.02959869293474804
0.025067598106635815
0.02445397618404909
0.01995460422253652
0.011571548381358487
0.007107612998031565
-0.001241312582105001
-0.005671920243630018
-0.013988332560500175
-0.026189

0.29602756709563927
0.2987138429657458
0.30530392555762387
0.3079921703290569
0.31458435795649087
0.31727485825890117
0.3238694628841358
0.32656255852567523
0.33315994894139184
0.3358560407986971
0.3424566511676354
0.3451562105147446
0.35176055043594995
0.35446412992853
0.3610727962594075
0.3637810428290229
0.37039473353132346
0.37310840348826957
0.37972793381013425
0.3824479105032879
0.38907423185896134
0.39180154629779695
0.3984357682617806
0.4011716231696009
0.4078150390626706
0.41056083690126977
0.41721495351104315
0.41997232909480464
0.4266389011413676
0.42940975954129224
0.4360908295647856
0.43887739008610505
0.4455753344910136
0.4483801815842686
0.45509776361223775
0.4579239056335529
0.45686052451803355
0.45971147491417375
0.4586714152188122
0.4615445860877959
0.46052559644030144
0.4634189879540087
0.462419366459371
0.46533151071181655
0.4643500583598015
0.46727997647955777
0.4663159626630729
0.46926313425414784
0.46831627376961343
0.4712806179128726
0.47035105895118995
0.473332

-0.11021363206203402
-0.10944927595868648
-0.10479732572873929
-0.10406227885773983
-0.09944031594843586
-0.09873598526819731
-0.09414582716121465
-0.09347435908107163
-0.08891858951906424
-0.08048051804098397
-0.07596256812035586
-0.06756087019069784
-0.06307841504220657
-0.0547112940363833
-0.05026291831145243
-0.04192945127598778
-0.0375146043987076
-0.02921471300157836
-0.024833684028139874
-0.016568124166576402
-0.012222035630129555
-0.003992405548040461
0.0003167744280795472
0.00850800778631499
0.020579267196650052
0.036530474976605984
0.0563634780292264
0.07227898061056591
0.09208431309486913
0.10797832053117665
0.12776947244168038
0.14365560882553038
0.1556408086243303
0.1637276533483152
0.1757217311128728
0.18381750140561176
0.1958206052724163
0.20392556486178187
0.21593806782981656
0.2240527082369766
0.2360752145015575
0.24420026457862823
0.2562336215093662
0.26437006277330566
0.27641537871790134
0.28456446676797403
0.2966231349821551
0.3047864274145915
0.31686015652965077
0.

0.18519686022661244
0.18936733483028376
0.1974475067965321
0.20163245012299721
0.2097279813424839
0.2139293632114593
0.22204239301019169
0.22626257021267535
0.23439564391107237
0.2386374131031474
0.2389897382629538
0.2432564118878677
0.24363204766938196
0.2479208416905132
0.24831734269955072
0.2526260607578224
0.2530415275956495
0.25736850015613677
0.25780152818101176
0.2621455648065326
0.2625952047682151
0.2669555585095066
0.26742128835145595
0.2717976313970875
0.2722793386419071
0.27667174856047827
0.27716972285609337
0.28157867896473854
0.28209361454989273
0.286520004142434
0.2870530121764851
0.2914981465317444
0.29205077741496877
0.2965164176711155
0.2970906936469904
0.30157908675847084
0.3021775452238251
0.298887894218833
0.29951365868305213
0.29624963214570116
0.2968997951081187
0.29365885931053354
0.29433115840816654
0.29111138064859954
0.29180413593225973
0.28860413250120015
0.2893161979951097
0.28613509510563323
0.28686582369423647
0.2837032307567992
0.28445245157860916
0.2813

-0.00971573718126361
-0.021064904896133912
-0.03630192296882227
-0.04762590384554072
-0.06284496189789517
-0.07415702520720452
-0.08937083231684494
-0.10068384370778605
-0.10810021985189493
-0.11162253163164093
-0.11905590168300159
-0.12259508579086971
-0.13004532659295728
-0.1336014852214624
-0.1410688981608172
-0.14464255796136719
-0.15212787049844867
-0.15571999057245905
-0.1632243680709266
-0.1668363583772884
-0.17436142622446965
-0.17799517661406858
-0.185543052770372
-0.18920097300434266
-0.19677431121218866
-0.20045938241585845
-0.20025819005868212
-0.20397404914824407
-0.20380226306476698
-0.20754660205843825
-0.2074023371099111
-0.21117358709105746
-0.2110556622630348
-0.21485294636491756
-0.21476085006748896
-0.21858395303638434
-0.21851782399808997
-0.22236717765220865
-0.22232780112285824
-0.22620448514471722
-0.2261933042194865
-0.2300990618510143
-0.23011820383762502
-0.2340554721327175
-0.23410778994603854
-0.23027722058332162
-0.23036637928185347
-0.22657132790284573
-0

0.15071422079405009
0.15352054768484685
0.16023203489489318
0.16304302432856876
0.1697592291513126
0.17257502246820636
0.1792961443089467
0.18211700194981206
0.18884336129794896
0.19166966788476594
0.19840171278900057
0.20123398526236694
0.20797230090834307
0.21081119988633082
0.21755652146542143
0.2204028660649939
0.22715609519699054
0.2300108816387912
0.23677310668759963
0.23963753108895633
0.2464100517890989
0.2492855376386706
0.25606989453055795
0.2589581254304409
0.2657561346815844
0.2686590930365
0.2754728872992166
0.2783928997225046
0.28522497573741173
0.28816476602035807
0.2872140819954519
0.29017684416485534
0.28924746440482013
0.2922302465636218
0.29131953097303687
0.29431991867365614
0.29342572076213386
0.29644177120388515
0.2955623810852165
0.2985925692031365
0.2977266671568107
0.30076984243838417
0.29991646214686607
0.30297181515104077
0.30213031582688726
0.30519735354879735
0.3043674003022098
0.307445929706636
0.3066274816573755
0.3097176008834837
0.308910906680877
0.3120

-0.1818391409495619
-0.18494654616239295
-0.19196329322839528
-0.19508442027680403
-0.20211577939867265
-0.2052525876193096
-0.21230067941772449
-0.21545549642525472
-0.2225228290527522
-0.22569840328745616
-0.224984052729306
-0.22818362767033742
-0.22749164210652756
-0.230712341772754
-0.23004017132064816
-0.2332796837438054
-0.23262529961650621
-0.2358818133526396
-0.23524365360141924
-0.2385158067817896
-0.2378927332805592
-0.2411795741159872
-0.24057083999311998
-0.24387179779186757
-0.24327702462575407
-0.2465918905656466
-0.24601105481086843
-0.24933997189938226
-0.24877340018003566
-0.2521168620294009
-0.25156523511320505
-0.2549240933443155
-0.25438845878860267
-0.25776393905283745
-0.25724573268593565
-0.26063945945262995
-0.26014053601539094
-0.26355456640663566
-0.2630772398196015
-0.266514106869792
-0.26606120069600425
-0.26172043414412344
-0.2612953599943386
-0.2569809023787016
-0.2565810514337525
-0.2522906742467328
-0.25191410005143244
-0.24764619770869004
-0.24729156082

0.14120835624263942
0.1481924989914266
0.15128534078649308
0.1504886907722731
0.15360630539203976
0.15283273938735992
0.15597216492308566
0.1552190585373324
0.15837791494763195
0.1576431812352249
0.16081960513121743
0.16010163895370222
0.1632942312573289
0.1625918651757126
0.1657996505632598
0.16511212147336912
0.16833451865471336
0.16766144448177459
0.1708982465446846
0.17023961246159042
0.17349097670477776
0.17284713050894107
0.17611357738361236
0.17548523384668965
0.17876765481604814
0.17815590900091552
0.1814555833045576
0.18086193301832323
0.18418055348746104
0.1836069312007114
0.1869466394012153
0.18639545410117112
0.18975888516895636
0.18923307470958298
0.1848199845012844
0.18432309759844911
0.17993748893482328
0.1794670811195791
0.17510690528958908
0.17466122358519157
0.17032508642064645
0.16990301836266058
0.16559014068847142
0.1651911788979589
0.16090137146686198
0.1605255930206746
0.15625924554584658
0.15590730553571425
0.15166538955798275
0.15133852840451775
0.1471226138847

0.12374671181306854
0.12655305272497316
0.13327311725622124
0.13610284637200104
0.13504412920731554
0.13790060673716617
0.13686708060809444
0.13974761832562843
0.13873695985735823
0.14163950409159667
0.14064998393964276
0.14357305785079205
0.14260349340706502
0.14554615347189587
0.1445958745751698
0.14755768037974293
0.14662650746138178
0.14960750405370657
0.14869573827532329
0.15169645052369796
0.1508048756326486
0.15382631423715928
0.15295621005053575
0.15599988904418688
0.15515305665083995
0.15822102238327043
0.15739982130489033
0.16049469300120192
0.15970209064972468
0.15502403725858033
0.15426369417617458
0.1496165415563403
0.14888621208150274
0.1442681590925172
0.1435663734150446
0.13897636071356936
0.13830237987464247
0.1337400579371568
0.13309384614288797
0.12855955965996646
0.1279417732575418
0.12343656166659984
0.12284855421938862
0.1183741682064467
0.11781801118928559
0.11337694504900785
0.1050529245441762
0.1006484939331491
0.09235959039702915
0.08798932488311612
0.07973357

-0.003597994534144333
-0.004370520129005502
-0.009044053675539758
-0.017617812628302983
-0.03009271708580096
-0.03866639651114013
-0.043343151593315574
-0.04412571964148704
-0.04881939412314714
-0.04961909317629472
-0.0543301921038799
-0.0551477541229904
-0.05987721194280348
-0.060713807785532564
-0.06546300509718384
-0.06632026796473062
-0.07109105859372668
-0.07197111823692963
-0.0767658668184979
-0.07767139491492321
-0.07468958715463274
-0.07562387258700037
-0.072669238395251
-0.07362957547765654
-0.07069980329713492
-0.07168417007851055
-0.06877759093781702
-0.06978459195256558
-0.06690013356074158
-0.06792895781359004
-0.0650661126785441
-0.06611650602912977
-0.0632753131268324
-0.06434756437868308
-0.061528604012690925
-0.06262354419367287
-0.0598279458443536
-0.060946960352400194
-0.05817642348519375
-0.059321476933028236
-0.0565783048831364
-0.05775197857613445
-0.05503912571703243
-0.04844179364253254
-0.04576295177507494
-0.039198301849769454
-0.036551302643355224
-0.03001763

0.009126581249208612
0.005488637470336831
0.005759273743943074
0.0021333168410855824
0.002416559329169073
-0.001196041038499619
-0.0008986785161686173
-0.004496234433211667
-0.011987256262661236
-0.02337210068690316
-0.038652870638548165
-0.05002839031027134
-0.05750358901467686
-0.06108204120667307
-0.06076566483204758
-0.06435907143840534
-0.06405627344273657
-0.06766210693097502
-0.0673705580632902
-0.07098663458435688
-0.07070431368528605
-0.07432873433585341
-0.07405387505267054
-0.07768497476852398
-0.07741601885013324
-0.08105232167592234
-0.08078787777020466
-0.08442805737178322
-0.08416686433446201
-0.08780970873409552
-0.08755060254338037
-0.09119498215874743
-0.09874174363267216
-0.10238657949156443
-0.10213177255107395
-0.10578270691639807
-0.10553348448255587
-0.1091895483527324
-0.10894501807420494
-0.11260538383994273
-0.11236478218477021
-0.11602874100400125
-0.11579141257117584
-0.11945835501602124
-0.11922373478209052
-0.12289313423297171
-0.122660732265303
-0.1263321

0.004245776098194235
0.00875983845887539
0.0171722188360592
0.02948380658788466
0.037892233133680724
0.0424016116057673
0.04301451509586167
0.04753634959334965
0.04816167136938352
0.05269596775099744
0.05333387693301122
0.05788095384146198
0.058531933663192294
0.0630924262133745
0.06375728229447915
0.06833215349262999
0.06901202997271016
0.07360258996680455
0.07429899375514495
0.07890692716022954
0.07962176070461141
0.0842491623684346
0.08498476547765352
0.08963418513943705
0.09039338649295925
0.08726427085975232
0.0880504659923029
0.08494680724576063
0.08575734932013201
0.08267686888438656
0.08350975225183795
0.08045077282982531
0.08130457640590957
0.07826597589547227
0.0791398209141287
0.07612099951932504
0.0770145207862926
0.07401538078244359
0.07492871070508986
0.07194964844388593
0.07288341330761111
0.06992532321850965
0.07088065137919519
0.06794494190633332
0.06892348375090752
0.06601210533750436
0.06701606500815813
0.06413155037830574
0.05736042661469214
0.054505963593441294
0.0

-0.014544185074266821
-0.011607848949593854
-0.012586992451319917
-0.009677844466502114
-0.00288228919678372
-3.5717427142628773e-06
0.006763151830106229
0.009614146148133544
0.016354326751681296
0.026983260342997126
0.041502363278487546
0.0521104770093955
0.0666161678274393
0.07721729821016225
0.08391802917393046
0.08672102323528055
0.09343189516710515
0.09624521931001184
0.10296666100187626
0.10579087773115169
0.11252357770349873
0.11535951637294378
0.12210443650237414
0.12495321076514523
0.13171160642046464
0.13457463848942117
0.14134808549687145
0.14422713766569004
0.15101756653646106
0.15391478053635393
0.1607245184810857
0.16364246335364674
0.17047428466519884
0.1734160139028218
0.17246962112981912
0.17543875654576074
0.1745183575466118
0.17751248682483403
0.17661604280919818
0.17963340825577506
0.17875949883157335
0.18179894653621342
0.18094672960253066
0.18400767340430557
0.1831768587918649
0.1862592582605493
0.18545009467932183
0.18855444648777303
0.18776772147987628
0.1908950

0.07600208513850457
0.07632336459523087
0.07275398653704769
0.07309789927670103
0.06954948736478528
0.06991307687686464
0.06638298162311282
0.06676382038953026
0.06324987746602581
0.06364600089158155
0.06014647372018641
0.06055632576064528
0.05706985847123471
0.057492247778580106
0.054017827627968126
0.054451893169103566
0.05098882161178476
0.0514340068831345
0.04798187861821853
0.04843791375441539
0.04499660322206139
0.0454634944719496
0.042033149427238604
0.04251117564002229
0.039092217587550555
0.039581933514277565
0.03617506494058518
0.03667731131073471
0.03328352980302574
0.0337994502823053
0.030420069769887606
0.030951135042009005
0.027587814539606126
0.02813585387824158
0.02479063424363245
0.025357875043707703
0.02203322437775527
0.02262234018450626
0.019321208584432842
0.012131765121718995
0.008857665723469604
0.0016937811131520555
-0.0015558156737409726
-0.008696302586649595
-0.019727230507245522
-0.03464998059455933
-0.045663154357015984
-0.06057551067382926
-0.07158483933071

0.006295475744833514
0.005482833676626414
0.0007705090134780468
-3.6274071802308116e-05
-0.004743583242060013
-0.013350965549273335
-0.02585966223055101
-0.03446759460475157
-0.039179403830004414
-0.03999820057869078
-0.0447294611290476
-0.045568621598486576
-0.050321158998094764
-0.05118278032308072
-0.05595892366601993
-0.0568456371284863
-0.053844787541610734
-0.05475983255089973
-0.05178570143468108
-0.05272631214710047
-0.04977652178615021
-0.050740604769739975
-0.04781340781654846
-0.04879948318004436
-0.045893714932985656
-0.046900872413239106
-0.04401591680950168
-0.04504378594903271
-0.04217955475814253
-0.043228288443938896
-0.040385213259413646
-0.041455484079356494
-0.03863452086776074
-0.039727529935489864
-0.03693017607933972
-0.038047674154295726
-0.03527599808986685
-0.03642031894948653
-0.033677002622892416
-0.027047982390000298
-0.024336399061274744
-0.01773759256612465
-0.01505519869906886
-0.008484499809263423
-0.0058295107788379485
0.000714466663762868
0.0033431287

0.45610047741733056
0.4561730714827314
0.4523585191189299
0.45246018222464035
0.4486730453029199
0.44880094914787405
0.44503879849023736
0.4451908043961948
0.4414518572688534
0.4416264561621092
0.4379095277433179
0.4381057954088624
0.4344102642544274
0.434627831003967
0.4309536180146685
0.43119265203885776
0.4275402125359811
0.4278014163910814
0.4241717450624693
0.42445636493225347
0.4208510135841341
0.4211608611286407
0.41758196933704955
0.41791945606062
0.41436979492757303
0.40693495999689455
0.4034180129476363
0.3960144396442798
0.3925277980323318
0.3851535293407496
0.38169556923936365
0.3743493960496592
0.37091922965140756
0.36360065537750264
0.3601980997997702
0.35290732137454295
0.34953288420737044
0.34227078921206366
0.33892567022562825
0.3316938499687726
0.32837995849168594
0.32118073692136684
0.3100980761561065
0.3029345826591468
0.2918860221241003
0.2847555780056493
0.27373893642055314
0.26663971710513873
0.25565362562545196
0.24858460711442792
0.2376284704163377
0.2305893942

-0.2254341377701779
-0.23560214765767484
-0.2418727570867324
-0.25205168535947825
-0.2583336714785038
-0.2685244686291873
-0.27481892904248073
-0.28502283047141985
-0.29133116250738234
-0.30154971515095585
-0.3078736469021647
-0.3181087429328205
-0.32445037192907444
-0.3347042900567949
-0.34106613025258087
-0.3513415852183854
-0.35772662305177905
-0.3602231847493694
-0.3666349609103929
-0.3691568005610793
-0.37559280493692065
-0.37813777654151176
-0.38459613583115543
-0.38716269251246077
-0.3936421167697819
-0.3962292677398743
-0.4027290112863471
-0.4053362920725741
-0.41185612793869003
-0.4144835833640036
-0.42102379058335404
-0.42367197013429125
-0.43023333340924347
-0.43290329879827194
-0.4394871202512803
-0.4421804651604699
-0.44878858805632377
-0.451507470211538
-0.4581423146578403
-0.460889500498561
-0.4597509855094535
-0.4625299300830241
-0.461421699451869
-0.4642299407463199
-0.4631499700319775
-0.465985806713431
-0.46493279456016584
-0.467795235746247
-0.46676856641563813
-0.4

0.3534585150505897
0.3536698472543508
0.35779309412895927
0.35802329582262565
0.3621657353684059
0.3624156170983938
0.3665782810495689
0.36684913641856864
0.37103354716421666
0.371327178565484
0.3755353786102629
0.37585413464683004
0.3800887273954481
0.3804355523494896
0.3768966168997493
0.3772751349492916
0.3737665102535413
0.36637109680166774
0.3550873797853676
0.34771616326369925
0.34425366835566673
0.3446974312491379
0.34124173029244453
0.34169213636305545
0.33824296326899844
0.33869982890481326
0.3352570849720083
0.33572039136290954
0.3322841407769861
0.33275403130243986
0.3293245007675401
0.3298012824806783
0.32637886492769064
0.3268630141114418
0.3234482765269184
0.32394044839554925
0.32053414391380225
0.32103518662914077
0.31763827112945264
0.3181492443962224
0.31476289794842566
0.31528509654776266
0.3119107501529338
0.3124457328663539
0.30908510104412534
0.3096347255092038
0.306289845386353
0.3068563095044842
0.30352958715488987
0.30411547773040953
0.30080974259276744
0.293614

-0.22352784134880396
-0.2227409637460618
-0.22586857709613542
-0.22510626524519337
-0.22825900353713297
-0.22752263551836363
-0.2307021556244809
-0.22999373958587757
-0.23320233857406916
-0.23252455258786128
-0.22796234992565179
-0.22731862169106604
-0.22278900276052668
-0.2221769041824217
-0.21767791261594
-0.21709583274274158
-0.21262629203719122
-0.2120733885473408
-0.20763286740963036
-0.2071090360926435
-0.20269783074889022
-0.20220369233743254
-0.19782282559872993
-0.19735972935468574
-0.19301096625584827
-0.19258100976780873
-0.1882668896211691
-0.18007059562971553
-0.175794512294376
-0.16763478977845234
-0.16339438734661205
-0.15526941199721392
-0.15106324888708259
-0.14297207432206047
-0.1387995779187877
-0.1307421061223374
-0.12660354709171426
-0.11858051756152639
-0.11447700321825832
-0.10649000067792219
-0.10242348668887186
-0.09447496709169922
-0.08264646710578424
-0.07473984032578855
-0.06295180463370921
-0.05508483304823644
-0.043335614485548596
-0.03550706860883228
-0.0

-0.024620765501664896
-0.016275032026956314
-0.01184479723855918
-0.0035257644184603972
0.0008773503257585814
0.009168590459136622
0.021346507119029693
0.03741154129902169
0.05736596195604736
0.07340940112627092
0.09335103308717616
0.1093893274427735
0.12152918027585814
0.12977406618305107
0.13412576062192585
0.1423885461907008
0.14675645554918543
0.15503407299208113
0.1594153708616694
0.16770516216188971
0.17209738771916466
0.180397033715767
0.18479802757440467
0.19310548592711213
0.197513333548995
0.2058267841479259
0.21023976559765784
0.2185575620876429
0.2229741069791573
0.23129473317967925
0.23571337973994153
0.24403540984593172
0.2562596447634881
0.26458170456906005
0.26900378890850446
0.2773312185576408
0.28175800105947885
0.2900895142532931
0.2945197759270058
0.30285420542611347
0.3072868296132053
0.3156230963443694
0.3200570395282803
0.32839412429222936
0.3406332661010758
0.3489701806860427
0.35340716193840177
0.3617496225512566
0.3661916686594168
0.37453876542829645
0.3789850

-0.1326778332338213
-0.1324718145069029
-0.12838069505622024
-0.12040638076875987
-0.11635143329484861
-0.10841168713579724
-0.10439028245809652
-0.09648297921338685
-0.09249335371367262
-0.08461719283558623
-0.0806583962291093
-0.07281286445554962
-0.06888472630933434
-0.06107007790478463
-0.05717319228788945
-0.049390446746975916
-0.045526174385938784
-0.037777131407259495
-0.033947618729362875
-0.026234890244880585
-0.01464098547163367
-0.006968079562348164
0.00458739114301317
0.01222267004589287
0.023741343654955935
0.039142702028161044
0.058427955057291946
0.07379686184352743
0.09305667584490628
0.10840564814845927
0.12765192087328506
0.15079772542613878
0.17004185855990653
0.18538975791304566
0.19684548492505194
0.2044114099123323
0.2158918289279361
0.22348182036317923
0.2349859326999502
0.24259932479008506
0.25412673733604135
0.26176345652808425
0.2733143638215412
0.28097492055537193
0.29255010107221446
0.3002355954491188
0.31183642169215686
0.31954856297803924
0.331177025111787

-0.1369006258540398
-0.1341347798005418
-0.12748200353368733
-0.12474568696737202
-0.1181209382394824
-0.11541160635316959
-0.10881274527895061
-0.1061285582300067
-0.09955411122639456
-0.09689388103547092
-0.09034299942623582
-0.08770615264647694
-0.08117858591662473
-0.07856514130821292
-0.07206122461887218
-0.0694717852011519
-0.06299244088854958
-0.0604281999126892
-0.05397495284432794
-0.051437713388996026
-0.045012720192639355
-0.042504928192943686
-0.03611102046628843
-0.02583301949215242
-0.019473787787704276
-0.009229082918440568
-0.00290227649804966
0.0073109023650944184
0.013606699675054835
0.02378932241295361
0.03785791056432563
0.05581349522758979
0.06985510318745564
0.08779044476938504
0.10181734047740376
0.11974416304782243
0.14157303116890932
0.15950265961653834
0.17353866585847155
0.18368524936208072
0.18994488516089095
0.20012142254049303
0.20641052005132504
0.21661630547675245
0.22293456917853424
0.2331696347924634
0.2395174988189228
0.24978260469021676
0.25616123502

0.3530745113143492
0.35255005915171006
0.3559398340728105
0.3554386067786248
0.358850749824693
0.35837102716234814
0.36180407437939566
0.36134469177684725
0.3647977216629939
0.3643580363142174
0.3678306400396714
0.3674105096421942
0.3709027740364931
0.3705025477367069
0.37401505049725614
0.3736355717835174
0.3771693885596152
0.3768120100264547
0.38036873321848386
0.3800353433005236
0.3836171125631059
0.3833101744808383
0.3791163671909216
0.37883898969545327
0.37467306635096326
0.3744223869088124
0.3702818914200664
0.37005574868077656
0.36593888232250565
0.3657357555172072
0.36164132803844984
0.36146029186404743
0.3573876867921884
0.3572283809322684
0.3531775345910839
0.3530401471464374
0.3490115410033285
0.3488968052808187
0.34489147139062576
0.3448006756863392
0.34082021516828404
0.3407552253467761
0.33680183998407265
0.3367651347675508
0.33284167192050335
0.3250334597788597
0.3211434957141574
0.3133673862750953
0.30950862490828296
0.3017627851797074
0.29793373578661414
0.290217103126

-0.01198649926210118
-0.008428198207669024
-0.0009837794159366278
0.002543031828069162
0.00995636370832986
0.021255323963465862
0.0364409154966844
0.047712207097069755
0.06287681714714288
0.081937143249491
0.09709226541207532
0.1083473482416342
0.11570624624994288
0.11917117667257959
0.1265469232774564
0.13002788556027414
0.13741904183159404
0.14091481871265643
0.14832035101636734
0.1518301105614661
0.15924935961606884
0.16277263038233566
0.1702052899291265
0.17374194517975033
0.18118804965822594
0.18473830021930016
0.19219822103016984
0.195762616554066
0.2032370666778803
0.2068165052523141
0.2143065521676316
0.21790229838110453
0.22540938538843996
0.22902309712637217
0.2365490733271377
0.24018283772198368
0.2477299970153834
0.25138637609686765
0.251153783313733
0.2548358845632428
0.2546273150363676
0.2583321733472785
0.25814501312402904
0.26187026865366025
0.261702464359944
0.2654462976809333
0.2652963016645414
0.26905738229933956
0.268924109809123
0.27270155710078514
0.27258435476842

0.0758019433734744
0.07974467050706323
0.07979376011778468
0.08375479491117184
0.08382316210582832
0.08780443726890286
0.08789427125366621
0.09189819596135729
0.09201219424321097
0.08823813792438137
0.08837958132652408
0.08463137415913376
0.08479751436864266
0.08107278320961334
0.0812615227734942
0.077558503733904
0.07776833720644263
0.07408582754516102
0.07431579715220556
0.0706531204505018
0.07090278537414234
0.06725976943657057
0.06752918797066085
0.06390615501581837
0.06419587903258704
0.06059364790305402
0.06090472927100516
0.05732462957436708
0.05765863835416132
0.05410253662278242
0.05446159101874935
0.050931929120960694
0.04351538254103951
0.04001524787856772
0.032626540115388666
0.029153048164096317
0.021789700732858616
0.018340667013025527
0.011000857305460816
0.007574736244442654
0.00025724808754119637
-0.003146912671044801
-0.010442724313048263
-0.013825315009982186
-0.02109954567897805
-0.02446041168560786
-0.031712615812934886
-0.03505106158255446
-0.04228024785477223
-0.

-0.061322422510273936
-0.06494392388953671
-0.06466986965810861
-0.06830635527682517
-0.06804888451579667
-0.06389928872465912
-0.06366175532291267
-0.05953049745411963
-0.05931002798326757
-0.0551945032899765
-0.05498868755362047
-0.050886712618613446
-0.05069353841324549
-0.046603293430497406
-0.0464210915568381
-0.04234107159721824
-0.042168469866638594
-0.03809744411107011
-0.03793332922490418
-0.033870307242894755
-0.033713793570549974
-0.029657997836490896
-0.029508402266638166
-0.025459246226590904
-0.025316068407165982
-0.021273139527604287
-0.021136046941973116
-0.01709909428568164
-0.01696791173362449
-0.01293683771555994
-0.012811540879525875
-0.008786396957159548
-0.00866710983800035
-0.004648095987467586
-0.00453509209132334
-0.000522560013986189
-0.00041626726496124793
0.0035892726397047494
0.003688263196260813
0.007686130964649491
0.015581784689707032
0.02737587954533046
0.035265864725302985
0.03925557223325918
0.03934728391053002
0.043346264668113726
0.04344667813767319

-0.058299705992843884
-0.0626744003351223
-0.06315004941877922
-0.06753290335225108
-0.06801804039917553
-0.07241168125448086
-0.07290905905262596
-0.0773163477815543
-0.07782899053750922
-0.07444881349124263
-0.07498009563477312
-0.07161696240587481
-0.0721639968948586
-0.06881527110484252
-0.06937560163149065
-0.06603903587558237
-0.06661057072933461
-0.06328424743928067
-0.06386520285543872
-0.06054748339163225
-0.061136335071586605
-0.05782581595400339
-0.058421257567135385
-0.05511673283987477
-0.05571764045039089
-0.0524180692738203
-0.05302347076472351
-0.0497279494294956
-0.05033699811407519
-0.047044735765721275
-0.04765668748158807
-0.04436698492958922
-0.04498117800071266
-0.04169340906052592
-0.04230924658933866
-0.039022841469545475
-0.0396397754848646
-0.036354205784728096
-0.036971722823062145
-0.03368648774890795
-0.03430409548772766
-0.031018708930355573
-0.03163592352304433
-0.02834990166345988
-0.028966235447714282
-0.025679084575378673
-0.02629403384041059
-0.023005

0.1633817325981083
0.16602842653625263
0.17259195234205274
0.1752684825241511
0.18186258862741894
0.18457080468016482
0.1911976680026342
0.193940182447108
0.1928003550803464
0.1877782636302894
0.17887208190987453
0.1738816113077596
0.16499902286644705
0.16002575708238048
0.15115302658976315
0.14618305318003869
0.14511153661955828
0.1479357030467773
0.15465451773304284
0.16526873656766347
0.17197608103296336
0.18258513096877374
0.1892931766592628
0.19210418722231978
0.19102052602743502
0.18604278178485115
0.17716975506393812
0.1722033351594672
0.16333456195352922
0.15056094266848513
0.13387873691423374
0.1132834485279432
0.08877053918787228
0.06033640671529442
854 Episode finished after 79.000000 time steps / mean 185.020000
0.011294435531603711
0.010713380839357316
0.014040981247383462
0.013472384924661551
0.01681366407011416
0.016260161756921885
0.019617897971848923
0.019082469357432356
0.014655733563004656
0.014141781183403423
0.009734956258044552
0.009239695055465574
0.0048502858269

-0.17639363066304833
-0.19169685301834297
-0.20307899799551818
-0.21834338284580077
-0.2296871002049382
-0.24491336398017602
-0.2562189593884048
-0.271406868841062
-0.28267370199507974
-0.2978220786906676
-0.3090485642683948
-0.32415526910551756
-0.34314021905850517
-0.35820207292285455
-0.37714370989797696
-0.3921630999579668
-0.4110631746462648
-0.42604141186512834
-0.44490063611133346
-0.4598379989130727
-0.478656070251066
-0.4935518263458424
-0.5123274316377654
-0.5271798387141412
-0.545910637101265
-0.5685177365398217
856 Episode finished after 200.000000 time steps / mean 179.660000
0.04514542232520903
0.04418820064412278
0.047138422316330184
0.04619114961023487
0.049152557714999906
0.0482178661795385
0.05119320729170427
0.05027401073521751
0.04546207554203393
0.04456164543329049
0.03976682787827531
0.038882178743108196
0.0341017462840362
0.03323032516401676
0.02846193453360448
0.027601552478091963
0.02284318825583599
0.02199196127757369
0.017241882624726347
0.0163981807604628
0.

-1.1871668013952363
-1.2008160285078926
-1.2183855483305652
-1.2320722960427914
-1.2496798014505712
-1.2634053720690868
-1.28105256460206
-1.2948192024629288
-1.3047072843033753
-1.318518256762297
-1.3284491817703366
-1.3423021751926134
-1.3522742580839084
-1.3661680238333418
-1.3761806082920787
-1.3901149204318497
-1.4001683557052063
-1.414143990424123
-1.4242396287308634
-1.4382583647643687
-1.448398579284619
-1.454662369452537
-1.464850643352437
-1.471161153782143
-1.4813954856110683
-1.4877514012055797
-1.4980309457465526
-1.504432077092472
-1.5147571103611113
-1.5212043795064678
858 Episode finished after 200.000000 time steps / mean 179.660000
-0.01444925231992144
-0.01499526919023632
-0.011650046019346504
-0.01221768068357366
-0.008892546143591399
-0.009479083442437057
-0.006171613959562653
-0.0067748459549191635
-0.0034830852529243853
-0.004101250602303793
-0.0008236572652898606
-0.0014553919625586744
0.0018092028426948913
0.0011649044722847396
0.0044173299660913885
0.011565745

-0.4512933553548867
-0.46519695144811124
-0.4829802681429562
-0.49684093835466514
-0.5145814051208679
-0.528399022584058
-0.5460959027692971
-0.5598692856809762
-0.5775207803834328
-0.5990484665799358
-0.6244524047217105
-0.6537346039555325
-0.6790967474829843
-0.7083454023563877
-0.7336800235193897
-0.76290861038901
-0.7882292837844237
-0.8174507779977098
-0.8427706343491304
-0.8641930280880658
-0.8817205656829157
-0.9031585026565095
-0.9207015576642755
-0.942155086251579
-0.9597139128393634
-0.9811834717375629
-0.9987587167334686
-1.0202451403824275
-1.037837854598356
-1.05934238812934
-1.0769540473980246
-1.0984783711898818
-1.1161109073984115
-1.1376571714609147
-1.155313014003647
-1.1768838824958403
-1.1945660106632527
-1.2083613873075418
860 Episode finished after 200.000000 time steps / mean 179.660000
0.04453375798557058
0.04396337267463906
0.047281225426405935
0.054487544647136614
0.06558438282572397
0.07277104642013715
0.0838566947773465
0.0910396230318297
0.09432464020016984

-1.0831813472481042
-1.1061974802824615
-1.1253356041875493
-1.1405982276231816
-1.159786284111993
-1.1750982158968708
-1.1943354427336734
-1.2096966377069431
-1.2289834840271374
-1.244395100336088
-1.2637332151956893
-1.2791976246758354
-1.2907905614686188
-1.306311452166585
-1.3179597230196773
-1.333535518464114
-1.345238367523434
-1.3608688445110266
-1.3726268464281568
-1.3883131084326237
-1.40012816657818
-1.4080741601479494
-1.4199496075185327
-1.4279546661277789
-1.4398886782446185
-1.4479518611250377
-1.4599440513346635
-1.4680658282272818
-1.4801172168238725
-1.4882994636354505
-1.4926147533490208
-1.5008608598734472
-1.5052387503046778
-1.5135470388622816
-1.5179867927275768
-1.526357100492149
-1.5308594707906709
-1.5392931207064102
-1.543860349638688
-1.5445635490352874
-1.5491994927103696
-1.5499704807010715
-1.5546740473419864
-1.5555127530668884
862 Episode finished after 200.000000 time steps / mean 179.660000
-0.025671339926624473
-0.02497752147442354
-0.0281826147442173

-1.53412873547011
-1.5231511462511433
-1.5161036713780631
-1.505184832441965
-1.490391475005727
-1.4795200291509807
-1.4647640852128605
-1.4539230576435305
-1.4391885217760298
-1.428361623369359
-1.4136329602765874
-1.4028043393228797
-1.3958707721937431
-1.3928287696260448
-1.3858727373084114
-1.3828066587474028
-1.383628610205231
-1.3883385399245107
-1.3969382397402408
-1.4094311839186333
-1.425822225414359
-1.438312267258943
-1.4547132282145907
-1.467226343452314
-1.4836616351004084
-1.4962234583152842
-1.5127170010617448
864 Episode finished after 182.000000 time steps / mean 179.660000
0.03849715297430824
0.0389528712358494
0.04331091882665857
0.04376661597453409
0.040321853667172884
0.040781669495392386
0.037339745214889246
0.03780117509123143
0.042164515382470125
0.050430003911216194
0.05479463738466442
0.05526209078562182
0.051834401805424285
0.0523163280535228
0.048901975926274925
0.0493962992285939
0.04599339826381167
0.04649838046754815
0.04310535360317575
0.0436195465467547

0.04615295525317347
0.047127270493604455
0.04420600562493048
0.04519429811435794
0.04228658160616969
0.04328812076425813
0.040393404304621953
0.04140780173947596
0.03852586982795678
0.039553065125319184
0.03668402586229943
0.03772428068742796
0.03486856465690545
0.0359224648976019
0.033080832035910515
0.034149297889649724
0.03132285240131203
0.032407156472641085
0.029597370017821232
0.03069916364025241
0.027907907171342675
0.029029255293946676
0.02625884005363549
0.027402265124160687
0.02465549341966459
0.01802041926862949
0.0153006618119258
0.00869105177966321
0.005995637738382431
-0.0005908103662516434
-0.003263910471544166
-0.009828897781974925
-0.012481129267805459
-0.01902580249987491
-0.02165806934363825
-0.028183054265202977
-0.030795748449109803
-0.037301170728235904
-0.03989418883536743
-0.04637968269025223
-0.04895243029199823
-0.05541713519654341
-0.05796851937070968
-0.06441106488628108
-0.06693947502053495
-0.0733579536498531
-0.0758612305377493
-0.08225315835489022
-0.092

0.04786969412356439
0.04879775097383458
0.0458338892436641
0.04678376984718916
0.043843373769071596
0.03701463473721787
0.03410121283625723
0.027297971785264094
0.02440898838278357
0.01762907724656576
0.014762638932054225
0.008004493778292075
0.005159295439592577
-0.001578086968949917
-0.004402801644814573
-0.011119893942357531
-0.013924358339642718
-0.020621119805346613
-0.023405057301104232
-0.030080938630372294
-0.032843563052999604
-0.039497498847540835
-0.04223750276380658
-0.04886789175646063
-0.05158342270793006
-0.05818809557707414
-0.060876720266436965
-0.0674528947808754
-0.07791465347505766
-0.08445885403771722
-0.09489010048637159
-0.10140476309599267
-0.11180749260020688
-0.11829428696290883
-0.12866976571810668
-0.13512964478038583
-0.1454784465460537
-0.15191167898520264
-0.16223370576049181
-0.168639892113804
-0.17893437908935578
-0.1853124521057395
-0.1955779570871544
-0.2019261667679315
-0.21216054417297975
-0.2184764278970896
-0.22867678738267833
-0.2427595337936377


-0.14142410824102777
-0.14579920137070485
-0.15406952152445325
-0.15842994331045507
-0.16668597335908805
-0.17103235740390474
-0.17927453901194695
-0.1836071582027913
-0.1918355802471208
-0.1961543583610833
-0.20436876322503772
-0.20867327775842684
-0.2168730590161752
-0.22116253545571926
-0.22934672698848102
-0.23362002456518008
-0.24178728097792
-0.24604286978257267
-0.2541914378262943
-0.258427367026019
-0.2665550475848249
-0.2707689043739583
-0.2788730044612599
-0.2908654864536657
-0.29894274470310306
-0.3109099948445529
-0.3189631947222087
-0.33090762992244377
-0.338938919395973
-0.35086236311977553
-0.3588733146248918
-0.3707770463829787
-0.37876870262421064
-0.39065349455340104
-0.398626401071297
-0.41049254220358594
-0.41844676749263165
-0.43029407619482696
-0.4382292199657589
-0.4500570448659454
-0.457972234843174
-0.4697794443755495
-0.47767332182569033
-0.48945828234171684
-0.4973289763606056
-0.5090895185910423
-0.5169346090548811
-0.5286679825876924
-0.5442877043341666
-0.

-0.05459112161894095
-0.05911041977337353
-0.059726679847873665
-0.0642457697545393
-0.06486212375088028
-0.06938160856042205
-0.06999865954666244
-0.07451914476463792
-0.07513750577675345
-0.07965961443564216
-0.08027992379188771
-0.08480431186892144
-0.08542724892776542
-0.08995462138842861
-0.09058092314094096
-0.09511205099753649
-0.09574252990040422
-0.10027826869328268
-0.10091383217337688
-0.10545514230920276
-0.10609681440493754
-0.11064478382157857
-0.11129372947027749
-0.11584959914122706
-0.11650715167527483
-0.12107234451720823
-0.1217400369944307
-0.12631619079803896
-0.12699579186223076
-0.1315847969260153
-0.1322783519690714
-0.13688239417322517
-0.1375922726456836
-0.1422138827497811
-0.14294283253233347
-0.13978092941606793
-0.1405321501368688
-0.13739084956983813
-0.13816137646984297
-0.13503801562645065
-0.13582540383759703
-0.13271779591538885
-0.13352005402491576
-0.1304264303169063
-0.13124196541767885
-0.12816092754050698
-0.12898850056732755
-0.12591898191300616

0.0018574482182958463
-0.0011381878459632924
-0.0002253783351169738
-0.003210181172386219
-0.0022876019105824603
-0.005263711001351139
-0.0043333852961103024
-0.007302697333995975
-0.006366428150027375
-0.0093306468355546
-0.008390064821361154
-0.01135074550185293
-0.010407352159466322
-0.01336594238219397
-0.012421149485408628
-0.015379026456423923
-0.014434192733480006
-0.01739269860941141
-0.016449164634652777
-0.011604779810458325
-0.010663903854463292
-0.013624382936837206
-0.01268087652470678
-0.015639442415894655
-0.014694712194175584
-0.017652739285492557
-0.016708143346163166
-0.019666975376763214
-0.018723858076373785
-0.02168484211864534
-0.02074456810990993
-0.02370908820848234
-0.02277307654312221
-0.025742587825528795
-0.02481234697541462
-0.027788411048179248
-0.026865575697759887
-0.029849898151017577
-0.028936268636092236
-0.031930739500480504
-0.031028324804119926
-0.03403506283172641
-0.03314612809358423
-0.0361675297639027
-0.03529464930761917
-0.0383334434794845
-0

-0.06069329146905118
-0.06461079517022786
-0.06463164286461404
-0.06856179954813252
-0.06859640101545475
-0.07254139307614137
-0.07259211154982922
-0.07655446135999282
-0.07662403346865282
-0.07280262875833493
-0.07289423744871126
-0.06909319086097639
-0.06920384873866336
-0.06542047161107771
-0.06554770503598817
-0.061779778308662665
-0.06192156030022664
-0.058167275835438126
-0.05832197030869216
-0.05457988220250832
-0.054746198314448935
-0.051015183244486347
-0.05119213876216102
-0.04747136450918159
-0.047658257270010405
-0.04394715867613723
-0.04414354537770682
-0.04044180714271531
-0.040647488864004856
-0.03695503472382718
-0.03717004873819533
-0.033487036711826526
-0.033711654302811526
-0.03003847783345342
-0.03027320796775512
-0.026610502921059764
-0.026856101769796435
-0.0232047593869112
-0.023462245817069675
-0.019823431853244127
-0.02009410913610471
-0.01646928954658852
-0.01675477364725448
-0.013145747309253702
-0.013448002227194293
-0.009856941305205376
-0.01017832202185161

-0.06823206345872528
-0.06907967766681174
-0.06602222778349688
-0.0668650060603679
-0.06380194084165013
-0.06463826494701232
-0.061567906405549856
-0.0623960161299872
-0.059316525450751015
-0.060134476272867936
-0.057043810379903484
-0.05784942717487815
-0.05474529109737431
-0.05553611688985018
-0.05241591099266465
-0.05318914970481354
-0.057854034768572066
-0.05860647735750373
-0.06325223745041142
-0.06398687802343823
-0.06861622594805707
-0.06933557574448118
-0.07395078536408131
-0.07465694057555618
-0.07925990673246862
-0.07995460570181476
-0.08454689484428583
-0.08523156592052396
-0.08981445846627592
-0.0904902596669622
-0.09506478504443415
-0.09573263652915102
-0.10029960172500843
-0.10096021193333864
-0.10552022425362102
-0.10617411041581148
-0.11072759504678821
-0.11137509863933939
-0.11592231147698977
-0.11656360906953052
-0.12110464517510748
-0.12173975499854943
-0.1262745529319252
-0.12690333738966889
-0.13143167957122398
-0.1320538438133998
-0.13657535296777082
-0.1371904395

-0.03303376245972327
-0.029365330888279965
-0.02960447547692999
-0.025945255642560405
-0.026192894166158047
-0.022541462044872437
-0.022796274620604955
-0.019151418611113857
-0.019412282613884346
-0.01577297084652018
-0.016038930186871532
-0.012404282609395063
-0.012674521566733854
-0.009043786098764368
-0.009317607008116664
-0.005690139147983067
-0.005966943337257947
-0.0023421886874302887
-0.0026214601477038855
0.001001060602742319
0.0007197692581446128
0.004340473293018135
0.011862421141736426
0.02328654632925617
0.030810496660288053
0.03443867341465974
0.034173902439365085
0.03782138796120683
0.03757627093110993
0.04124383156558224
0.041019396690355425
0.04470828985204694
0.04450607043686721
0.048218067840362006
0.048040133387475835
0.05177755575845385
0.05162655514780408
0.04758903241689775
0.04746826971707805
0.04345941843846286
0.04336624213062705
0.039383826438860695
0.03931630582466328
0.0353587701716923
0.03531564005990546
0.031382065931473274
0.03136268735032446
0.0274527648

In [22]:
def digitize_state(observation):
    cart_pos, cart_v, pole_angle, pole_v = observation
    digitized = [
        np.digitize(cart_pos, bins=bins(-2.4, 2.4, num_dizitized)),
        np.digitize(cart_v, bins=bins(-3.0, 3.0, num_dizitized)),
        np.digitize(pole_angle, bins=bins(-0.5, 0.5, num_dizitized)),
        np.digitize(pole_v, bins=bins(-2.0, 2.0, num_dizitized))
    ]
    print(digitized)
    for i, x in enumerate(digitized):
        print(x * (num_digitized**i))
    return sum([x * (num_dizitized**i) for i, x in enumerate(digitized)])

In [24]:
num_digitized = 6
observation = env.reset()
state = digitize_state(observation)
state

[3, 3, 2, 3]
3
18
72
648


741