In [8]:
"""Generate Sample Trajectories"""

import numpy as np

import flmdp
import policy_approximators

from importlib import reload
reload(flmdp)
reload(policy_approximators)

from flmdp import FLMDP

# L-MDP params
mag_S = 9
mag_A = 4
l = 4

# Deterministic initial state distribution
P0 = np.zeros((mag_S))
P0[0] = 1.0

P = FLMDP.random_P(mag_S=mag_S,
                  mag_A=mag_A,
                  l=l)

lmdp = FLMDP(mag_S=mag_S,
             mag_A=mag_A,
             P=P,
             P0=P0,
             l=l)

# Trajectory Params
T = 20
m = 100
Gamma = 0.9

pi_b = FLMDP.scips_approximable_pi(lmdp=lmdp,
                                   Gamma=Gamma,
                                   sigma=0)

pi_e = FLMDP.random_pi(lmdp=lmdp)

s_b, a_b, r_b = lmdp.simulate(pi=pi_b,
                              T=T, 
                              m=m)
s_e, a_e, r_e = lmdp.simulate(pi=pi_e,
                              T=T, 
                              m=m)

In [9]:
"""Create Policy Estimators"""

from policy_approximators import naive_approx
from policy_approximators import sparsity_corrected_approx

# Naive Monte-Carlo Policy Estimator
hat_b = naive_approx(states=s_b,
                     actions=a_b,
                     rewards=r_b,
                     l=l)

# Sparsity Corrected Policy Estimator
tilde_b = sparsity_corrected_approx(states=s_b,
                                    actions=a_b,
                                    rewards=r_b,
                                    Gamma=Gamma,
                                    lmdp=lmdp)

In [10]:
"""Evaluate Policy Estimators"""

import step_is as step_is_module
from importlib import reload
reload(step_is_module)

step_is = step_is_module.step_is

gamma = 0.9

rho_pi = step_is(pi_b=pi_b, 
                 pi_e=pi_e,
                 state_samples=s_b,
                 action_samples=a_b,
                 reward_samples=r_b,
                 l=l,
                 gamma=gamma)

rho_hat = step_is(pi_b=hat_b,
                  pi_e=pi_e,
                  state_samples=s_b,
                  action_samples=a_b,
                  reward_samples=r_b,
                  l=l,
                  gamma=gamma)

rho_tilde = step_is(pi_b=tilde_b,
                    pi_e=pi_e,
                    state_samples=s_b,
                    action_samples=a_b,
                    reward_samples=r_b,
                    l=l,
                    gamma=gamma)    

print(rho_pi)
print(rho_hat)
print(rho_tilde)

2.921241353255184
0.0
-25.268239979555652


In [11]:
history = [1,2,3,4]
action = 5
tuple(history)+(action,)

(1, 2, 3, 4, 5)

In [12]:
tilde_b

{(0, 0, 0, 0, 0): 0.2590869438790346,
 (0, 0, 0, 0, 1): 0.0,
 (0, 0, 0, 0, 2): 0.4501308519918581,
 (0, 0, 0, 0, 3): 0.0,
 (0, 0, 0, 1, 0): 0.1692477048976031,
 (0, 0, 0, 1, 1): 0.056528060482698456,
 (0, 0, 0, 1, 2): 0.4309516886137996,
 (0, 0, 0, 1, 3): 0.05249034187679141,
 (0, 0, 0, 2, 0): 0.16249656530194664,
 (0, 0, 0, 2, 1): 0.05153647716025834,
 (0, 0, 0, 2, 2): 0.43100725354874336,
 (0, 0, 0, 2, 3): 0.06417749985994435,
 (0, 0, 0, 3, 0): 0.17306598227492825,
 (0, 0, 0, 3, 1): 0.05427513053592425,
 (0, 0, 0, 3, 2): 0.4368180841245559,
 (0, 0, 0, 3, 3): 0.04505859893548428,
 (0, 0, 0, 4, 0): 0.16541372438648955,
 (0, 0, 0, 4, 1): 0.06126034077540142,
 (0, 0, 0, 4, 2): 0.4261453217411718,
 (0, 0, 0, 4, 3): 0.05639840896782988,
 (0, 0, 0, 5, 0): 0.16859878583593946,
 (0, 0, 0, 5, 1): 0.04542433431645412,
 (0, 0, 0, 5, 2): 0.4281397060132573,
 (0, 0, 0, 5, 3): 0.0670549697052418,
 (0, 0, 0, 6, 0): 0.16079248227236217,
 (0, 0, 0, 6, 1): 0.054569167297654445,
 (0, 0, 0, 6, 2): 0.4445

In [13]:
from itertools import product

for action in range(mag_A):
    for s1, s2, s3, s4 in product(range(mag_S), range(mag_S), range(mag_S), range(mag_S)):
        print(pi_b[s1,s2,s3,s4,action] - tilde_b[(s1,s2,s3,s4,action)])

-0.024919897800148333
0.06662681240430371
0.09727256579905127
0.05434430692599182
0.08355250685994006
0.08862845676101688
0.07586025597850385
0.051963542625968834
0.07196102749463493
0.02711787950438252
0.13980742706261368
0.16852434187586485
0.14306368995451585
0.1485933070342434
0.12280072686037145
0.1498226850560676
0.11779261402327729
0.13958326663342208
0.0405038436171469
0.12131763081211497
0.14465484633290696
0.13329711809750588
0.12911966181164586
0.12404210775904007
0.13691924855585846
0.13588399533699896
0.14982579331806806
0.032294326201811635
0.11576680560597562
0.1389807713641361
0.14548781845348535
0.14679338820309296
0.13740092256982622
0.18233287875987042
0.1451668894957369
0.11748564763796092
0.06426893192174576
0.115384545592895
0.12725883935912852
0.14052638229799208
0.12525134759079104
0.11155579183962017
0.13352684226238742
0.13440241615642817
0.12343167403920606
0.07062131658594639
0.18991596781602987
0.18037821433022067
0.15242203902087847
0.1426257654944756
0.12

0.010061637425531844
0.001974770688303401
0.00034757773288601745
0.00041916843634229317
0.0018434886011965412
-0.011298284908619588
-0.00027012219839903095
-0.07657200439742895
0.021106377647081898
0.013422404199209564
0.00232653110491865
0.05926708437944894
0.024854370843837653
0.02389700617912796
-0.02103192513189442
0.007047244873410546
-0.13245603341869677
-0.06951491708528698
-0.021531533414522908
-0.08149846565320351
-0.06195434714594472
-0.040864964113403845
-0.047156252826672074
-0.08239343232909768
-0.06747407473365188
-0.11188834260358446
-0.0038849716286892544
0.019797143500803194
-0.0028173104073194177
0.022605180302711414
0.003795066715508305
0.021256025186421945
-0.009240068925718103
-0.021319294058656235
-0.09780006069386157
0.007265475626375695
-0.010109450574243756
0.0008497804740176251
-0.008197900116160478
0.0018317135327823864
0.012619876102118815
-0.006747246396591022
-0.027242739335581884
-0.09033472080867508
-0.014422020796482415
-0.004745898695609485
0.017126420

-0.0031980491974203384
0.0052502239570816345
-0.06680430620383543
0.001592967760484204
0.04387890151320245
-0.006102682455057851
0.002496151520537837
0.0028397121801727876
8.593459411565529e-07
-0.009121402016886299
-0.008727405567611757
-0.10561048066974688
0.012292961457969126
-0.014218675266814196
-0.018969860240841574
0.0020946834027021355
-0.004073792223725642
0.001886853419344997
-0.01638957689497289
-0.002247482439412807
-0.05902365987641145
0.01269028079346951
0.013092084592184194
-0.014945024054457412
0.023792717922547835
0.0017090069573942601
0.03288517854608233
0.023882556109056707
0.03675026307375673
-0.09635653305799918
0.005892813721448897
-0.003013369333105531
0.008979263999522369
0.0169393480633854
-0.020402928848181484
0.0252122651064432
-0.01725678783881332
-0.013514321798391793
-0.08494917840203653
0.012978535834668459
-0.007339976685562938
0.03377395049260862
0.017307777826873427
-0.0063216759481657114
0.03696107007638827
0.02872720981775201
-0.026040674670014807
-0

-0.17042612825455467
-0.05077634641557316
-0.04034342733314111
-0.09169401642381697
-0.049081799533219284
-0.08177813255981134
-0.03010023146177998
-0.08045632371404482
-0.06744184660125674
-0.09925971828790475
0.013790576194181281
-0.003387995559934409
-0.01029026590534693
-0.023130272500800658
-0.026282190428745633
-0.00856931606920222
-0.03474463413985829
-0.024507177517030188
-0.12212719086533239
-0.019167824535567785
-0.02417338086653875
-0.04051287872011844
0.0031001542941124227
-0.018578512460811464
0.010611467199349522
-0.032766093299526566
-0.027223020593351993
-0.10795878562830358
-0.01680861841892073
-0.010301926082125268
-0.02357769585329672
-0.011005325077718925
-0.010864835914673576
0.019385605860163274
-0.0279302984315698
-0.004541850955272542
-0.08472769534980146
-0.030608531633889963
-0.016632264853439743
-0.047823625729486036
-0.03141607241308261
-0.018669905836052825
0.014859042531966127
0.0034021112098858164
-0.016089494671136406
-0.10594672296755434
-0.029748721992

-0.0012433215817453935
0.0625455161119457
0.00875032999639097
-0.005986615928168071
0.00872068687651037
0.02625697777780106
0.043012419239161215
0.026143741719386493
0.046507914601855715
-0.001432758183135846
-0.020087354115199935
0.03443261320811078
0.003302717991664178
0.03994388580206901
0.03295055097672689
0.02188030787553641
0.0063560607246504475
0.07784846955063851
0.022232468164126912
0.03604803604215609
-0.017624688487062473
-0.010026780624896064
0.02380346392111174
-0.015036356041535831
0.03777033508313321
0.060249097797071244
0.11015755643372205
0.07851835704057933
0.11219920517414295
0.08061028904872558
0.07815947676390783
0.0742174010361954
0.04290030195809674
0.06411413768986068
0.09616274566510663
0.03333824610607897
-0.00024040607595660068
0.013779289200434613
0.0045587342414135645
-0.025140875999298956
0.022007941986359497
-0.014972258483434808
-0.0024318819486492116
-0.003437154268862125
0.0838776099510912
0.014086784168028776
0.006551058418563543
0.04533804539084055
0

0.07112347817685807
0.049533293479529616
0.10216484515611882
0.07634871804055399
0.06752469873726136
0.060996820575726474
0.1392625749380675
0.07609324883712165
0.04623374842752237
0.07402510678640778
0.06631287364039348
0.06631759059980713
0.06900680005837656
0.08550947835921707
0.08431529352313327
0.09689002454984963
0.08910915266883926
0.05924511659055237
0.05618975725607933
0.036455960159422346
0.07334093465074243
0.09378872940632607
0.05605433584240194
0.049554120954430264
0.15491378775752623
0.08080217847364732
0.07405969043406926
0.05678811138514464
0.05090781094577673
0.0885185344483169
0.08457693057014468
0.11430511724884995
0.08709536545103969
0.12396368362067361
0.07046346526587435
0.061868910078143075
0.0488203772505067
0.0707769967902935
0.057504131817085746
0.07620873070645812
0.09411525095102638
0.07193374799690402
0.1058773906366858
0.09199456345942938
0.06715780442158845
0.0699909714523187
0.036232528908121564
0.09913399510442886
0.05846611799425347
0.0938997800542993


0.08803185531853558
0.12592886294363304
0.045225353507273636
0.05099994624792445
0.08735344635655837
0.041586771496480435
0.08762855945159506
0.08479178273312171
0.07197793964009025
0.05078132713798525
0.10543174081430559
0.06083466606506538
0.04880804646167458
0.04871677379007874
0.05633947814479959
0.07428466177395945
0.05262982553420967
0.06945458498063459
0.052103279965902694
0.048845780487985324
0.02223867175138372
-0.019454574560145932
0.0006328791596235672
-0.014261062752720044
-0.015100061977221496
-0.0003166144094143286
0.03109779390604475
-0.011878675154649188
0.07110895289967523
0.04118883124256725
0.022602819231150706
0.02392647222981434
0.04929890822672228
0.03164623521274221
0.012243298562523847
0.02160739368881276
0.02146108241829242
0.06133244681809477
-0.014827578438580014
0.01686621932034449
-0.012265651172163938
-0.003039443344641679
-0.004536158326112455
-0.012283882517789169
0.0322814191134361
-0.010832936725969605
0.037424631012501625
-0.016264687667095296
0.00325

0.06572585383130886
0.06375112719822112
0.05247942208621556
0.049938662331130285
0.05038279866192083
0.06610510987458154
0.05837947139824992
0.05270475731212701
0.08999062085530368
0.051489926608007885
0.023370295081129866
0.048838031062835796
0.06710205894898166
0.10478952243556833
0.08519395717660053
0.040467782149028964
0.0680967582954444
0.042032954582942955
0.047090364787656824
0.026610457646378344
0.06864427660717967
-0.022954416301799563
0.006103347568508666
0.025053819307592623
0.029373513130632467
0.05956500955251437
0.013893689671359066
0.007543320118355679
0.0035524505506394077
0.012824697681379699
0.08423718944124683
0.09964318411990089
0.07532170709496394
0.06461118416837938
0.07226199514129908
0.06565338297425496
0.09846972472880727
0.09330323914257796
0.09449885682198769
0.035621665974444855
0.04965764007799517
0.07125765869282913
0.08013281846051162
0.08172142813514263
0.07472013828792992
0.0261756525712378
0.028107541953302367
0.060004914925581465
0.04173511724066356
0

-0.007955558112601624
-0.019034649727589947
-0.005576769714501106
0.009247312287295528
-0.004912693325248518
-0.03204468743624375
-0.020230968971381824
-0.021264302179636985
-0.010031132764844491
-0.006929150037532278
0.0002582433732485345
0.023106210587664877
-0.013532193636401568
-0.021339310812445722
-0.010124569394491634
-0.00016816856966028748
-0.011857712920148283
0.030352069463640136
0.01358688447389736
-0.011015142999283883
0.004017565845054527
-0.013895661215450272
-0.03071520625004945
-0.024122169958591705
-0.01345384572687458
-0.0376407149791774
0.015093475771700282
-0.018075940449184008
-0.007152763234762327
-0.015346106225388928
-0.004437128525530909
0.013609944128738505
-0.004341938702922266
-0.014267763270149308
-0.008530978323343263
0.01791389767604662
-0.027138899552411333
0.03482538045167388
-0.020190562347815882
0.024046352914508573
-0.027474804581932327
-0.01452458773662274
-0.022555371241882283
-0.04092876880680413
-0.028379938234803087
-0.021647679894621213
-0.024

-0.21511623184329917
-0.1897422962716166
-0.19866762416695116
-0.17799586211593355
-0.19341967573995297
-0.16377554123638755
-0.1858202602002269
-0.20106604542080567
-0.19939555944818413
-0.2117134839874486
-0.18662060651872758
-0.21779076145865414
-0.21352624630837425
-0.20786213479144464
-0.22081945727735988
-0.24014354490889153
-0.20786578875953365
-0.19391140024274273
-0.2156387907999481
-0.16667226461486911
-0.16964328870219286
-0.19841190679619314
-0.19433331942095627
-0.19009787176164822
-0.23405007006914336
-0.22542164614684723
-0.2192428747418712
-0.17615469842619363
-0.1953121153522396
-0.1963889166046772
-0.19405090270451475
-0.15514253450928045
-0.1669256690129987
-0.1877916111215749
-0.21668454148379251
-0.20473406607106118
-0.18751771790902344
-0.1831285865428553
-0.18914283583045938
-0.2027459176475403
-0.20863199656179457
-0.2060972735270554
-0.19989213345570417
-0.2192117978373188
-0.21086557064955688
-0.21958838185405316
-0.19138684087981933
-0.19371284539157826
-0.18

-0.22831600748179748
-0.24201357674508506
-0.2443284012023735
-0.2124677242069613
-0.19315168878074246
-0.23363168372815007
-0.21919123758136008
-0.21040489470003249
-0.22104709477226533
-0.23446802210331674
-0.22165459519844116
-0.24776071242326603
-0.21458934289981413
-0.22379398416019425
-0.23009606659562826
-0.22612125132617814
-0.19154913152713776
-0.2118182967945659
-0.22515167661721308
-0.22297014602246717
-0.23385200156770347
-0.2117562038514298
-0.22215256488762858
-0.23027769477035742
-0.23000618116184599
-0.19265921918712003
-0.2084542050750572
-0.2186854500910668
-0.2172087264625625
-0.2288004343252968
-0.20089525333846786
-0.0827633318457966
-0.08824256629841579
-0.07178596469981219
-0.06398884903103524
-0.07548868337057729
-0.04796895680829821
-0.11230616458854673
-0.07698669759693133
-0.08074392622389953
-0.062060055659697466
0.0015948088940547223
-0.02821712176260363
-0.010713470403368064
-0.008059907429041113
-0.01941816978073707
-0.045257976759035895
-0.05101048159698

0.012094368222768714
0.028573653290636686
-0.017021260394033844
0.0026382792636028274
0.0017519143085830136
-0.00021414269431546673
0.050754165506018606
0.008592770462985316
-0.01474561781454764
0.027735247081651482
-0.007216518582436188
-0.025002989983280127
0.032568268160280855
0.0065859251929824
-0.004567689854638596
0.0660517778475154
0.013911195983004054
-0.003114181851619574
0.004347269314255553
0.026758748737500365
-0.016553981865801842
0.008999210976475408
0.030915022559187877
0.009905222342421582
0.0744395751669526
-0.0019271609348406327
-0.015688774878413164
0.005315769985733437
0.00187235714976558
0.018063042174054023
0.008133099610408812
0.03479093865727412
0.00010976922036956549
0.047751168185899445
0.029698828858258364
0.001329751138676033
0.046113582121717844
-0.009673039350742957
0.007522552025918672
0.05007774922309602
0.06072175703760321
0.019253189723408837
0.11602509499975552
0.04655209537564639
0.060957501600831354
0.06375157093942058
0.05928753346276003
0.04843104

-0.020859670021681626
-0.030081212938247198
-0.01095011936322865
-0.010758788721802637
0.02169041813114811
-0.012550275203667277
0.06421806922238618
-0.00903353596211609
-0.030806462779406546
0.0338036791657397
-0.003569208484415942
-0.03371785976368766
-0.0063178479850795055
0.00413757258058578
-0.02074236275695357
0.11043544286268231
0.044316394322333974
0.05458815202364725
0.05615625614459194
0.09863923227113461
0.04097880395823736
0.07937707071392619
0.05191090959994216
0.046148142949398174
0.05126550671371999
-0.009332117436789134
-0.0031588434903868645
0.020573677522846973
-0.0006328343635379208
0.002925544971241645
0.01333981847697377
0.047222788701620805
0.044347357103967505
0.05609617999228905
0.015793897256384
-0.01095264530649881
0.017052582994301402
-0.0023424738079253493
0.02020961731299864
-0.009612208951172874
0.008431997621201365
-0.014856159110464728
0.042450926306276976
-0.013759763748697473
0.010072842549720684
-0.0012042055256143158
-0.01133004515354366
-0.025427279

0.031842384451432504
0.03989419997770571
-0.0028925997598442943
0.04973850225741869
0.02431021284119772
0.03321876917214622
-0.008432026338908855
-0.011530343824978861
-0.011713798309932077
-0.0040242494035676035
0.02142692142480046
-0.0083520192737567
0.18880462178082166
0.11188672706034461
0.13019134480999933
0.15741898304666968
0.10405251557391901
0.08694164202104696
0.15605956309875163
0.11510831684557556
0.16143295932716467
0.10140049492619516
0.051316651044050726
0.02458713562836884
0.049022303786360605
0.09552592044786443
0.050843268431106814
0.05362085590652835
0.050392729221501065
0.0822229056568908
0.13470569793520942
0.046991578527663835
0.046226718333062716
0.09495570276517487
0.039759524645360894
0.06217469747498727
0.05429080081863652
0.06847304275628205
0.0803847406890848
0.15201369721041422
0.049077045745932274
0.0834027377210993
0.05463357767585944
0.05685338108231594
0.037066678072847886
0.0779531593578541
0.060268018212103036
0.08009287652442085
0.12145864967820683
0