# Import des libs 

pour lancer le code : ```python3 main.py``` <br>
pour compiler la lib c++ : ```python3 setup.py build_ext --inplace```


In [1]:
import ctypes
import platform
from collections import defaultdict
import numpy as np
import random
import sys, os

In [2]:
if platform.system().lower() == "windows":
    lib_path = "./libs/secret_envs.dll"
elif platform.system().lower() == "linux":
    lib_path = "./libs/libsecret_envs.so"
elif platform.system().lower() == "darwin":
    if "intel" in platform.processor().lower():
        lib_path = "./libs/libsecret_envs_intel_macos.dylib"
    else:
        lib_path = "./libs/libsecret_envs.dylib"

In [3]:
# Now you can import your modules
from secret_envs_wrapper import SecretEnv0Wrapper
from secret_envs_wrapper import SecretEnv1Wrapper
from secret_envs_wrapper import SecretEnv2Wrapper
from secret_envs_wrapper import SecretEnv3Wrapper
from secret_envs_wrapper import SecretEnv0
from secret_envs_wrapper import SecretEnv1
from secret_envs_wrapper import SecretEnv2
from secret_envs_wrapper import SecretEnv3

try:
    import lib
    print("Library found, SUCCESS!")
except ImportError:
    print("Library not found")
    sys.exit(1)


Library found, SUCCESS!


# Monte carlo
* ES
* on policy monte carlo control
* off policy monte carlo control

In [4]:
env = SecretEnv3()
# Supposons que `env` soit votre environnement implémenté en Python
monte_carlo_es = lib.MonteCarloES(env, 10000)
value_table = monte_carlo_es.run()
print(value_table)

{9795: -151.17783836814584, 26629: -787.7353961812747, 22577: -1004.1793349260184, 25221: -1024.6937898719855, 10545: -632.9555544528076, 39283: -718.3391806058472, 9041: -426.30371459314205, 28721: -486.7359979722808, 38002: -357.0607064652556, 41217: -418.00367641858867, 41480: -416.1653297157461, 46856: -385.78515182014377, 46616: -214.33741430031722, 29762: -941.0847720148095, 10850: -804.6346704755375, 2019: -1322.8246494337848, 55049: -103.71331894288562, 55302: -101.73062519483396, 58151: -78.5536652074354, 59273: -69.46985778905682, 16465: -930.230218504718, 36707: -593.1891146063596, 31257: -57.52259759878721, 62794: -181.6740574832715, 10290: -149.2111779819881, 29233: -861.2570088638506, 9809: -623.5311963895869, 30322: -448.4743135952872, 43826: -699.5974477809264, 20817: -343.7547738154595, 38993: -337.5528254305743, 59674: -301.1537381398983, 61210: -226.63670856698752, 61720: -200.78227585653252, 44056: -611.5532663841733, 1973: -72.78768927396727, 61991: -232.7100563019

In [5]:
onpolicy_mc_control = lib.onPolicyMonteCarloControl(env, 1000)
q_table = onpolicy_mc_control.run()
print(q_table)

{50038: {0: -1390.6003291528032}, 56568: {1: -956.5518625198024}, 21233: {0: -1539.329092266134}, 21496: {1: -1544.7768459939334}, 13093: {2: -507.5642859685505}, 13609: {0: -515.8394819085246}, 14880: {1: -537.2700400485127}, 15136: {1: -541.6869039218018}, 15392: {1: -546.1483825387121}, 15648: {1: -550.6549265527854}, 39912: {1: -689.0342656597912}, 46181: {0: -665.5285869135096}, 50789: {0: -619.0321774193603}, 55909: {0: -1044.805239847952}, 58213: {0: -844.0760268893018}, 21729: {0: -1979.3362974267263}, 21992: {1: -1985.1881600954512}, 36326: {0: -2103.2985012125096}, 17461: {0: -1417.5760577299889}, 54248: {1: -1244.117505585371}, 55141: {1: -1163.8864632579314}, 55393: {0: -1143.3196488428136}, 13010: {0: -969.6021125799459}, 36726: {0: -1383.2292497446817}, 7432: {1: -502.0742472995661}, 8200: {1: -517.4428014354456}, 54135: {0: -802.0175990356336}, 54394: {0: -790.9268600937042}, 18881: {0: -955.3039111999179}, 24552: {1: -1433.3340859654513}, 18485: {0: -1665.334953770907},

In [6]:
mcoffpolicy = lib.OffPolicyMonteCarloControl(env, 10000)
value_table = mcoffpolicy.run()
print(value_table)

{64904: {2: -25.87000012397766}, 64897: {2: -31.84000015258789}, 63890: {2: -169.707570776195}, 64325: {2: -39.40399056460381}, 65025: {1: -17.0}, 63866: {1: -58.51985199179139}, 64210: {2: -126.46527664331916, 0: -80.36641920732679}, 64578: {1: -62.37210059680939}, 64530: {2: -35.64120034103394}, 64793: {2: -25.87000012397766}, 65153: {0: -21.0, 2: -19.0}, 64101: {1: -68.61393144864091}, 64353: {0: -64.35985125551956}, 65050: {1: -12.0, 0: 0.0}, 64179: {1: -159.72333443324806}, 64232: {1: -91.13138195413941}, 64199: {1: -93.11890696601267}, 64341: {1: -59.10598584690571}, 65122: {2: -9.0}, 64581: {1: -60.382100587272646}, 64840: {2: -63.68000030517578, 1: -59.700000286102295}, 64359: {1: -30.523192451683048}, 63953: {2: -117.03970398358278}, 64455: {1: -78.80798112920762}, 64311: {1: -73.84788109125138}, 64599: {1: -17.82060017051697}, 64193: {1: -81.30741277765843}, 65063: {1: -25.0}, 64418: {0: -88.67877925166512}, 63715: {1: -171.72910537442758}, 63971: {0: -85.8095784594569}, 6482

In [7]:
"""q_learning = lib.QLearning(env, 10)
q_table = q_learning.run()
print(q_table)"""

'q_learning = lib.QLearning(env, 10)\nq_table = q_learning.run()\nprint(q_table)'

# Policy iteration, politique et value function

In [8]:
policy_iteration = lib.PolicyIteration(SecretEnv3)
policy1, V1 = policy_iteration.run()

print("Optimal policy for SecretEnv2:")
print(policy1)
print("Value function for SecretEnv2:")
print(V1)

Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
