# Import des libs 

Pour lancer le code : `python3 main.py` <br>
Pour compiler la lib C++ : `python3 setup.py build_ext --inplace`


In [1]:
import ctypes
import platform
from collections import defaultdict
import numpy as np
import random
import sys, os

In [2]:
# Select the compiled secret-envs shared library that matches the host platform.
_system = platform.system().lower()
if _system == "windows":
    lib_path = "./libs/secret_envs.dll"
elif _system == "linux":
    lib_path = "./libs/libsecret_envs.so"
elif _system == "darwin":
    # On Intel Macs platform.processor() typically reports "i386", which does
    # not contain "intel", so the machine architecture is checked as well.
    _is_intel_mac = (
        "intel" in platform.processor().lower()
        or platform.machine().lower() in ("x86_64", "i386")
    )
    if _is_intel_mac:
        lib_path = "./libs/libsecret_envs_intel_macos.dylib"
    else:
        lib_path = "./libs/libsecret_envs.dylib"
else:
    # Fail here with a clear message instead of leaving `lib_path` unbound.
    raise OSError(f"Unsupported platform for secret_envs: {platform.system()!r}")

In [3]:
# Now you can import your modules
from secret_envs_wrapper import SecretEnv0Wrapper
from secret_envs_wrapper import SecretEnv1Wrapper
from secret_envs_wrapper import SecretEnv2Wrapper
from secret_envs_wrapper import SecretEnv0
from secret_envs_wrapper import SecretEnv1
from secret_envs_wrapper import SecretEnv2

# `lib` is a project module (presumably the C++ extension built with
# `python3 setup.py build_ext --inplace` -- TODO confirm). Stop early when it
# cannot be imported, since every later cell calls into it.
try:
    import lib
except ImportError as exc:
    # Report the underlying cause: an ImportError can also come from a failed
    # nested import inside the module, not only from the module being absent.
    print(f"Library not found: {exc}")
    sys.exit(1)
else:
    print("Library found, SUCCESS!")


Library found, SUCCESS!


# Monte Carlo
* Exploring Starts (ES)
* On-policy Monte Carlo control
* Off-policy Monte Carlo control

In [4]:
# Environment instance reused by the on-policy and off-policy Monte Carlo cells.
env = SecretEnv2()
# Assume `env` is your environment implemented in Python.
# NOTE(review): 10000 is presumably the number of episodes -- confirm against
# the lib.MonteCarloES constructor.
monte_carlo_es = lib.MonteCarloES(env, 10000)
value_table = monte_carlo_es.run()
# Prints the whole mapping returned by run(); the recorded output is very long.
print(value_table)

{225234: -1300.2144718523361, 588773: -2392.3991862071107, 613356: -2435.0207371203214, 1293794: -2488.364622757806, 2055651: -281.26771058, 2080227: -115.42, 1588981: -2256.5378951359244, 429986: -1504.4176494551325, 515894: -1639.2692625543714, 573365: -2219.762449562635, 794469: -2288.6191302337506, 1466287: -2415.2942707456195, 1474471: -2403.3275462076967, 1891749: -1232.5221378788585, 327083: -1827.6105903291323, 524012: -2075.6116077885017, 245453: -1518.4655186212306, 278211: -1584.8555498593323, 319182: -1671.6857816088623, 1933137: -990.304734420562, 1249269: -2892.996037429729, 221134: -1622.776913904802, 229318: -1639.1685999038405, 1183732: -2825.075915237589, 2027242: -426.6993122036475, 2035441: -376.463951720856, 2043632: -325.7211633544, 1023794: -2526.0470276878063, 724940: -1973.5724478705401, 1441519: -2709.5676601712426, 614133: -2284.152986661144, 1597322: -2306.9535037276987, 1711907: -1875.8656743399806, 1720108: -1847.3390649898795, 419825: -1838.989241374114, 

In [5]:
# On-policy Monte Carlo control, run on the `env` instance created in the
# Monte Carlo ES cell.
# NOTE(review): 1000 is presumably the number of episodes -- confirm against
# lib.onPolicyMonteCarloControl. The ES and off-policy cells pass 10000.
onpolicy_mc_control = lib.onPolicyMonteCarloControl(env, 1000)
q_table = onpolicy_mc_control.run()
# Prints the whole mapping returned by run(); the recorded output is very long.
print(q_table)

{749546: {0: -2918.120532042455}, 757745: {0: -2929.414650611495}, 24577: {0: -1465.139483954255}, 1818602: {0: -1754.9496997945957}, 1843177: {0: -1634.191061054911}, 1937353: {0: -1098.3197959113108}, 389105: {0: -2259.255233340583}, 278474: {0: -1800.9848812111577}, 1253313: {0: -3073.8237327969905}, 1306569: {2: -3031.3705243262557}, 1748937: {0: -2040.52240702748}, 1757136: {1: -2006.5882705971344}, 1146817: {0: -2949.9713963159606}, 1826801: {0: -1715.1006903415364}, 1155056: {1: -2949.4660284723846}, 507888: {1: -2328.091848504723}, 1429449: {0: -2897.791454039514}, 1437648: {1: -2884.6378045752163}, 1523657: {0: -2742.9879990455165}, 1507265: {0: -2780.9475400936885}, 1359792: {1: -2986.646581597498}, 888777: {0: -2950.6203449284667}, 917441: {0: -2893.6195419994056}, 307185: {0: -2061.9693892738896}, 602064: {1: -2528.2011188444767}, 614337: {0: -2543.6273682788815}, 1679297: {0: -2241.9895440977994}, 1875913: {0: -1477.8352270908474}, 778185: {0: -2806.769714856623}, 1273801:

In [6]:
# Off-policy Monte Carlo control, run on the same `env` instance.
# NOTE(review): 10000 is presumably the number of episodes -- confirm against
# lib.OffPolicyMonteCarloControl.
mcoffpolicy = lib.OffPolicyMonteCarloControl(env, 10000)
# NOTE(review): this rebinds `value_table`, which the Monte Carlo ES cell also
# assigns, so the ES result is no longer reachable under that name afterwards.
# The recorded output here is a nested mapping, unlike the flat mapping printed
# by the ES cell -- consider a distinct name.
value_table = mcoffpolicy.run()
print(value_table)

{2080235: {1: -131.34000062942505}, 2080227: {2: -115.4200005531311}, 2056110: {1: -296.97070130239865}, 2064325: {2: -235.42394338762284}, 2072526: {0: -178.20600170516968}, 2080725: {0: -119.40000057220459}, 2072429: {2: -172.2658016483307}, 2084720: {2: -65.0}, 2064338: {2: -251.18553961346439}, 2039778: {0: -447.2516829158674}, 2039798: {1: -365.7300980799126}, 2047974: {2: -340.30702671361297, 0: -316.89908591689635}, 2068428: {0: -175.2359016767502}, 2076627: {0: -117.41000056266785}, 2072275: {1: -184.14620176200867}, 2072517: {2: -154.44520147781373}, 2080718: {0: -103.48000049591064}, 2072528: {2: -172.2658016483307}, 2039783: {1: -433.6647521098749}, 2080711: {1: -109.45000052452087}, 2088903: {1: -56.0, 0: -54.0}, 2056112: {2: -274.45572579456365}, 2064227: {2: -243.30474150054363}, 2072428: {1: -178.20600170516968, 0: -184.14620176200867}, 2072422: {2: -178.20600170516968}, 2047975: {1: -352.0109971119712, 0: -344.20835017973235}, 2056109: {0: -287.16871109544996}, 2088911:

In [7]:
# NOTE(review): the Q-learning run below is disabled by wrapping it in a string
# literal. As the last expression of the cell, the string is simply echoed back
# as the cell output; nothing is executed.
"""q_learning = lib.QLearning(env, 10)
q_table = q_learning.run()
print(q_table)"""

'q_learning = lib.QLearning(env, 10)\nq_table = q_learning.run()\nprint(q_table)'

# Policy iteration, politique et value function

In [9]:
# Run policy iteration and show the resulting policy and value function.
# NOTE(review): the SecretEnv2 class itself is passed here, whereas the Monte
# Carlo cells pass the `env` instance -- confirm which one lib.PolicyIteration
# expects.
# NOTE(review): the execution count jumps from In [7] to In [9]; re-check this
# cell with Restart Kernel -> Run All.
policy_iteration = lib.PolicyIteration(SecretEnv2)
policy1, V1 = policy_iteration.run()

# Each label is followed by its value on the next line.
print("Optimal policy for SecretEnv2:", policy1, sep="\n")
print("Value function for SecretEnv2:", V1, sep="\n")

Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
