# Import des libs 

Pour lancer le code : `python3 main.py` <br>
Pour compiler la lib C++ : `python3 setup.py build_ext --inplace`


In [1]:
import ctypes
import platform
from collections import defaultdict
import numpy as np
import random
import sys, os

In [2]:
# Pick the secret-environments shared library that matches the host platform.
# platform.system() is queried once and normalized to lower case.
_system_name = platform.system().lower()

if _system_name == "windows":
    lib_path = "./libs/secret_envs.dll"
elif _system_name == "linux":
    lib_path = "./libs/libsecret_envs.so"
elif _system_name == "darwin":
    # On macOS, platform.processor() typically reports "i386" or "arm" rather
    # than a string containing "intel", so the original substring test alone
    # would not select the Intel build. The architecture is therefore also
    # read from platform.machine(); the original test is kept as a fallback.
    _cpu_name = platform.processor().lower()
    _machine_name = platform.machine().lower()
    _is_intel_mac = "intel" in _cpu_name or _machine_name in ("x86_64", "i386")
    if _is_intel_mac:
        lib_path = "./libs/libsecret_envs_intel_macos.dylib"
    else:
        lib_path = "./libs/libsecret_envs.dylib"
# No branch for other systems: lib_path is left undefined there, as before.

In [3]:
# Project wrappers around the secret environments (same names, same order).
from secret_envs_wrapper import (
    SecretEnv0Wrapper,
    SecretEnv1Wrapper,
    SecretEnv2Wrapper,
    SecretEnv3Wrapper,
    SecretEnv0,
    SecretEnv1,
    SecretEnv2,
    SecretEnv3,
)

# The `lib` module must be importable for the rest of the notebook;
# report the outcome and exit with status 1 when it is missing.
try:
    import lib
except ImportError:
    print("Library not found")
    sys.exit(1)
else:
    print("Library found, SUCCESS!")


Library found, SUCCESS!


# Monte Carlo
* Monte Carlo ES (exploring starts)
* On-policy Monte Carlo control
* Off-policy Monte Carlo control

In [4]:
# Instantiate SecretEnv3; this same `env` object is passed to the on-policy
# and off-policy Monte Carlo cells further down.
env = SecretEnv3()
# Assume `env` is your environment implemented in Python
# Build the MonteCarloES runner from `lib`; 10000 is its second constructor
# argument (presumably the number of episodes — TODO confirm).
monte_carlo_es = lib.MonteCarloES(env, 10000)
# Per the recorded output, run() yields a mapping from integer keys to floats
# (presumably state id -> estimated value — TODO confirm).
value_table = monte_carlo_es.run()
print(value_table)

{28202: -376.6794109074474, 46614: -155.941281347174, 46873: -154.48614277492322, 48921: -142.30323067282228, 40983: -559.637104371008, 41235: -557.2091963343515, 41498: -554.7567639740925, 42009: -549.7773328987781, 48904: -285.7092812528249, 49416: -279.3279065940464, 50184: -269.51208503156903, 50696: -262.80184168102136, 35682: -498.0824676349311, 1458: -841.4692370521312, 35205: -419.30044393722903, 58390: -312.80460314889825, 58649: -303.8430334837356, 21505: -744.1090104947116, 3122: -366.17011072686785, 62999: -129.72412877453866, 63258: -115.88295835811986, 65050: -15.0, 21794: -611.7602145472171, 23816: -640.9228489679481, 24069: -645.3766151191395, 24834: -659.0096610623525, 25097: -663.6461222852046, 25350: -668.3294164497016, 25609: -673.0600166158603, 36102: -428.6558218339136, 36617: -435.32886627274115, 37642: -449.08417563876947, 37890: -452.61027842299944, 38153: -456.17199840707013, 38406: -459.7696953606769, 46218: -401.4622102763235, 45590: -444.62594645782457, 581

In [5]:
# On-policy Monte Carlo control on the shared `env`; 1000 is the second
# constructor argument (presumably the number of episodes — TODO confirm).
onpolicy_mc_control = lib.onPolicyMonteCarloControl(env, 1000)
# Per the recorded output, run() yields a nested mapping: integer key ->
# {integer key: float} (presumably state -> {action: value} — TODO confirm).
q_table = onpolicy_mc_control.run()
print(q_table)

{14584: {1: -1481.7916785557252}, 29302: {0: -1842.4241902741658}, 29561: {0: -1844.8729016970574}, 35702: {0: -1865.728711686052}, 26088: {1: -1382.9421345395588}, 55785: {0: -735.999729419503}, 56040: {1: -719.1916389502596}, 58981: {0: -516.5665525610599}, 48104: {1: -1053.2868840953256}, 3113: {0: -866.3650944978174}, 30950: {0: -1356.167686482486}, 31209: {0: -1353.7047207803553}, 12645: {0: -806.1795096284285, 1: -1216.06704228595}, 32993: {0: -1334.6802273472404}, 33256: {1: -1332.0002168124206}, 37734: {0: -1298.6851762693282}, 38248: {1: -1294.5976448124798}, 33640: {1: -1481.1711527281282}, 26213: {0: -1024.5418112736882}, 26472: {1: -1026.8099004861272}, 22248: {1: -2037.381151634385}, 24677: {0: -2064.6790234578184}, 24936: {1: -2067.352529032332}, 55265: {2: -769.1133489306368, 0: -1090.161612014318}, 55528: {1: -1068.8501028494638}, 30070: {0: -2492.256184935499}, 30329: {0: -2490.157738573236}, 7457: {0: -873.5323804735882}, 7720: {1: -881.3458303721408}, 37608: {1: -134

In [6]:
# Off-policy Monte Carlo control on the shared `env`; 10000 is the second
# constructor argument (presumably the number of episodes — TODO confirm).
mcoffpolicy = lib.OffPolicyMonteCarloControl(env, 10000)
# NOTE(review): this rebinds `value_table`, first assigned in the MonteCarloES
# cell, to a nested mapping shaped like `q_table` (per the recorded output).
# A distinct name would avoid clobbering the earlier result — confirm that
# nothing downstream relies on the current name before renaming.
value_table = mcoffpolicy.run()
print(value_table)

{65025: {2: -14.0}, 63715: {1: -66.99317382614518}, 63971: {0: -57.5688618960866}, 63683: {1: -121.34089705011502}, 63939: {0: -104.38474348951972}, 64657: {2: -74.25250071048737}, 64922: {2: -5.9700000286102295}, 64577: {2: -35.64120034103394}, 64418: {2: -102.4503754679699}, 64326: {2: -98.50997641150953}, 64583: {0: -74.25250071048737}, 63991: {1: -64.37183719097052}, 64097: {1: -42.11895592173241}, 63722: {0: -147.50583981899337, 2: -99.45148349320625}, 65048: {2: -1.0, 1: -28.0}, 64881: {0: -31.84000015258789}, 64801: {1: -51.74000024795532}, 64897: {1: -31.84000015258789, 2: -24.87500011920929}, 64215: {0: -58.81194124169221}, 64819: {1: -11.940000057220459}, 64789: {1: -15.920000076293945}, 64234: {2: -66.65333540162695, 0: -80.37127069784025, 1: -83.55949151607419}, 64402: {1: -78.3079811292076}, 63861: {1: -66.83733224835967}, 64658: {1: -65.34220062522888, 0: -53.461800511550905}, 64117: {2: -112.72288737991006, 1: -114.37298706228154}, 64823: {1: -31.84000015258789}, 64310: 

In [7]:
# Q-learning run, currently disabled: the code is wrapped in a string literal,
# so it is not executed and the notebook only echoes the string as this
# cell's output.
"""q_learning = lib.QLearning(env, 10)
q_table = q_learning.run()
print(q_table)"""

'q_learning = lib.QLearning(env, 10)\nq_table = q_learning.run()\nprint(q_table)'

# Policy iteration, politique et value function

In [8]:
# Run policy iteration for SecretEnv3 and display the resulting policy and
# value function.
# NOTE(review): the environment *class* is passed here, whereas the Monte
# Carlo cells pass an instance (`env`) — confirm which one
# lib.PolicyIteration expects.
policy_iteration = lib.PolicyIteration(SecretEnv3)
# run() is unpacked into two results: the policy and the value function.
policy1, V1 = policy_iteration.run()

# The labels now name the environment that is actually used (SecretEnv3);
# they previously said "SecretEnv2".
print("Optimal policy for SecretEnv3:")
print(policy1)
print("Value function for SecretEnv3:")
print(V1)

Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
Delta = 0
