# Import des libs 

Pour lancer le code : `python3 main.py` <br>
Pour compiler la bibliothèque C++ : `python3 setup.py build_ext --inplace`


In [1]:
import ctypes
import platform
from collections import defaultdict
import numpy as np
import random
import sys, os

In [2]:
# Select the native secret-envs shared library that matches the host platform.
# `lib_path` is relative to the notebook's working directory.
_system = platform.system().lower()  # queried once instead of once per branch

if _system == "windows":
    lib_path = "./libs/secret_envs.dll"
elif _system == "linux":
    lib_path = "./libs/libsecret_envs.so"
elif _system == "darwin":
    # On Intel Macs platform.processor() typically reports "i386" rather than a
    # string containing "intel", so the substring test alone would never pick
    # the Intel build. The machine architecture is checked as well; the original
    # substring test is kept for backward compatibility.
    _is_intel_mac = (
        "intel" in platform.processor().lower()
        or platform.machine().lower() in ("x86_64", "i386", "amd64")
    )
    if _is_intel_mac:
        lib_path = "./libs/libsecret_envs_intel_macos.dylib"
    else:
        lib_path = "./libs/libsecret_envs.dylib"
else:
    # Fail here with a clear message instead of leaving `lib_path` unbound and
    # hitting a NameError wherever it is first used.
    raise OSError(
        f"Unsupported platform {platform.system()!r}: no secret_envs library is available for it"
    )

In [3]:
# Project-local environment wrappers; they must be importable from the
# notebook's working directory.
from secret_envs_wrapper import (
    SecretEnv0Wrapper,
    SecretEnv1Wrapper,
    SecretEnv0,
    SecretEnv1,
)

# `lib` is presumably the compiled C++ extension produced by
# `python3 setup.py build_ext --inplace` — confirm against setup.py.
# Stop here if it cannot be imported, since every later cell uses it.
try:
    import lib
except ImportError:
    print("Library not found")
    sys.exit(1)
else:
    print("Library found, SUCCESS!")


Library found, SUCCESS!


# Monte Carlo
* Exploring Starts (ES)
* On-policy Monte Carlo control
* Off-policy Monte Carlo control

In [4]:
# Run Monte Carlo ES on SecretEnv0.
# `env` is created here and reused by the later algorithm cells.
env = SecretEnv0()
# Assume `env` is your environment implemented in Python
# The second argument (10000) is presumably the number of episodes — TODO confirm against lib.MonteCarloES.
monte_carlo_es = lib.MonteCarloES(env, 10000)
value_table = monte_carlo_es.run()
# Prints the whole table; the saved output is an {int: float} dict
# (presumably state id -> estimated value — verify against the library).
print(value_table)

{6917: -8.64827525163591, 6214: 4.633209795524361, 3782: -50.45659851721355, 6726: -16.318159634028355, 6857: -14.46278750911955, 6272: 2.659244148973813, 5016: -0.5902279715171148, 4885: 18.885894194831454, 6209: -16.46436403587115, 6344: -14.61046872310217, 6853: -38.247169996478206, 5954: -17.150667581910366, 6089: -10.49348584143794, 6216: -15.926883663000504, 4936: -0.38222831010152936, 3737: -10.56153545897482, 4934: 1.9000067990002578, 3608: -62.257236390042834, 5266: -35.60794451519385, 5273: -23.857460602743345, 7622: -7.0927182, 3736: -9.395209078046326, 2329: -35.180690590030366, 6937: -19.437853498492338, 5570: -25.80945558997117, 6681: -10.671085363009944, 7337: -17.555955179699996, 5576: -14.10207741935979, 7209: -3.3967326046504995, 3394: 20.42295889822758, 3529: 13.748679295953156, 7369: -16.6049651298, 7489: -13.742389020000001, 7617: -13.548821749999998, 7568: -3.1523192, 7701: 0.0, 5446: -13.510670072192736, 5577: -2.301128588575518, 3734: -28.146859583708892, 3865: 

In [5]:
# Run on-policy Monte Carlo control on the `env` created in the Monte Carlo ES cell.
# The second argument (10000) is presumably the number of episodes — TODO confirm.
onpolicy_mc_control = lib.onPolicyMonteCarloControl(env, 10000)
q_table = onpolicy_mc_control.run()
# Prints the whole table; the saved output is a nested {int: {int: float}} dict
# (presumably state -> {action: Q-value} — verify against the library).
print(q_table)

{7654: {0: -29.57279340475941}, 4338: {0: -111.90885772415548}, 4600: {1: -108.08984354097703}, 5330: {0: -104.32654646462628}, 2642: {0: -102.93212834364552}, 6646: {0: -72.77536213494538}, 7160: {1: -50.13831663981622}, 7526: {0: -36.2969657338572}, 7657: {0: -28.582793395222666}, 3237: {0: -110.08464681468712}, 4952: {1: -110.17745167211235}, 5008: {1: -108.26005115117582}, 4325: {0: -116.77084291379786}, 6610: {0: -73.79506311644124}, 6745: {0: -67.46976007373684}, 6872: {1: -61.08056514265371}, 3538: {0: -115.19039896285906}, 3800: {1: -113.46841832587454}, 7762: {0: -25.7508002368927}, 1026: {0: -151.68858239001636}, 3957: {0: -111.60436085116127}, 5862: {1: -98.96767616824386}, 6121: {0: -88.79468874026665}, 3368: {1: -110.18651087259259}, 5586: {0: -98.28277074585966}, 5753: {0: -94.22502004773722}, 2258: {0: -97.94404901096414}, 2393: {0: -98.93338188632514}, 2520: {1: -99.93270801342683}, 7926: {0: -15.920000076293945}, 4585: {1: -121.59203550341353}, 4838: {0: -115.939223830

In [6]:
# Run off-policy Monte Carlo control on the same `env`.
# The second argument (10000) is presumably the number of episodes — TODO confirm.
mcoffpolicy = lib.OffPolicyMonteCarloControl(env, 10000)
# NOTE(review): this rebinds `value_table` from the Monte Carlo ES cell; the saved
# output here is a nested {int: {int: float}} dict, not the flat dict printed there.
value_table = mcoffpolicy.run()
print(value_table)

{7701: {2: -11.880400113677979}, 7961: {0: -4.0}, 8001: {0: -6.0, 1: -2.0, 2: -3.0}, 7633: {2: -14.448129873688064}, 7506: {1: -7.836542669421274}, 7461: {1: -14.702985310423053}, 7585: {2: -3.940399056460381, 0: -11.821197169381144}, 7832: {2: -9.950000047683716}, 7409: {2: -17.516617321060387}, 7528: {2: -16.336650344914503}, 7472: {2: -13.366350282202774}, 7638: {1: -11.821197169381144}, 7765: {1: -20.790700198936463, 2: -14.850500142097474, 0: -6.940200056838989}, 7761: {0: -6.940200056838989, 2: -8.910300085258484}, 7634: {0: -9.870798122457504, 1: -11.821197169381144}, 7958: {1: -2.0, 0: -4.0}, 7618: {1: -3.940399056460381}, 7410: {1: -33.12191118553809}, 7537: {2: -15.19288681514626}, 8006: {1: -4.0}, 7590: {1: -2.3642394338762287}, 7696: {1: -11.880400113677979, 2: -11.880400113677979}, 7829: {2: -7.960000038146973, 0: -7.960000038146973}, 7893: {0: -8.960000038146973, 1: -7.562000036239624, 2: -5.9700000286102295}, 7766: {0: -13.365450127887726, 1: -11.137875106573105}, 7393: 

In [7]:
# Run Q-learning on the same `env`; the saved output logs 10 episodes, so the
# second argument (10) is presumably the episode count — TODO confirm.
q_learning = lib.QLearning(env, 10)
# Rebinds `q_table` from the on-policy Monte Carlo cell; the saved output prints as a
# 2-D array with three columns rather than a dict.
q_table = q_learning.run()
# NOTE(review): the saved output shows `nan` and values around 1e-310 in the first rows,
# which looks like uninitialised memory in the returned array — confirm in lib.QLearning.
# The table is also printed before the "Episode 10" log line, so the library's own
# logging appears not to be flushed in step with Python's stdout.
print(f'Q-table: {q_table}')

Episode 1: Total Reward: -138, Exploration Rate: 0.995
Episode 2: Total Reward: -134, Exploration Rate: 0.990025
Episode 3: Total Reward: -29, Exploration Rate: 0.985075
Episode 4: Total Reward: -153, Exploration Rate: 0.98015
Episode 5: Total Reward: -101, Exploration Rate: 0.975249
Episode 6: Total Reward: 35, Exploration Rate: 0.970373
Episode 7: Total Reward: -88, Exploration Rate: 0.965521
Episode 8: Total Reward: 9, Exploration Rate: 0.960693
Episode 9: Total Reward: -128, Exploration Rate: 0.95589
Q-table: [[ 6.92858087e-310  5.90289496e-303 -4.86981881e-308]
 [ 1.54006032e-316  2.89314429e-300              nan]
 [ 9.04326981e-302  1.82301692e-304  3.82264778e-296]
 ...
 [-6.55893783e+001 -6.55893783e+001 -6.55893783e+001]
 [-6.55893783e+001 -6.55893783e+001 -6.55893783e+001]
 [-6.55893783e+001 -6.55893783e+001 -6.55893783e+001]]
Episode 10: Total Reward: -167, Exploration Rate: 0.95111


: 

# Dyna-Q

In [8]:
# Run Dyna-Q on the same `env`.
# The second argument (10) is presumably the number of episodes — TODO confirm against lib.DynaQ.
dynaq = lib.DynaQ(env, 10)
# Rebinds `q_table` from the Q-learning cell.
q_table = dynaq.run()
print(q_table)

# Policy iteration : politique et fonction de valeur

In [None]:
# Run policy iteration for SecretEnv0 and print the resulting policy and value function.
# NOTE(review): this passes the SecretEnv0 class itself, whereas the other algorithm
# cells pass the `env` instance — confirm which one lib.PolicyIteration expects.
policy_iteration = lib.PolicyIteration(SecretEnv0)
# NOTE(review): in the saved output this call logs "Delta: 0" then "Delta: 1" repeatedly
# and ends with KeyboardInterrupt, so the run did not finish — check the stopping
# criterion in the library.
policy1, V1 = policy_iteration.run()

print("Optimal policy for SecretEnv0:")
print(policy1)
print("Value function for SecretEnv0:")
print(V1)

Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
D

KeyboardInterrupt: 