# Import des libs 

Pour lancer le code : `python3 main.py` <br>
Pour compiler la lib C++ : `python3 setup.py build_ext --inplace`


In [1]:
import ctypes
import platform
from collections import defaultdict
import numpy as np
import random
import sys, os

In [2]:
def select_lib_path(system=None, machine=None, processor=None):
    """Return the relative path of the secret-envs shared library for a platform.

    Args:
        system: OS name as returned by ``platform.system()``; queried from the
            running interpreter when ``None``.
        machine: CPU architecture as returned by ``platform.machine()``; only
            consulted on macOS, queried lazily when ``None``.
        processor: Processor name as returned by ``platform.processor()``; only
            consulted on macOS, queried lazily when ``None``.

    Returns:
        The path (relative to the working directory) of the library file.

    Raises:
        RuntimeError: if the operating system is not Windows, Linux or macOS.
    """
    system = (platform.system() if system is None else system).lower()

    if system == "windows":
        return "./libs/secret_envs.dll"
    if system == "linux":
        return "./libs/libsecret_envs.so"
    if system == "darwin":
        machine = (platform.machine() if machine is None else machine).lower()
        processor = (platform.processor() if processor is None else processor).lower()
        # On Intel Macs platform.processor() reports "i386", not a string
        # containing "intel", so the architecture is the reliable signal.
        # The original substring check is kept as a fallback.
        is_intel = machine in ("x86_64", "i386") or "intel" in processor
        if is_intel:
            return "./libs/libsecret_envs_intel_macos.dylib"
        return "./libs/libsecret_envs.dylib"

    # Fail here with a clear message instead of leaving `lib_path` undefined.
    raise RuntimeError(f"Unsupported platform for secret_envs: {platform.system()!r}")


lib_path = select_lib_path()

In [3]:
# Now you can import your modules
from secret_envs_wrapper import SecretEnv0Wrapper
from secret_envs_wrapper import SecretEnv1Wrapper
from secret_envs_wrapper import SecretEnv0
from secret_envs_wrapper import SecretEnv1

# Load the project's `lib` module (presumably the C++ extension built by
# setup.py -- confirm) that provides the algorithms used in the cells below.
try:
    import lib
except ImportError as exc:
    # ImportError is raised both when the module is absent and when a built
    # extension fails to load; show the underlying reason so the two cases
    # can be told apart, and remind the reader how to build the library.
    print("Library not found")
    print(f"  reason: {exc}", file=sys.stderr)
    print("  build it with: python3 setup.py build_ext --inplace", file=sys.stderr)
    sys.exit(1)
else:
    print("Library found, SUCCESS!")


Library found, SUCCESS!


# Monte Carlo
* Exploring Starts (ES)
* On-policy Monte Carlo control
* Off-policy Monte Carlo control

In [4]:
# Monte Carlo ES: create the environment and run lib.MonteCarloES on it.
# `env` is also passed to the algorithm constructors in later cells.
env = SecretEnv0()
# Assume `env` is your environment implemented in Python
# The second argument (10000) is presumably the number of episodes -- TODO confirm against lib.
monte_carlo_es = lib.MonteCarloES(env, 10000)
value_table = monte_carlo_es.run()
# The recorded output is a dict with integer keys and float values
# (presumably state id -> estimated value; verify against lib).
print(value_table)

{3730: -1.0422821178991493, 3606: -10.405282351691268, 3737: -21.998692329961997, 7489: -4.90099501, 7552: -11.821197, 3529: 81.6404757799485, 3656: 81.45502604035202, 3781: 81.26770307106266, 7617: -3.9403989999999998, 5064: 31.42313571583795, 6337: -20.304741824364672, 6344: -2.776813111991311, 3608: -57.52065315923017, 4166: -31.962813026604014, 5266: -39.57011367306976, 5394: -53.60970684058825, 8008: -3.3333333333333335, 4934: -18.193344381394738, 6217: -13.40435594599621, 6342: -5.7222121119913085, 3782: 6.477467611382119, 7961: -4.0, 3477: -14.305101642753419, 3784: 5.423160602324601, 6854: -12.487785029069551, 6985: -13.54927026163591, 6553: -24.541016792363685, 7873: -5.97, 4777: -26.448285469803896, 3864: -19.053617509885445, 8001: -3.0, 7624: -7.8807979999999995, 3986: -30.952044967047254, 6089: -9.344695122025366, 6214: -8.428984971742793, 6856: 8.56679249911955, 6981: 24.94482575490773, 4674: -12.24005875446279, 4809: -11.757635105517972, 4936: -5.510950013007719, 6424: -3

In [5]:
# On-policy Monte Carlo control, reusing the `env` created in an earlier cell.
# 10000 is presumably the number of episodes -- TODO confirm against lib.
onpolicy_mc_control = lib.onPolicyMonteCarloControl(env, 10000)
q_table = onpolicy_mc_control.run()
# The recorded output is a nested dict {int: {int: float}}
# (presumably state -> action -> value; verify against lib).
print(q_table)

{3814: {0: -112.48831830699179}, 3961: {0: -111.60436085116127}, 5330: {0: -104.32654646462628}, 8038: {0: -9.0}, 7205: {0: -47.55425782097359}, 7336: {1: -40.96389639425397}, 6738: {0: -68.45976008327358}, 6873: {0: -62.08056514265371}, 7798: {0: -22.780700208473206}, 8056: {1: -7.0}, 7208: {1: -47.55425782097359}, 2677: {0: -100.94212833410877}, 3026: {0: -116.62065386788652}, 3288: {2: -114.9277131707597}, 3413: {0: -114.06839604335222}, 7154: {0: -36.67725502184536}, 7289: {0: -31.997226986561643}, 3154: {0: -114.79833711620543}, 3416: {1: -113.06839604335222}, 2213: {0: -94.00364995450376}, 2344: {1: -95.96328185790564}, 7634: {0: -33.51319246121979}, 3733: {0: -113.46841832587454}, 3864: {1: -112.59436086069802}, 5497: {0: -99.32984394727912}, 5624: {1: -96.29277073632292}, 8018: {0: -9.0}, 3472: {1: -113.20039895332232}, 4581: {0: -113.03024259743741}, 5605: {2: -105.11485596392156}, 5865: {0: -95.0666810647695}, 3666: {0: -115.3237352342717}, 3801: {0: -114.46841832587454}, 392

In [6]:
# Off-policy Monte Carlo control on the same `env`.
# 10000 is presumably the number of episodes -- TODO confirm against lib.
mcoffpolicy = lib.OffPolicyMonteCarloControl(env, 10000)
# NOTE: this rebinds `value_table`, which an earlier cell assigned from
# MonteCarloES.run(). The recorded output here is a nested dict
# {int: {int: float}}, unlike the flat dict printed by that earlier cell.
value_table = mcoffpolicy.run()
print(value_table)

{7513: {2: -11.772090235368196}, 7650: {1: -9.551828179727847}, 8021: {2: -4.5384615384615365, 0: -4.176470588235294, 1: -3.8000000000000003}, 7894: {0: -7.711250036954878, 1: -7.960000038146971}, 7640: {2: -9.870798122457504}, 7897: {0: -6.859444472524855, 1: -7.18611114554935, 2: -5.69232560867487}, 7737: {0: -8.910300085258484, 2: -9.504320090942382}, 7649: {0: -8.092718301628684, 2: -11.638977952632331}, 7952: {1: -3.116279069767441, 2: -2.8500000000000005}, 7928: {1: -5.748888916439484, 2: -7.499884429005532}, 7960: {2: -0.6666666666666667, 1: -2.4}, 7656: {1: -11.821197169381144, 2: -11.92593401198834}, 7529: {1: -6.890995113011094, 2: -18.15823485286299}, 7609: {0: -18.125835659717755, 2: -11.821197169381144}, 8017: {0: -4.162790697674419, 2: -3.4571428571428573, 1: -3.84}, 7990: {1: -3.018433179723504, 0: -3.4157303370786525}, 7733: {0: -7.539484687526412, 2: -8.061700077138628}, 7736: {2: -8.718680728586257, 1: -10.395350099468232}, 7993: {1: -2.9384615384615382, 2: -3.1470588

In [7]:
# Q-learning on `env`. The recorded log shows episodes 1 to 10, so the second
# argument (10) is presumably the number of episodes -- TODO confirm against lib.
q_learning = lib.QLearning(env, 10)
# Rebinds `q_table`, which the on-policy Monte Carlo cell also assigns.
q_table = q_learning.run()
# NOTE(review): the recorded Q-table contains values such as 2.04993449e-316
# next to entries around 1e6-1e7, and whole rows of one repeated value; this may
# indicate uninitialised storage inside lib -- confirm.
print(f'Q-table: {q_table}')

Episode 1: Total Reward: 27, Exploration Rate: 0.995
Episode 2: Total Reward: -110, Exploration Rate: 0.990025
Episode 3: Total Reward: -134, Exploration Rate: 0.985075
Episode 4: Total Reward: -9, Exploration Rate: 0.98015
Episode 5: Total Reward: -105, Exploration Rate: 0.975249
Episode 6: Total Reward: -149, Exploration Rate: 0.970373
Episode 7: Total Reward: -130, Exploration Rate: 0.965521
Episode 8: Total Reward: -121, Exploration Rate: 0.960693
Episode 9: Total Reward: -21, Exploration Rate: 0.95589
Q-table: [[ 2.04993449e-316  1.75159540e+006  4.17768804e+007]
 [ 2.04993449e-316 -1.62744568e+002 -1.62744568e+002]
 [-1.62744568e+002 -1.62744568e+002 -1.62744568e+002]
 ...
 [-1.23223532e+002 -1.23223532e+002 -1.23223532e+002]
 [-1.23223532e+002 -1.23223532e+002 -1.23223532e+002]
 [-1.23223532e+002 -1.23223532e+002 -1.23223532e+002]]
Episode 10: Total Reward: -176, Exploration Rate: 0.95111


: 

# Dyna-Q

In [8]:
# Dyna-Q on the `env` created in an earlier cell.
# 10 is presumably the number of episodes -- TODO confirm against lib.
dynaq = lib.DynaQ(env, 10)
# Rebinds `q_table`, which earlier cells also assign.
q_table = dynaq.run()
print(q_table)

# Policy iteration : politique et fonction de valeur

In [None]:
# Policy iteration for SecretEnv0.
# NOTE(review): the class `SecretEnv0` itself is passed here, whereas the other
# cells pass the instance `env` -- confirm which one lib.PolicyIteration expects.
policy_iteration = lib.PolicyIteration(SecretEnv0)
# run() is unpacked into two values: a policy and a value function.
# NOTE(review): the recorded run prints "Delta: 1" repeatedly and ends with
# KeyboardInterrupt, so it was stopped by hand -- confirm that run() terminates.
policy1, V1 = policy_iteration.run()

print("Optimal policy for SecretEnv0:")
print(policy1)
print("Value function for SecretEnv0:")
print(V1)

Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
D

KeyboardInterrupt: 