# Import des libs 

pour lancer le code : ```python3 main.py``` <br>
pour compiler la lib c++ : ```python3 setup.py build_ext --inplace```


In [1]:
import ctypes
import platform
from collections import defaultdict
import numpy as np
import random
import sys, os

In [2]:
if platform.system().lower() == "windows":
    lib_path = "./libs/secret_envs.dll"
elif platform.system().lower() == "linux":
    lib_path = "./libs/libsecret_envs.so"
elif platform.system().lower() == "darwin":
    if "intel" in platform.processor().lower():
        lib_path = "./libs/libsecret_envs_intel_macos.dylib"
    else:
        lib_path = "./libs/libsecret_envs.dylib"

In [3]:
# Now you can import your modules
from secret_envs_wrapper import SecretEnv0Wrapper
from secret_envs_wrapper import SecretEnv1Wrapper
from secret_envs_wrapper import SecretEnv0
from secret_envs_wrapper import SecretEnv1

try:
    import lib
    print("Library found, SUCCESS!")
except ImportError:
    print("Library not found")
    sys.exit(1)


Library found, SUCCESS!


# Monte carlo
* ES
* on policy monte carlo control
* off policy monte carlo control

In [4]:
env = SecretEnv0()
# Supposons que `env` soit votre environnement implémenté en Python
monte_carlo_es = lib.MonteCarloES(env, 10000)
value_table = monte_carlo_es.run()
print(value_table)

{7961: -2.0, 6726: -14.406573574128355, 6985: -10.638275251635909, 3656: -1.3235328278318752, 3781: -0.3268008361938133, 7873: -7.96, 6337: -16.92929730916605, 6216: -29.77005219771714, 6272: -28.05055777547186, 3737: -21.91107376546955, 7617: -17.751596, 8001: -3.0, 2501: -38.05720473046614, 5570: -5.136806475739322, 5273: -34.144497083699875, 4168: -14.092404812900925, 5573: -28.922537906425468, 5446: -11.974699268116684, 5577: -12.82526715393977, 6344: -5.140881755523397, 6089: -21.845296147536658, 6214: -10.321455171043791, 6553: 0.0, 7497: -17.326683366666668, 7622: -11.821197, 4649: -30.61164439733855, 5144: -46.42190881958741, 5062: -11.789523113525123, 5574: -17.25220443598896, 5576: -8.393249924970293, 6424: -19.895740452540263, 4518: -29.175323416986163, 4674: -21.873237647949626, 2322: -22.896097215572123, 5793: -6.5495095517075805, 5800: -28.748103641687564, 3606: -20.274730655915008, 3730: -20.268802494914226, 5522: -31.78211964562708, 5682: -27.14299976152485, 6912: -5.18

In [5]:
onpolicy_mc_control = lib.onPolicyMonteCarloControl(env, 10000)
q_table = onpolicy_mc_control.run()
print(q_table)

{3109: {0: -109.98380139638925}, 3240: {1: -110.08464681468712}, 2264: {1: -90.78692896264752}, 5609: {0: -103.14475593550206}, 7585: {0: -27.582793395222666}, 7729: {0: -20.790700198936463}, 6181: {0: -81.64099791238175}, 6312: {1: -77.41514866070037}, 2150: {0: -94.05361436098413}, 4457: {0: -114.90994123986668}, 4081: {0: -111.71147454074179}, 2278: {0: -95.9936499640405}, 4517: {0: -112.03024259743741}, 4648: {1: -110.13155711832428}, 5862: {0: -96.05668107430624}, 5993: {0: -90.96634364321574}, 7797: {1: -23.760800227355958}, 7921: {0: -15.920000076293945}, 8056: {2: -8.0}, 2469: {0: -80.82212276227462}, 6758: {0: -73.26322547673306}, 6889: {0: -66.93255034183284}, 7634: {0: -33.51319246121979}, 2130: {0: -95.98450943603898}, 2265: {0: -96.95404900142739}, 4818: {0: -101.5316433138839}, 5313: {0: -31.71430536760356}, 5448: {1: -30.014449577134812}, 5504: {1: -29.30752454306715}, 3957: {0: -55.150459164265584}, 6354: {0: -84.30614377371147}, 6489: {0: -78.0870131606219}, 6616: {1: 

In [6]:
mcoffpolicy = lib.OffPolicyMonteCarloControl(env, 10000)
value_table = mcoffpolicy.run()
print(value_table)

{7584: {2: -2.9701000284194947, 1: -3.940399056460381}, 7593: {2: -3.940399056460381}, 7600: {2: -2.9701000284194947, 1: -19.701995282301905}, 7954: {0: -4.0}, 7618: {1: -11.821197169381144, 0: -7.893998433791797}, 7958: {0: -4.0}, 7526: {1: -17.009719332391985}, 7653: {2: 0.9702990280408862, 1: -19.701995282301905}, 7960: {1: -6.0, 2: -2.0}, 7477: {2: -16.86358248529329}, 8006: {1: -2.0}, 7462: {1: -11.287040221158449}, 7589: {2: -10.507730817227683, 1: -10.850898141340256}, 7461: {2: -3.940399056460381, 1: -18.64338436688343}, 7472: {2: -29.405970620846105}, 7650: {0: -15.761596225841524, 1: -12.763903115551686}, 7506: {1: -11.772090235368196}, 7410: {1: -21.41794078717981}, 7537: {0: -24.504975517371754, 2: -21.07081377703533}, 7638: {0: -14.786396702379704}, 7765: {1: -7.425250071048737, 0: -9.910300085258484}, 7590: {0: -10.850898141340256, 1: -5.910598584690572}, 7656: {1: -15.761596225841524, 2: -10.83905755248089}, 7538: {0: -20.5840470995104, 1: -18.603980413897403}, 7665: {0:

: 

In [7]:
q_learning = lib.QLearning(env, 10)
q_table = q_learning.run()
print(f'Q-table: {q_table}')

# Dyna Q

In [None]:
dynaq = lib.DynaQ(env, 10)
q_table = dynaq.run()
print(q_table)

# Policy iteration, politique et value function

In [None]:
policy_iteration = lib.PolicyIteration(SecretEnv0)
policy1, V1 = policy_iteration.run()

print("Optimal policy for SecretEnv0:")
print(policy1)
print("Value function for SecretEnv0:")
print(V1)

Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 0
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
Delta: 1
D

KeyboardInterrupt: 