In [1]:
import numpy as np
import matplotlib.pyplot as plt
import gym
import time
from gym.envs.toy_text.frozen_lake import generate_random_map
from tqdm import tqdm
import sys
sys.path.append('../')
from sarsa.sarsa_linear_approximation import sarsa_linear_approximation

In [2]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))`` without overflow.

    The textbook form evaluates ``exp(-x)``, which overflows to ``inf`` (and
    makes NumPy emit a ``RuntimeWarning``) once ``x`` is a large negative
    number. Here the exponential is only ever taken of ``-|x|``, so its value
    never exceeds 1 and cannot overflow.

    Parameters
    ----------
    x : scalar or array_like
        Input value(s).

    Returns
    -------
    numpy scalar or numpy.ndarray
        The logistic of ``x`` with the same shape as ``x``. A scalar input
        yields a NumPy scalar rather than a 0-d array.
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))  # exp of a non-positive number: never overflows
    # x >= 0 -> 1 / (1 + exp(-x));  x < 0 -> exp(x) / (1 + exp(x)).
    # Both expressions are the same function, each stable on its own side.
    s = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    return s[()]  # unwrap 0-d results to a scalar; n-d arrays pass through

def LunarPhi(s):
    """Turn an 8-component state into a 12-component feature vector.

    The state is unpacked into eight values. The first six are used as-is,
    four product terms are appended, then the last two state values, and the
    whole vector is squashed element-wise through ``sigmoid``.

    The local names below presume the LunarLander observation layout
    (position, velocity, angle, angular velocity, two leg flags) -- TODO
    confirm against the environment documentation.
    """
    pos_x, pos_y, vel_x, vel_y, angle, ang_vel, leg_l, leg_r = s

    raw_terms = [pos_x, pos_y, vel_x, vel_y, angle, ang_vel]
    # NOTE(review): when pos_x and pos_y are small, (100 - y) * (100 - x) is
    # about 1e4, so its sigmoid is ~1.0 and this feature is effectively
    # constant -- confirm that is intended.
    product_terms = [
        pos_x * vel_x,
        pos_y * vel_y,
        (100 - pos_y) * (100 - pos_x),
        angle * ang_vel,
    ]
    contact_terms = [leg_l, leg_r]

    features = np.array(raw_terms + product_terms + contact_terms)
    return sigmoid(features)
    
def prepLunarLander(render_mode=None, max_episode_steps=100, seed=1):
    """Create a seeded ``LunarLander-v2`` environment and its feature setup.

    Parameters
    ----------
    render_mode : str or None, optional
        Forwarded unchanged to ``gym.make``.
    max_episode_steps : int, optional
        Value assigned to ``env._max_episode_steps``. Defaults to 100.
    seed : int or None, optional
        Seed passed to the initial ``env.reset`` call. Defaults to 1.

    Returns
    -------
    tuple
        ``(dim, nA, env, LunarPhi, dname)`` where ``dim`` is the length of
        the feature vector produced by ``LunarPhi``, ``nA`` is
        ``env.action_space.n``, ``env`` is the already-reset environment,
        ``LunarPhi`` is the feature function and ``dname`` is the string
        ``"LunarLander"``.
    """
    env = gym.make('LunarLander-v2', render_mode=render_mode)
    dname = "LunarLander"
    nA = env.action_space.n
    env._max_episode_steps = max_episode_steps

    # reset() is unpacked as a 2-tuple, exactly as the original code did.
    first_obs, _info = env.reset(seed=seed)

    # Measure the feature length from LunarPhi itself rather than hard-coding
    # "observation size + 4", so the reported dimension cannot silently drift
    # from what the feature function actually emits.
    dim = int(np.size(LunarPhi(first_obs)))
    return (dim, nA, env, LunarPhi, dname)

In [4]:
# Build the seeded environment together with its feature map.
nS, nA, env, phi, envName = prepLunarLander()

# Keyword arguments for the SARSA run, gathered in one place.
# NOTE(review): prepLunarLander sets env._max_episode_steps to 100 and the
# logged avg_steps never exceeds 100, so max_steps_per_episode=250 looks like
# it is never the binding limit -- confirm which cap is intended.
sarsa_kwargs = dict(
    dim=nS,
    episodes=50000,
    alpha=0.001,
    gamma=0.999,
    start_epsilon=1,
    max_steps_per_episode=250,
    phi=phi,
    mean=1,
    std_dev=0.1,
)
mean_reward, weights_dict = sarsa_linear_approximation(env, nA, **sarsa_kwargs)



  0%|          | 20/50000 [00:02<1:02:35, 13.31it/s]

Episode: 0, mean_reward: -123.60825750914366, games_won: 1, avg_steps: 70.144 , epsilon: 1.0


  5%|▌         | 2517/50000 [00:34<1:44:30,  7.57it/s]

Episode: 2500, mean_reward: -105.2055541454594, games_won: 152, avg_steps: 100.0 , epsilon: 0.9499989999799996


 10%|█         | 5022/50000 [01:07<1:09:57, 10.72it/s]

Episode: 5000, mean_reward: -21.112031755712305, games_won: 240, avg_steps: 99.72 , epsilon: 0.8999979999599992


 15%|█▌        | 7518/50000 [01:42<1:43:21,  6.85it/s]

Episode: 7500, mean_reward: -78.15167631969769, games_won: 225, avg_steps: 99.902 , epsilon: 0.8499969999399988


 20%|██        | 10015/50000 [02:16<1:29:50,  7.42it/s]

Episode: 10000, mean_reward: -96.30010981437854, games_won: 149, avg_steps: 96.056 , epsilon: 0.7999959999199984


 25%|██▌       | 12510/50000 [02:53<1:28:56,  7.03it/s]

Episode: 12500, mean_reward: -84.45277917611769, games_won: 204, avg_steps: 99.282 , epsilon: 0.749994999899998


 30%|███       | 15009/50000 [03:31<1:30:19,  6.46it/s]

Episode: 15000, mean_reward: -140.745868615597, games_won: 142, avg_steps: 96.886 , epsilon: 0.6999939998799976


 35%|███▌      | 17513/50000 [04:07<1:04:48,  8.35it/s]

Episode: 17500, mean_reward: -111.93462396337446, games_won: 90, avg_steps: 93.666 , epsilon: 0.6499929998599971


 40%|████      | 20012/50000 [04:48<1:21:09,  6.16it/s]

Episode: 20000, mean_reward: -183.80925509155557, games_won: 122, avg_steps: 90.04 , epsilon: 0.5999919998399968


 45%|████▌     | 22510/50000 [05:29<1:19:30,  5.76it/s]

Episode: 22500, mean_reward: -100.38729249437895, games_won: 154, avg_steps: 96.938 , epsilon: 0.5499909998199964


 50%|█████     | 25009/50000 [06:11<1:21:09,  5.13it/s]

Episode: 25000, mean_reward: -76.88801429505618, games_won: 181, avg_steps: 99.228 , epsilon: 0.499989999799996


 55%|█████▌    | 27510/50000 [06:54<1:06:32,  5.63it/s]

Episode: 27500, mean_reward: -77.79848540761822, games_won: 175, avg_steps: 98.348 , epsilon: 0.4499889997799956


 60%|██████    | 30008/50000 [07:37<56:00,  5.95it/s]  

Episode: 30000, mean_reward: -78.73383520112147, games_won: 175, avg_steps: 97.172 , epsilon: 0.3999879997599952


 65%|██████▌   | 32512/50000 [08:21<56:24,  5.17it/s]  

Episode: 32500, mean_reward: -37.43520337356216, games_won: 209, avg_steps: 99.078 , epsilon: 0.3499869997399948


 70%|███████   | 35008/50000 [09:05<46:32,  5.37it/s]  

Episode: 35000, mean_reward: -30.69288095213329, games_won: 208, avg_steps: 99.386 , epsilon: 0.2999859997199944


 75%|███████▌  | 37513/50000 [09:50<38:49,  5.36it/s]

Episode: 37500, mean_reward: -36.10534289770373, games_won: 160, avg_steps: 98.966 , epsilon: 0.249984999699994


 80%|████████  | 40007/50000 [10:35<31:22,  5.31it/s]

Episode: 40000, mean_reward: -46.459622203777506, games_won: 125, avg_steps: 98.728 , epsilon: 0.1999839996799936


 85%|████████▌ | 42514/50000 [11:21<23:52,  5.22it/s]

Episode: 42500, mean_reward: -32.61802287563868, games_won: 135, avg_steps: 99.56 , epsilon: 0.1499829996599932


 90%|█████████ | 45011/50000 [12:05<15:51,  5.24it/s]

Episode: 45000, mean_reward: -27.382411042381488, games_won: 130, avg_steps: 99.764 , epsilon: 0.0999819996399928


 95%|█████████▌| 47508/50000 [12:51<08:00,  5.18it/s]

Episode: 47500, mean_reward: -89.01821021165114, games_won: 92, avg_steps: 96.086 , epsilon: 0.0499809996199924


100%|██████████| 50000/50000 [13:35<00:00, 61.31it/s]

Episode: 49999, mean_reward: -143.81819917446015, games_won: 12, avg_steps: 82.7 , epsilon: 0.0



