In [1]:
import gym
import random
import numpy as np

In [2]:
# Identifier of the Gym environment used throughout the notebook.
env_name = "CartPole-v0"
env = gym.make(env_name)

# Report the environment's observation and action spaces.
for label, space in (
    ("Observation space:", env.observation_space),
    ("Action space:", env.action_space),
):
    print(label, space)

Observation space: Box(-3.4028234663852886e+38, 3.4028234663852886e+38, (4,), float32)
Action space: Discrete(2)


In [3]:
class HillClimbingAgent():
    """Linear policy trained by hill climbing with an adaptive noise scale.

    Each action is scored as ``np.dot(state, weights)`` and the
    highest-scoring action is chosen. After every episode ``update_model``
    keeps the current weights if the episode reward matched or beat the best
    seen so far, then perturbs the best weights to get the next candidate.
    """

    def __init__(self, env, seed=None):
        """Size the policy from ``env`` and initialise the weights.

        Args:
            env: Object exposing ``observation_space.shape`` and
                ``action_space.n``.
            seed: Optional seed for a private ``np.random.RandomState``.
                When None (the default) the global ``np.random`` state is
                used, so existing callers see unchanged behaviour.
        """
        self.state_dim = env.observation_space.shape
        self.action_size = env.action_space.n
        # Both the ``np.random`` module and ``RandomState`` expose ``rand``.
        self.rng = np.random if seed is None else np.random.RandomState(seed)
        self.build_model()

    def _sample_noise(self):
        """Return uniform [0, 1) samples with the same shape as the weights."""
        return self.rng.rand(*self.state_dim, self.action_size)

    def build_model(self):
        """Reset the weights, the best-so-far record and the noise scale."""
        self.weights = 1e-4 * self._sample_noise()
        # ``np.inf`` rather than ``np.Inf``: the capitalised alias was
        # removed in NumPy 2.0.
        self.best_reward = -np.inf
        self.best_weights = np.copy(self.weights)
        self.noise_scale = 1e-2

    def get_action(self, state):
        """Return the index of the action with the highest linear score."""
        p = np.dot(state, self.weights)
        action = np.argmax(p)
        return action

    def update_model(self, reward):
        """Accept or reject the current weights, then draw a new candidate.

        Args:
            reward: Total reward obtained with the current ``self.weights``.
        """
        if reward >= self.best_reward:
            # Improvement (or tie): keep these weights and search closer by.
            self.best_reward = reward
            self.best_weights = np.copy(self.weights)
            self.noise_scale = max(self.noise_scale / 2, 1e-3)
        else:
            # Worse: discard the candidate and widen the search radius.
            self.noise_scale = min(self.noise_scale * 2, 2)

        # The next candidate always starts from the best weights seen so far.
        self.weights = self.best_weights + self.noise_scale * self._sample_noise()

In [None]:
# Train a fresh hill-climbing agent, one weight update per episode.
agent = HillClimbingAgent(env)
num_episodes = 100

try:
    for ep in range(num_episodes):
        # The unpacking below expects reset() to return the observation
        # and step() to return a 4-tuple.
        state = env.reset()
        total_reward = 0
        done = False
        while not done:
            action = agent.get_action(state)
            state, reward, done, _ = env.step(action)
            env.render()
            total_reward += reward

        # The episode's total reward decides whether the weights are kept.
        agent.update_model(total_reward)
        print("Episode: {}, total_reward: {:.2f}".format(ep, total_reward))
finally:
    # Release the render window even if the cell is interrupted mid-run.
    env.close()

Episode: 0, total_reward: 191.00
Episode: 1, total_reward: 109.00
Episode: 2, total_reward: 10.00
Episode: 3, total_reward: 9.00
Episode: 4, total_reward: 10.00
Episode: 5, total_reward: 10.00
Episode: 6, total_reward: 9.00
Episode: 7, total_reward: 9.00
Episode: 8, total_reward: 9.00
Episode: 9, total_reward: 140.00
Episode: 10, total_reward: 10.00
Episode: 11, total_reward: 11.00
Episode: 12, total_reward: 156.00
Episode: 13, total_reward: 48.00
Episode: 14, total_reward: 147.00
Episode: 15, total_reward: 9.00
Episode: 16, total_reward: 134.00
Episode: 17, total_reward: 77.00
Episode: 18, total_reward: 10.00
Episode: 19, total_reward: 200.00
Episode: 20, total_reward: 200.00
Episode: 21, total_reward: 200.00
Episode: 22, total_reward: 200.00
Episode: 23, total_reward: 200.00
Episode: 24, total_reward: 200.00
Episode: 25, total_reward: 200.00
Episode: 26, total_reward: 200.00
