### Author: Ariel Guerrero
#### Single Agent Reinforcement Learning
Simple implementation of a single-agent reinforcement learning setup: an agent that samples random actions while interacting with an OpenAI Gym environment.

### imports

In [None]:
import gym
import random
import numpy as np

### Functions for creating and describing the environment

In [None]:
def new_env(env_name):
    """
        description:        Build a new Gym environment from its id.
        @param env_name:    environment id string handed to gym.make
        @return:            the environment object returned by gym.make
    """
    return gym.make(env_name)


def env_attributes(env):
    """
        description:    Prints attributes of the environment: the
                        observation space, then either the discrete
                        action space or the low/high bounds of the
                        action range.
        @param env:      Gym environment
    """
    print("observation space: ", env.observation_space)

    action_space = env.action_space
    # isinstance (instead of an exact type comparison) also accepts
    # subclasses of Discrete.
    if isinstance(action_space, gym.spaces.discrete.Discrete):
        print("action space: ", action_space)
    elif hasattr(action_space, "low") and hasattr(action_space, "high"):
        print("action range: ", action_space.low, action_space.high)
    else:
        # Spaces without low/high bounds cannot be summarised as a range;
        # print the space itself rather than raising AttributeError.
        print("action space: ", action_space)


### Simple Agent class definition

In [None]:
class Agent():
    """
        description:    Agent with discrete or continuous action space
                        that picks its actions uniformly at random.
    """

    def __init__(self, env):
        """
            description:    Stores the action-space details needed to
                            sample actions and prints the environment
                            attributes.
            @param env:      Gym environment
        """
        # Is the agent discrete or continuous? isinstance (rather than an
        # exact type comparison) also recognises subclasses of Discrete.
        self.is_discrete = isinstance(
            env.action_space, gym.spaces.discrete.Discrete)

        if self.is_discrete:
            # number of available actions
            self.action_size = env.action_space.n
        else:
            # bounds and shape used to draw a continuous action
            self.action_low = env.action_space.low
            self.action_high = env.action_space.high
            self.action_shape = env.action_space.shape
        env_attributes(env)

    def get_action(self, state):
        """
            description:    Samples a random action.
            @param state:    current observation; not used, the action
                             is chosen independently of it
            @return:         an int in [0, action_size) when discrete,
                             otherwise an array of shape action_shape
                             drawn uniformly between action_low and
                             action_high
        """
        if self.is_discrete:
            return random.randrange(self.action_size)
        return np.random.uniform(
            self.action_low,
            self.action_high,
            self.action_shape,
        )

### different environments

In [None]:
# Id of the environment to run; uncomment one of the alternatives below
# (and comment out the active line) to try a different environment.
env_name = 'MountainCarContinuous-v0'
# env_name = 'MountainCar-v0'
# env_name = 'Acrobot-v1'
# env_name = 'Pendulum-v1'

### create new environment

In [None]:
# Build the environment identified by env_name.
env = new_env(env_name)

### create a new agent

In [None]:
# Constructing the agent also prints the environment's observation and
# action space details (Agent.__init__ calls env_attributes).
agent = Agent(env)

### fresh state

In [None]:
# Reset the environment; the returned value is the first state handed to
# the agent in the driver loop.
state = env.reset()

### Driver code

In [None]:
# Run the agent for 200 environment steps, rendering after each one.
# NOTE: despite its name, i_episode counts single steps, not episodes.
for i_episode in range(200):
    action = agent.get_action(state)
    state, reward, done, info = env.step(action)
    env.render()
    # Stepping an environment after it reports done is undefined, so start
    # a fresh episode before the next step.
    if done:
        state = env.reset()

### closing the environment

In [None]:
# Shut the environment down now that the run is finished.
env.close()