<a href="https://colab.research.google.com/github/aboelela924/Deep-Reinforcement-Learning-Hands-On/blob/master/Deep_Reinforcement_Learning_Hands_On_Chapter_02.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [23]:
from typing import List
import random
class Environment:
    """Toy environment with a limited step budget and purely random rewards.

    The observation is always the same six zeros and the two available
    actions have no effect on the reward; the class only demonstrates the
    shape of an agent/environment interaction loop.

    Args:
        steps_left: Number of actions that may be taken before the episode
            ends. Defaults to 10.
    """

    def __init__(self, steps_left: int = 10):
        # Remaining number of actions the episode still accepts.
        self.steps_left = steps_left

    def observation(self) -> List[float]:
        """Return the current observation (a fresh list of six zeros)."""
        return [0.0] * 6

    def get_actions(self) -> List[int]:
        """Return the list of actions the agent may choose from."""
        return [0, 1]

    def is_done(self) -> bool:
        """Return True once the step budget is exhausted."""
        # `<=` rather than `==`: a budget that starts (or is set) below zero
        # must still terminate a `while not env.is_done()` loop.
        return self.steps_left <= 0

    def action(self, action: int) -> float:
        """Consume one step and return the reward for it.

        Args:
            action: The chosen action; it does not influence the reward.

        Returns:
            A random float drawn with ``random.random()``.

        Raises:
            Exception: With message "Game Over" if the episode has already
                finished.
        """
        if self.is_done():
            raise Exception("Game Over")
        self.steps_left -= 1
        return random.random()

In [24]:
class Agent:
    """Agent that picks actions at random and keeps a running reward total."""

    def __init__(self):
        # Sum of all rewards collected over every call to step().
        self.total_reward = 0.0

    def step(self, env: Environment):
        """Perform one interaction with ``env``.

        Reads the current observation (this random policy does not use it),
        asks the environment which actions are available, executes one of
        them chosen with ``random.choice`` and adds the reward that comes
        back to ``total_reward``.
        """
        _current_observation = env.observation()
        chosen_action = random.choice(env.get_actions())
        gained = env.action(chosen_action)
        self.total_reward += gained

In [25]:
# Play one episode of the toy Environment with the random Agent.
env = Environment()
agent = Agent()

# Environment.is_done() becomes True when its step budget runs out
# (steps_left starts at 10), which is what ends this loop.
while not env.is_done():
    agent.step(env)

# Every reward comes from random.random(), so the printed total differs
# from run to run; the recorded output below is just one sample.
print("Total reward got: %0.4f"%agent.total_reward)

Total reward got: 4.7674


In [26]:
import gym 
# Instantiate gym's "CartPole-v0" environment; the exploratory cells below
# inspect it through the name `e`.
e = gym.make("CartPole-v0")

In [27]:
# Reset the environment and show what reset() returns; in the recorded
# output it is an array of four floats.
obs = e.reset()
print(obs)

[-0.00505082  0.00150113  0.04672757 -0.02765997]


In [28]:
# Show the action and observation spaces; the recorded output is
# Discrete(2) for actions and Box(4,) for observations.
print(e.action_space)
print(e.observation_space)

Discrete(2)
Box(4,)


In [29]:
# Advance the environment one step with action 0 and print the raw return
# value. The recorded output is a 4-tuple (array, 1.0, False, {}); the random
# agent cell further down unpacks the same tuple as
# (state, reward, isDone, info).
print(e.step(0))

(array([-0.00502079, -0.1942587 ,  0.04617437,  0.27939217]), 1.0, False, {})


In [31]:
# Draw two random samples from the observation space. The recorded samples
# contain magnitudes around 1e38 in the 2nd and 4th entries, so a sampled
# observation is presumably not a state the simulation would actually
# reach - TODO confirm against the Box bounds.
print(e.observation_space.sample())
print(e.observation_space.sample())

[-4.1207629e-01  1.3863846e+38 -6.8396613e-02 -7.3983811e+37]
[ 8.7512708e-01  4.0860091e+37  3.2410052e-02 -1.3721822e+38]


<h1>The random CartPole agent</h1>

In [39]:
# Run one CartPole episode driven purely by random actions.
# NOTE: this rebinds the notebook-global `env`, which an earlier cell used
# for the toy Environment instance.
env = gym.make("CartPole-v0")
state = env.reset()
isDone = False 
total_reward = 0
num_of_steps = 0
while not isDone:
    # No policy: the action is sampled at random from the action space.
    action = env.action_space.sample()
    # step() returns a 4-tuple; only the reward and the done flag are used
    # below, `state` and `info` are not read again in this cell.
    state, reward, isDone, info = env.step(action)
    total_reward += reward
    num_of_steps += 1
print("Episode done in %d steps, total reward %.2f" % (num_of_steps, total_reward))

Episode done in 23 steps, total reward 23.00


<h1>Gym Wrappers</h1>

In [45]:
import gym 
from typing import TypeVar
import random 

# Type variable used to annotate RandomActionWrapper.action(): the method
# returns the same kind of action it receives.
Action = TypeVar("Action")

class RandomActionWrapper(gym.ActionWrapper):
    """Action wrapper that sometimes replaces the agent's action with a random one.

    Each action passing through the wrapper is, with probability ``epsilon``,
    discarded and replaced by a sample from the wrapped environment's action
    space; otherwise it is forwarded unchanged.

    Args:
        env: The environment to wrap.
        epsilon: Probability (compared against ``random.random()``) of
            overriding the action. Defaults to 0.1.
    """

    def __init__(self, env, epsilon=0.1):
        # gym.Wrapper.__init__ stores the wrapped environment as self.env,
        # so no separate assignment is needed here.
        super().__init__(env)
        self.epsilon = epsilon

    def action(self, action: Action) -> Action:
        """Return the action to forward to the wrapped environment.

        Args:
            action: The action chosen by the agent.

        Returns:
            Either ``action`` itself or, with probability ``epsilon``, a
            random sample from the wrapped environment's action space.
        """
        if random.random() < self.epsilon:
            print("Random!!!")
            # Sample from the environment this wrapper holds, not from
            # whatever notebook-global `env` happens to exist at call time.
            return self.env.action_space.sample()
        return action