### The agent

A thing or person that takes an active role. The agent's policy decides what action is needed at every time step, given its observations.

### The environment

Some model of the world that is external to the agent and has the responsibility of providing observations and giving rewards.

In [1]:
import random 
from typing import List

In [2]:
class Environment:
    """Toy environment that pays a random reward for a limited number of steps.

    Apart from the remaining step budget the environment keeps no state:
    the observation is constant and the reward does not depend on the action.
    """

    def __init__(self, steps: int = 10) -> None:
        """Create an environment whose episode lasts ``steps`` time steps.

        Args:
            steps: Number of actions accepted before the episode is over.
                Defaults to 10.

        Raises:
            ValueError: If ``steps`` is negative.
        """
        if steps < 0:
            raise ValueError("steps must be non-negative")
        # counter that limits the number of time steps
        self.step_left = steps

    def get_observation(self) -> List[float]:
        """Return the current environment's observation to the agent."""
        # the observation vector is always zero, as the environment has no
        # internal state in this example
        return [0.0, 0.0, 0.0]

    def get_actions(self) -> List[int]:
        """Return the actions the agent can carry out."""
        # there are two actions that the agent can carry out
        return [0, 1]

    def is_done(self) -> bool:
        """Return True once the step budget is used up."""
        # `<=` rather than `==` so a counter pushed below zero still ends the episode
        return self.step_left <= 0

    def action(self, action: int) -> float:
        """Handle an agent's action and return the reward for it.

        Args:
            action: The chosen action; it is ignored, the reward is random.

        Returns:
            A random reward in the half-open range [0.0, 1.0).

        Raises:
            Exception: If the episode is already over.
        """
        if self.is_done():
            raise Exception("Game is over")
        self.step_left -= 1
        return random.random()

In [3]:
class Agent:
    """Agent that picks a random action each step and sums up its rewards."""

    def __init__(self) -> None:
        # reward accumulated over all steps taken so far
        self.total_reward = 0.0

    def step(self, env: "Environment") -> None:
        """Perform one interaction with the environment.

        The agent observes the environment, asks which actions are available,
        submits one of them chosen uniformly at random and adds the returned
        reward to ``total_reward``.

        Args:
            env: The environment instance to act in.
        """
        # the observation is requested as part of the interaction loop, but
        # this random policy does not use it when choosing an action
        env.get_observation()
        actions = env.get_actions()
        reward = env.action(random.choice(actions))
        self.total_reward += reward

In [4]:
# Run a single episode: the agent keeps stepping until the environment
# reports that it is done, then the accumulated reward is printed.
env, agent = Environment(), Agent()

while True:
    if env.is_done():
        break
    agent.step(env)

summary = 'Total reward got: %.4f' % agent.total_reward
print(summary)

Total reward got: 3.3950
