In [1]:
import random
from typing import List

class SampleEnvironment:
    """Toy environment that runs for a fixed number of steps.

    The observation and the action set are constant, and the reward handed
    back for every action is a random float, so this class only demonstrates
    the shape of an environment interface rather than a learnable task.

    Args:
        steps: Number of actions accepted before the episode ends.
            Defaults to 20. A value of zero or less produces an episode
            that is already finished.
    """

    def __init__(self, steps: int = 20):
        # Number of actions that may still be submitted in this episode.
        self.steps_left = steps

    def get_observation(self) -> List[float]:
        """Return the current observation (always three zeros here)."""
        return [0.0, 0.0, 0.0]

    def get_actions(self) -> List[int]:
        """Return the actions the agent may choose from."""
        return [0, 1]

    def is_done(self) -> bool:
        """Return True once no steps remain in the episode."""
        # `<=` rather than `==`: a counter that is (or becomes) negative must
        # still end the episode instead of letting a driver loop run forever.
        return self.steps_left <= 0

    def action(self, action: int) -> float:
        """Consume one step of the episode and return its reward.

        Args:
            action: The action chosen by the agent. This environment does
                not inspect it.

        Returns:
            The value of ``random.random()`` for this step.

        Raises:
            RuntimeError: If the episode is already over.
        """
        if self.is_done():
            raise RuntimeError("Game is Over")
        self.steps_left -= 1
        return random.random()

## Agent steps
On every step the agent:
* observes the environment
* decides which action to take based on the observation
* submits the action to the environment
* receives the reward for the current step

In [2]:
class Agent:
    """Agent that picks a random action each step and sums the rewards."""

    def __init__(self):
        # Running sum of every reward received so far.
        self.total_reward = 0.0

    def step(self, env: SampleEnvironment):
        """Run one observe / choose / act cycle against ``env``.

        Prints the observation, the available actions, and the updated
        reward total, and adds the step's reward to ``self.total_reward``.
        """
        observation = env.get_observation()
        print("Observations {}".format(observation))

        available = env.get_actions()
        print(available)

        # The choice is uniformly random; the observation is not used.
        chosen = random.choice(available)
        step_reward = env.action(chosen)
        self.total_reward = self.total_reward + step_reward

        print("Total reward {}".format(self.total_reward))

In [3]:
if __name__ == "__main__":
    # Drive a single episode: keep stepping the agent until the environment
    # reports that it is finished, then print the accumulated reward.
    agent = Agent()
    env = SampleEnvironment()

    i = 0  # number of steps taken so far
    while True:
        if env.is_done():
            break
        i += 1
        print("Steps {}".format(i))
        agent.step(env)

    print("Total rewards: %.4f" % agent.total_reward)

Steps 1
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 0.6080243377714558
Steps 2
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 0.6797363329921139
Steps 3
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 1.0896793422765834
Steps 4
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 1.3306556409326549
Steps 5
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 1.3458135397188622
Steps 6
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 2.213972272750679
Steps 7
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 3.1668943592804744
Steps 8
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 3.3915249085995085
Steps 9
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 3.4264633289872477
Steps 10
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 4.240062626449924
Steps 11
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 4.760165789239048
Steps 12
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 4.809124257064598
Steps 13
Observations [0.0, 0.0, 0.0]
[0, 1]
Total reward 5.790471481696827
Steps 14
Obse