In [16]:
import gym
from common import (
    Discretizer,
)
from observers import (
    StateAnalysisLogger,
    WindowMetricLogger,
)
from training import (
    DiscreteQLearningTrainer,
    DiscreteQLearningTrainingConfig,
)

## Setup

In [17]:
# 1. Create environment
# Instantiate the registered Gym environment with id 'MountainCar-v0'. The resulting
# `env` is shared by the discretizer, the state-analysis observer and the trainer
# created in the following cells.
# The WARN line in this cell's output is printed while this call runs.
env = gym.make('MountainCar-v0')

WARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.


In [18]:
# 2. Training hyperparameters
# Learning rate, discount and episode count are bundled into the config object
# that is later passed to the trainer as `training_config=`.
training_config = DiscreteQLearningTrainingConfig(
    learning_rate=0.1, discount=0.9, episodes=2500,
)
# Kept outside the config object: the trainer receives it through its own
# `epsilon=` keyword argument.
epsilon = 0.2

In [19]:
# 3. Discretize the environment's space
# The same `discretizer` instance is reused by the state-analysis observer and the trainer.
discretizer = Discretizer(
    env,
    [0.1, 0.01],  # presumably one step size per observation dimension — TODO confirm in common.py
)

In [20]:
# 4. Observers attached to the training run
# Reward logger: the training output shows a "Window reward" line every 50 epochs,
# which matches window_size=50.
mean_reward_logger = WindowMetricLogger(
    window_size=50,
    metric='reward',
)
# State analysis: the training output shows a "Visitation Pct" line every 100 epochs,
# which matches frequency=100.
state_analysis = StateAnalysisLogger(
    env=env,
    discretizer=discretizer,
    frequency=100,
)

In [21]:
# 5. Assemble the trainer
# Everything built in the previous cells (environment, discretizer, config,
# exploration rate and both observers) is handed to a single trainer object.
model = DiscreteQLearningTrainer(
    env=env,
    discretizer=discretizer,
    training_config=training_config,
    epsilon=epsilon,
    observers=[mean_reward_logger, state_analysis],
)

## Model Training / Evaluation

In [22]:
# Run training on the trainer assembled above.
# The "Window reward" and "Visitation Pct" lines in this cell's output are presumably
# printed by the two registered observers — verify against observers.py.
model.train()

Epoch: 0 | Window reward: -200.0
Visitation Pct: 0.021052631578947368
Epoch: 50 | Window reward: -200.0
Epoch: 100 | Window reward: -200.0
Visitation Pct: 0.28888888888888886
Epoch: 150 | Window reward: -200.0
Epoch: 200 | Window reward: -200.0
Visitation Pct: 0.4070175438596491
Epoch: 250 | Window reward: -200.0
Epoch: 300 | Window reward: -200.0
Visitation Pct: 0.4678362573099415
Epoch: 350 | Window reward: -199.42
Epoch: 400 | Window reward: -200.0
Visitation Pct: 0.5497076023391813
Epoch: 450 | Window reward: -200.0
Epoch: 500 | Window reward: -200.0
Visitation Pct: 0.5660818713450292
Epoch: 550 | Window reward: -199.46
Epoch: 600 | Window reward: -196.04
Visitation Pct: 0.6035087719298246
Epoch: 650 | Window reward: -197.36
Epoch: 700 | Window reward: -195.6
Visitation Pct: 0.6105263157894737
Epoch: 750 | Window reward: -199.98
Epoch: 800 | Window reward: -195.48
Visitation Pct: 0.6175438596491228
Epoch: 850 | Window reward: -199.78
Epoch: 900 | Window reward: -195.72
Visitation P

In [15]:
# NOTE(review): this cell's execution count (15) is lower than the training cell's (22),
# so in the saved session it was last run before the training shown above.
# Re-run it after `model.train()` so it reflects the trained model.
# Presumably visualises the agent acting in the environment — confirm in training.py.
model.render()

In [14]:
# NOTE(review): executed out of order in the saved session (count 14, i.e. before both
# the render cell and the training cell). Presumably releases the environment / render
# window — confirm in training.py — so it should be the last cell run.
model.close()