# DRL Project - MountainCar-v0

## Components

### Libraries

In [5]:
!pip install keras-rl2



In [6]:
from gym.envs.classic_control.mountain_car import MountainCarEnv
from rl.policy import GreedyQPolicy, EpsGreedyQPolicy, MaxBoltzmannQPolicy
from rl.memory import SequentialMemory
from rl.callbacks import FileLogger
from rl.agents import DQNAgent
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import json
import gym

print(f'tensorflow version = {tf.__version__}')
print(tf.config.list_physical_devices('GPU'))
print()

tensorflow version = 2.5.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]



### Environment

In [7]:
class MountainCarWithNoiseEnv(MountainCarEnv):
    def __init__(self, goal_velocity=0, noise_std=0):
        super().__init__(goal_velocity)
        self.noise_std = noise_std

    def step(self, action):
        state, reward, done, info = super().step(action)
        
        if self.noise_std > 0:
            state[0] += np.random.normal(0, self.noise_std)
            state[1] += np.random.normal(0, self.noise_std)
        
        return state, reward, done, info

### Neural Network

In [8]:
def get_model(env):
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=(1,) + env.observation_space.shape))
    model.add(keras.layers.Dense(128, activation='relu'))
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dense(32, activation='relu'))
    model.add(keras.layers.Dense(env.action_space.n))
    
    return model

### Agent

In [9]:
def run_experiment(experiment, policy, noise_std):   
    results_file_name = get_results_file_name(experiment)
    
    try:
        env = MountainCarWithNoiseEnv(noise_std=noise_std)
        
        env.seed(0)
        np.random.seed(0)
        
        model = get_model(env)
        
        agent = DQNAgent(model=model,
                         nb_actions=env.action_space.n, 
                         memory=SequentialMemory(limit=50000, window_length=1), 
                         nb_steps_warmup=50, 
                         target_model_update=1e-2, 
                         policy=policy)
        
        callbacks=[FileLogger(results_file_name)]
        
        agent.compile(keras.optimizers.Adam(lr=1e-3), metrics=['mae'])
        agent.fit(env, nb_steps=100000, nb_max_episode_steps=200, callbacks=callbacks,
                  visualize=False, verbose=1) # FIXME
        
        # input("Press enter to start to testing...")
        # agent.test(env, nb_episodes=5, visualize=True)
    finally:
        env.close()

### Plots

In [10]:
from google.colab import drive
drive.mount('/content/drive')

MAIN_DIR = '/content/drive/MyDrive/'

Mounted at /content/drive


In [11]:
def get_results_file_name(experiment):
    return MAIN_DIR + experiment + '.json'

def exponential_smoothing(x, alpha):
    y = np.zeros_like(x)
    
    y[0] = x[0]
    
    for i in range(1, len(x)):
        y[i] = alpha*x[i] + (1-alpha)*y[i-1]
    
    return y

def plot_results(experiments, prefix):
    plt.figure()
    plt.title('Mean reward per episode')
    plt.xlabel('Episodes')
    plt.ylabel('Reward')
    for experiment in experiments:
        results_file_name = get_results_file_name(experiment)     
        with open(results_file_name, 'r') as f:
            results = json.load(f)
        episodes = results['episode']
        rewards = results['episode_reward']
        # rewards = exponential_smoothing(rewards, 0.01) # FIXME
        plt.plot(episodes, rewards, label=experiment)
    if len(experiments) > 1:
        plt.legend()
    plt.show()
    plt.savefig(MAIN_DIR + prefix + '_rewards.jpg')
    
    plt.figure()
    plt.title('Mean Q per episode')
    plt.xlabel('Episodes')
    plt.ylabel('Q')
    for experiment in experiments:
        results_file_name = get_results_file_name(experiment)     
        with open(results_file_name, 'r') as f:
            results = json.load(f)
        episodes = results['episode']
        q = results['mean_q']
        plt.plot(episodes, q, label=experiment)
    if len(experiments) > 1:
        plt.legend()
    plt.show()
    plt.savefig(MAIN_DIR + prefix + '_q.jpg')

## Experiments

### Without noise

#### Greedy

In [None]:
run_experiment('Greedy', GreedyQPolicy(), 0)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


Training for 100000 steps ...
Interval 1 (0 steps performed)




50 episodes - episode_reward: -200.000 [-200.000, -200.000] - loss: 2.382 - mae: 19.735 - mean_q: -29.158

Interval 2 (10000 steps performed)

In [None]:
plot_results('Greedy')

#### EpsGreedy eps=0.05

In [None]:
run_experiment('EpsGreedy eps=0.05', EpsGreedyQPolicy(eps=0.05), 0)

In [None]:
plot_results('EpsGreedy eps=0.05')

#### EpsGreedy eps=0.1

In [None]:
run_experiment('EpsGreedy eps=0.1', EpsGreedyQPolicy(eps=0.1), 0)

In [None]:
plot_results('EpsGreedy eps=0.1')

#### EpsGreedy eps=0.2

In [None]:
run_experiment('EpsGreedy eps=0.2', EpsGreedyQPolicy(eps=0.2), 0)

In [None]:
plot_results('EpsGreedy eps=0.2')

#### MaxBoltzmann eps=0.05

In [None]:
run_experiment('MaxBoltzmann eps=0.05', MaxBoltzmannQPolicy(eps=0.05), 0)

In [None]:
plot_results('MaxBoltzmann eps=0.05')

#### MaxBoltzmann eps=0.1

In [None]:
run_experiment('MaxBoltzmann eps=0.1', MaxBoltzmannQPolicy(eps=0.1), 0)

In [None]:
plot_results('MaxBoltzmann eps=0.1')

#### MaxBoltzmann eps=*0.2*

In [None]:
run_experiment('MaxBoltzmann eps=0.2', MaxBoltzmannQPolicy(eps=0.2), 0)

In [None]:
plot_results('MaxBoltzmann eps=0.2')

#### Total Results

In [None]:
experiments = [
               'Greedy'
               'EpsGreedy eps=0.05',
               'EpsGreedy eps=0.1',
               'EpsGreedy eps=0.2',
               'MaxBoltzmann eps=0.05',
               'MaxBoltzmann eps=0.1',
               'MaxBoltzmann eps=0.2',
               ]
plot_results(experiments)