# DRL Project - MountainCar-v0

## Components

### Libraries

In [1]:
# Install the Python packages and system libraries used by this notebook.
# The system packages (xvfb, python-opengl, ffmpeg) are installed once; the
# original cell repeated the apt-get call for a subset of the same packages.
!pip install keras-rl2
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!pip install -U colabgymrender
!pip install gym pyvirtualdisplay > /dev/null 2>&1

Collecting colabgymrender
  Downloading https://files.pythonhosted.org/packages/19/1d/47289e427492af14ced09dfe1531bf3ce8178e7504a8222669b3193d165e/colabgymrender-1.0.9-py3-none-any.whl
Installing collected packages: colabgymrender
Successfully installed colabgymrender-1.0.9


In [2]:
from gym.envs.classic_control.mountain_car import MountainCarEnv
from rl.policy import GreedyQPolicy, EpsGreedyQPolicy, MaxBoltzmannQPolicy
from rl.memory import SequentialMemory
from rl.callbacks import FileLogger
from rl.agents import DQNAgent
from colabgymrender.recorder import Recorder
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
import json
import gym

# Report the TensorFlow version and the GPU devices TensorFlow can see.
print(f'tensorflow version = {tf.__version__}')
print(tf.config.list_physical_devices('GPU'))
print()

# FIXME
# Start a non-visible 400x300 virtual display (presumably so environment
# rendering works on a machine without a screen — confirm).
# NOTE(review): the global name `display` may shadow IPython's built-in
# `display()` helper in this notebook — consider renaming once it is confirmed
# that no other cell relies on this name.
display = Display(visible=0, size=(400, 300))
display.start()

Imageio: 'ffmpeg-linux64-v3.3.1' was not found on your computer; downloading it now.
Try 1. Download from https://github.com/imageio/imageio-binaries/raw/master/ffmpeg/ffmpeg-linux64-v3.3.1 (43.8 MB)
Downloading: 8192/45929032 bytes (0.0%)2170880/45929032 bytes (4.7%)6209536/45929032 bytes (13.5%)10100736/45929032 bytes (22.0%)13959168/45929032 bytes (30.4%)18006016/45929032 bytes (39.2%)22052864/45929032 bytes (48.0%)26091520/45929032 bytes (56.8%)30031872/45929032 bytes (65.4%)34013184/45929032 bytes (74.1%)38035456/45929032 bytes (82.8%)42082304/45929032 bytes (91.6%)45929032/45929032 bytes (100.0%)
  Done
File saved as /root

<pyvirtualdisplay.display.Display at 0x7f87dc065bd0>

### Environment

In [3]:
class MountainCarWithNoiseEnv(MountainCarEnv):
    """MountainCarEnv variant that adds Gaussian noise to the state returned by ``step``.

    Args:
        goal_velocity: Forwarded unchanged to ``MountainCarEnv.__init__``.
        noise_std: Standard deviation of the zero-mean Gaussian noise added to
            each of the first two entries of the returned state. Noise is only
            applied when this value is greater than zero.
    """

    def __init__(self, goal_velocity=0, noise_std=0):
        super().__init__(goal_velocity)
        self.noise_std = noise_std

    def step(self, action):
        """Advance the parent environment one step and perturb the returned state.

        Returns:
            The ``(state, reward, done, info)`` tuple produced by the parent
            ``step``, with independent noise added to ``state[0]`` and
            ``state[1]`` when ``noise_std`` is positive.
        """
        observation, reward, done, info = super().step(action)

        # Nothing to perturb unless a strictly positive noise level is set.
        if not self.noise_std > 0:
            return observation, reward, done, info

        # One independent draw per state entry, in index order (0 then 1),
        # taken from NumPy's global random generator.
        for index in (0, 1):
            observation[index] += np.random.normal(0, self.noise_std)

        return observation, reward, done, info

### Neural Network

In [4]:
def get_model(env):
    """Build the Q-network for ``env`` as a Keras ``Sequential`` model.

    The input of shape ``(1,) + env.observation_space.shape`` is flattened and
    passed through three ReLU dense layers of 128, 64 and 32 units, followed by
    a dense layer with ``env.action_space.n`` units and no activation argument.

    Args:
        env: Environment exposing ``observation_space.shape`` and
            ``action_space.n``.

    Returns:
        The uncompiled Keras model.
    """
    input_shape = (1,) + env.observation_space.shape
    hidden_units = (128, 64, 32)

    network = keras.models.Sequential()
    network.add(keras.layers.Flatten(input_shape=input_shape))
    for units in hidden_units:
        network.add(keras.layers.Dense(units, activation='relu'))
    # One output per discrete action.
    network.add(keras.layers.Dense(env.action_space.n))

    return network

### Agent

In [5]:
def run_experiment(experiment, policy, noise_std, nb_steps=100000, seed=0):
    """Train a DQN agent on MountainCarWithNoiseEnv and log the run to a JSON file.

    Args:
        experiment: Experiment name; the log path is obtained from
            ``get_results_file_name(experiment)``.
        policy: keras-rl policy object handed to ``DQNAgent``.
        noise_std: Noise level passed to ``MountainCarWithNoiseEnv``.
        nb_steps: Number of training steps passed to ``agent.fit``.
        seed: Seed applied to the environment, NumPy and TensorFlow.

    The environment is always closed when training ends, whether it finished
    normally or raised.
    """
    results_file_name = get_results_file_name(experiment)

    # Create the environment before entering the try block: if construction
    # fails there is nothing to close, and the finally clause must not replace
    # the real error with an UnboundLocalError on `env`.
    env = MountainCarWithNoiseEnv(noise_std=noise_std)

    try:
        # Seed every random source involved. TensorFlow is seeded as well
        # because the network weights are initialised by Keras.
        env.seed(seed)
        np.random.seed(seed)
        tf.random.set_seed(seed)

        model = get_model(env)

        agent = DQNAgent(model=model,
                         nb_actions=env.action_space.n,
                         memory=SequentialMemory(limit=50000, window_length=1),
                         nb_steps_warmup=50,
                         target_model_update=1e-2,
                         policy=policy)

        callbacks = [FileLogger(results_file_name)]

        # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
        agent.compile(keras.optimizers.Adam(learning_rate=1e-3), metrics=['mae'])
        agent.fit(env, nb_steps=nb_steps, nb_max_episode_steps=200, callbacks=callbacks,
                  visualize=False, verbose=1)  # FIXME

        # input("Press enter to start to testing...")
        # agent.test(env, nb_episodes=5, visualize=True)
    finally:
        env.close()

### Plots

In [6]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab-only API).
drive.mount('/content/drive')

# Base directory prepended to every results file and saved figure path.
MAIN_DIR = '/content/drive/MyDrive/'

Mounted at /content/drive


In [7]:
def get_results_file_name(experiment):
    """Return the path of the JSON results file for ``experiment``.

    The path is ``MAIN_DIR`` followed by the experiment name and ``.json``.
    """
    parts = (MAIN_DIR, experiment, '.json')
    return ''.join(parts)

def exponential_smoothing(x, alpha):
    """Exponentially smooth a sequence along its first axis.

    Computes ``y[0] = x[0]`` and ``y[i] = alpha * x[i] + (1 - alpha) * y[i-1]``.

    Args:
        x: Sequence or array of real numbers. It is converted to a float array
            so that integer input is not truncated when smoothed values are
            stored.
        alpha: Weight of the current sample; ``1 - alpha`` is the weight of the
            previous smoothed value.

    Returns:
        A float NumPy array with the same shape as ``x``. Empty input yields an
        empty array.
    """
    values = np.asarray(x, dtype=float)
    smoothed = np.empty_like(values)

    # Nothing to smooth; also avoids indexing element 0 of an empty array.
    if values.size == 0:
        return smoothed

    smoothed[0] = values[0]
    for i in range(1, len(values)):
        smoothed[i] = alpha * values[i] + (1 - alpha) * smoothed[i - 1]

    return smoothed

def _load_results(experiment):
    """Read the JSON training log stored for ``experiment``."""
    with open(get_results_file_name(experiment), 'r') as f:
        return json.load(f)


def _plot_metric(runs, metric_key, title, ylabel, output_path):
    """Plot ``metric_key`` against the episode index for every run, save, then show."""
    plt.figure()
    plt.title(title)
    plt.xlabel('Episodes')
    plt.ylabel(ylabel)
    for experiment, results in runs:
        plt.plot(results['episode'], results[metric_key], label=experiment)
    if len(runs) > 1:
        plt.legend()
    # Save before showing: once the figure has been shown it may already be
    # closed, and savefig would then write an empty image.
    plt.savefig(output_path)
    plt.show()


def plot_results(experiments, prefix=None):
    """Plot and save the episode-reward and mean-Q curves of one or more experiments.

    Args:
        experiments: A single experiment name or an iterable of names. Each
            name is resolved to a JSON log with ``get_results_file_name``.
        prefix: Prefix of the saved figure names; the figures are written to
            ``MAIN_DIR + prefix + '_rewards.jpg'`` and
            ``MAIN_DIR + prefix + '_q.jpg'``. When omitted, the experiment name
            is used for a single experiment and ``'combined'`` for several.
    """
    if isinstance(experiments, str):
        experiments = [experiments]
    else:
        experiments = list(experiments)

    # A missing prefix used to reach the `MAIN_DIR + prefix` concatenation as
    # None whenever a list was passed, raising a TypeError.
    if prefix is None:
        prefix = experiments[0] if len(experiments) == 1 else 'combined'

    # Load every log once and reuse it for both figures.
    runs = [(experiment, _load_results(experiment)) for experiment in experiments]

    # FIXME: optionally smooth the rewards first, e.g.
    # exponential_smoothing(rewards, 0.01).
    _plot_metric(runs, 'episode_reward', 'Mean reward per episode', 'Reward',
                 MAIN_DIR + prefix + '_rewards.jpg')
    _plot_metric(runs, 'mean_q', 'Mean Q per episode', 'Q',
                 MAIN_DIR + prefix + '_q.jpg')

## Experiments

### Without noise

#### Greedy

In [None]:
run_experiment('Greedy', GreedyQPolicy(), 0)

Training for 100000 steps ...
Interval 1 (0 steps performed)
 1303/10000 [==>...........................] - ETA: 1:15 - reward: -1.0000

In [None]:
plot_results('Greedy')

#### EpsGreedy eps=0.05

In [None]:
run_experiment('EpsGreedy eps=0.05', EpsGreedyQPolicy(eps=0.05), 0)

In [None]:
plot_results('EpsGreedy eps=0.05')

#### EpsGreedy eps=0.1

In [None]:
run_experiment('EpsGreedy eps=0.1', EpsGreedyQPolicy(eps=0.1), 0)

In [None]:
plot_results('EpsGreedy eps=0.1')

#### EpsGreedy eps=0.2

In [None]:
run_experiment('EpsGreedy eps=0.2', EpsGreedyQPolicy(eps=0.2), 0)

In [None]:
plot_results('EpsGreedy eps=0.2')

#### MaxBoltzmann eps=0.05

In [None]:
run_experiment('MaxBoltzmann eps=0.05', MaxBoltzmannQPolicy(eps=0.05), 0)

In [None]:
plot_results('MaxBoltzmann eps=0.05')

#### MaxBoltzmann eps=0.1

In [None]:
run_experiment('MaxBoltzmann eps=0.1', MaxBoltzmannQPolicy(eps=0.1), 0)

In [None]:
plot_results('MaxBoltzmann eps=0.1')

#### MaxBoltzmann eps=0.2

In [None]:
run_experiment('MaxBoltzmann eps=0.2', MaxBoltzmannQPolicy(eps=0.2), 0)

In [None]:
plot_results('MaxBoltzmann eps=0.2')

#### Total Results

In [None]:
# Every experiment trained above, in the order it was run. Each entry needs its
# own trailing comma: adjacent string literals are silently concatenated, which
# previously merged 'Greedy' with the following name.
experiments = [
    'Greedy',
    'EpsGreedy eps=0.05',
    'EpsGreedy eps=0.1',
    'EpsGreedy eps=0.2',
    'MaxBoltzmann eps=0.05',
    'MaxBoltzmann eps=0.1',
    'MaxBoltzmann eps=0.2',
]
# An explicit prefix is required when several experiments are plotted together,
# because it names the saved figure files.
plot_results(experiments, prefix='Total')