In [2]:
### Importing libraries

import pickle
import gym
from gym import logger as gymlogger
from gym.wrappers import RecordVideo
gymlogger.set_level(40) #error only
import tensorflow as tf
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import math
import glob
import io
import base64
from IPython.display import HTML
from IPython import display as ipythondisplay

### Init video func
def show_video():
    """Embed a recorded MP4 from ``video/`` in the notebook output.

    Searches ``video/*.mp4`` relative to the current working directory.
    When at least one file matches, the first path returned by ``glob``
    (whose ordering is filesystem-dependent) is read, base64-encoded and
    displayed as an inline HTML5 ``<video>`` element. When nothing
    matches, a short message is printed instead.

    Returns:
        None. The effect is a notebook display call or a printed message.
    """
    mp4list = glob.glob('video/*.mp4')
    if len(mp4list) > 0:
        mp4 = mp4list[0]
        # Open read-only ('rb', not 'r+b') so read-only recordings still work,
        # and use a context manager so the file handle is always closed.
        with open(mp4, 'rb') as video_file:
            video = video_file.read()
        # Inline the bytes as a data: URI so no separate file has to be served.
        encoded = base64.b64encode(video)
        ipythondisplay.display(HTML(data='''<video alt="test" autoplay
                loop controls style="height: 400px;">
                <source src="data:video/mp4;base64,{0}" type="video/mp4" />
             </video>'''.format(encoded.decode('ascii'))))
    else:
        print("Could not find video")

### Create the environment
# Instantiate gym's "CartPole-v1" environment by its registered id.
# NOTE(review): no later cell visible in this section references `env`
# (the agent below is constructed without it) — confirm it is still needed.
env = gym.make("CartPole-v1")


**Q-Learning Formula:** $Q(s_{t}, a_{t}) \leftarrow Q(s_{t}, a_{t}) + \alpha\left[r_{t+1} + \gamma \max_{a} Q(s_{t+1}, a) - Q(s_{t}, a_{t})\right]$

__Where:__
- Value of the current state-action pair: $Q(s_{t}, a_{t})$
- Value of the next state for action $a$: $Q(s_{t+1}, a)$
- Reward: $r_{t+1}$
- Learning rate: $\alpha \in (0,1)$
- Discount factor: $\gamma \in (0,1]$

# Testing an Agent

In [3]:
# agentV1 is not a standard-library module; presumably a project-local file
# next to this notebook that defines QLearnAgentV1 — verify.
import agentV1

# Construct the agent under test with maxEpisode=100.
# NOTE(review): maxEpisode presumably caps the number of episodes run()
# executes (the recorded output below has keys 0..99) — confirm in agentV1.
agent = agentV1.QLearnAgentV1(maxEpisode=100)

In [6]:
# Run the agent. The recorded output is a dict keyed 0..99 with float values,
# followed by a second (truncated) dict — presumably per-episode results;
# confirm the meaning of each in agentV1.
agent.run()

{0: 23.0, 1: 15.0, 2: 14.0, 3: 19.0, 4: 18.0, 5: 47.0, 6: 42.0, 7: 35.0, 8: 14.0, 9: 18.0, 10: 20.0, 11: 30.0, 12: 63.0, 13: 39.0, 14: 41.0, 15: 35.0, 16: 75.0, 17: 41.0, 18: 47.0, 19: 34.0, 20: 62.0, 21: 51.0, 22: 33.0, 23: 41.0, 24: 56.0, 25: 33.0, 26: 35.0, 27: 40.0, 28: 33.0, 29: 35.0, 30: 35.0, 31: 38.0, 32: 104.0, 33: 9.0, 34: 13.0, 35: 11.0, 36: 11.0, 37: 10.0, 38: 12.0, 39: 12.0, 40: 11.0, 41: 12.0, 42: 9.0, 43: 12.0, 44: 12.0, 45: 10.0, 46: 10.0, 47: 9.0, 48: 12.0, 49: 9.0, 50: 11.0, 51: 12.0, 52: 11.0, 53: 10.0, 54: 11.0, 55: 10.0, 56: 12.0, 57: 11.0, 58: 12.0, 59: 10.0, 60: 10.0, 61: 12.0, 62: 11.0, 63: 10.0, 64: 10.0, 65: 12.0, 66: 10.0, 67: 10.0, 68: 12.0, 69: 11.0, 70: 11.0, 71: 9.0, 72: 12.0, 73: 10.0, 74: 12.0, 75: 12.0, 76: 9.0, 77: 9.0, 78: 10.0, 79: 11.0, 80: 10.0, 81: 12.0, 82: 10.0, 83: 10.0, 84: 11.0, 85: 12.0, 86: 11.0, 87: 10.0, 88: 10.0, 89: 10.0, 90: 11.0, 91: 11.0, 92: 10.0, 93: 10.0, 94: 9.0, 95: 11.0, 96: 12.0, 97: 9.0, 98: 12.0, 99: 12.0}
{0: [4.5006587997

In [7]:
# Second call to run() on the same agent instance. The recorded output matches
# the previous run() cell's output.
# NOTE(review): confirm whether run() retrains from scratch, continues
# training, or only re-reports stored results.
agent.run()

{0: 23.0, 1: 15.0, 2: 14.0, 3: 19.0, 4: 18.0, 5: 47.0, 6: 42.0, 7: 35.0, 8: 14.0, 9: 18.0, 10: 20.0, 11: 30.0, 12: 63.0, 13: 39.0, 14: 41.0, 15: 35.0, 16: 75.0, 17: 41.0, 18: 47.0, 19: 34.0, 20: 62.0, 21: 51.0, 22: 33.0, 23: 41.0, 24: 56.0, 25: 33.0, 26: 35.0, 27: 40.0, 28: 33.0, 29: 35.0, 30: 35.0, 31: 38.0, 32: 104.0, 33: 9.0, 34: 13.0, 35: 11.0, 36: 11.0, 37: 10.0, 38: 12.0, 39: 12.0, 40: 11.0, 41: 12.0, 42: 9.0, 43: 12.0, 44: 12.0, 45: 10.0, 46: 10.0, 47: 9.0, 48: 12.0, 49: 9.0, 50: 11.0, 51: 12.0, 52: 11.0, 53: 10.0, 54: 11.0, 55: 10.0, 56: 12.0, 57: 11.0, 58: 12.0, 59: 10.0, 60: 10.0, 61: 12.0, 62: 11.0, 63: 10.0, 64: 10.0, 65: 12.0, 66: 10.0, 67: 10.0, 68: 12.0, 69: 11.0, 70: 11.0, 71: 9.0, 72: 12.0, 73: 10.0, 74: 12.0, 75: 12.0, 76: 9.0, 77: 9.0, 78: 10.0, 79: 11.0, 80: 10.0, 81: 12.0, 82: 10.0, 83: 10.0, 84: 11.0, 85: 12.0, 86: 11.0, 87: 10.0, 88: 10.0, 89: 10.0, 90: 11.0, 91: 11.0, 92: 10.0, 93: 10.0, 94: 9.0, 95: 11.0, 96: 12.0, 97: 9.0, 98: 12.0, 99: 12.0}
{0: [4.5006587997