In [7]:
import random
import gym
import numpy as np
import time
from IPython.display import clear_output
from agent_tools import Q_learner, performance_splitter
import pandas as pd
from vid_tools import play_vid, rec_vid
from datetime import datetime

# --- Environment -------------------------------------------------------------
env = gym.make('CartPole-v1')
# Sizes handed to Q_learner below. CartPole-v1 is documented as having a
# 4-component observation and 2 discrete actions.
state_space, action_space = 4, 2

# --- Agent hyperparameters (passed by keyword to Q_learner) -------------------
# NOTE(review): the many-digit values look like the output of an automated
# search rather than hand-picked numbers -- confirm their provenance.
alpha = 0.25198690080574915
gamma = 0.9859981046208016
epsilon = 0.14754403828791107

# --- Decay schedule (also passed to Q_learner) --------------------------------
decay_type = 'epsilon'
# NOTE(review): non-integer step count -- confirm Q_learner accepts a float here.
decay_steps = 8839.116770460862
decay_end = 0.1300747722534384

# Forwarded as Q_learner's `random_policy` argument.
random_policy = False

# NOTE(review): no RNG seed is set in this cell; if Q_learner draws random
# numbers, results will differ between runs -- confirm whether that is intended.

Episode: 500


In [17]:
# Train a fresh agent for each episode budget and record a video of the result.
progs = [1, 500, 2500, 5000, 10000, 20000]

# Constructor arguments shared by every run; gathered once so the loop body
# only shows what actually changes between iterations.
learner_settings = dict(
    env=env,
    state_space=state_space,
    action_space=action_space,
    alpha=alpha,
    gamma=gamma,
    epsilon=epsilon,
    decay_type=decay_type,
    decay_steps=decay_steps,
    decay_end=decay_end,
    random_policy=random_policy,
)

# Not read anywhere in this cell; kept because other cells may rely on it.
agent_type = "opt"

for prog in progs:
    print("Training model... ", prog)
    model = Q_learner(**learner_settings)

    # `test` is overwritten on every pass, so after the loop it holds only the
    # return value of the final training run in `progs`.
    test = model.train(prog)

    # Timestamp is taken after training finishes, so it marks recording time.
    date = datetime.now().strftime("%Y_%m_%d-%I_%M_%S_%p")

    # From here on the loop variable is its string form (used in the path and
    # in the remaining prints).
    prog = str(prog)
    dir_path = r"./basic_results/vids/" + "_".join((date, prog))

    print("Recording model... ", prog)
    rec_vid(
        agent=model,
        env_name='CartPole-v1',
        checkpoint_path=None,
        dir_path=dir_path,
    )
    # NOTE(review): the ".mp4" suffix is appended only for this message --
    # confirm it matches the file name rec_vid actually writes.
    print("Video saved: ", dir_path + ".mp4")

print("Done!")

Episode: 20000
Recording model...  20000
Video saved:  ./basic_results/vids/2022_08_14-05_04_53_PM_20000.mp4
Done!


In [None]:
import plotly.graph_objs as go

# Split the training results into percentile series.
# NOTE(review): `test` is produced by the training cell and holds only its last
# run -- this cell fails on a fresh kernel unless that cell has been executed.
res = performance_splitter(test)

# Plain Python floats for plotly: res[1] is drawn as the line, res[0] and
# res[2] are used as the upper and lower edges of the shaded band.
y = list(map(float, res[1]))
y_upper = list(map(float, res[0]))
y_lower = list(map(float, res[2]))

# One x position per point, spaced 100 apart.
# NOTE(review): 100 presumably matches the bucket size used by
# performance_splitter -- confirm.
x = [step * 100 for step in np.arange(0, len(y))]

# Persist the series: each list is a labelled row, transposed into columns.
df = pd.DataFrame(
    [x, y_lower, y, y_upper],
    index=['x', 'y_lower', 'y', 'y_upper'],
).T
df.to_csv('./basic_results/optimised.csv')

# Line trace for the central series.
centre_trace = go.Scatter(
    name='episode length',
    x=x,
    y=y,
    line=dict(color='rgb(0,80,100)'),
    mode='lines',
)

# Shaded band: walk the upper edge left-to-right, then the lower edge
# right-to-left, so the closed outline can be filled with 'toself'.
band_trace = go.Scatter(
    x=x + x[::-1],
    y=y_upper + y_lower[::-1],
    fill='toself',
    fillcolor='rgba(0,80,100,0.2)',
    line=dict(color='rgba(0,0,255,0)'),  # alpha 0: outline itself is invisible
    hoverinfo="skip",
    showlegend=False,
)

fig = go.Figure([centre_trace, band_trace])

# NOTE(review): the trace is named 'episode length' while the y-axis says
# 'normalized score', and the title says 'baseline' while the CSV is named
# 'optimised' -- confirm which labels are intended.
fig.update_layout(
    title='Average score for baseline Q-learning policy',
    xaxis_title='episodes',
    yaxis_title='normalized score',
    hovermode="x",
)
fig.show()