In [1]:
#imports
import gym

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Activation, Convolution2D, Permute
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint, Visualizer, TrainIntervalLogger, TestLogger

In [2]:
# run configuration
file_name = "5L_ConeInv"      # tag embedded in the log / weights / csv file names
window_size = 4               # passed as the Flatten input window and the memory window_length
NB_STEPS = 1_000_000
NB_STEPS_POL = 500_000
NB_STEPS_WARMUP = 100_000

# build the environment
# old rom name: 'SpaceInvaders-ram-v4' or 'Breakout-ram-v4'
env = gym.make('SpaceInvaders-ram-v4')

# observation shape and size of the discrete action space, as reported by the env
nb_obs = env.observation_space.shape
nb_actions = env.action_space.n

print("actions:", nb_actions, "   observations:", nb_obs)

actions: 6    observations: (128,)


In [3]:
# callbacks

# output paths: final weights and the periodic checkpoint
weights_filename = f"model/{file_name}_weights.h5f"
checkpoint_filename = f"model/{file_name}_checkpoint.h5f"

# saver callback, configured with interval=100000
checkpoint_callback = ModelIntervalCheckpoint(
    checkpoint_filename,
    interval=100000,
)

# file logger, configured with interval=1; its output file is parsed later in the notebook
logger = FileLogger(
    f'training_logs_{file_name}.txt',
    interval=1,
)

In [4]:
# create the neural network model
model = Sequential()

# input is a window of `window_size` stacked observations; flatten it to one vector
model.add(Flatten(input_shape=(window_size,) + nb_obs))

# hidden stack: Dense followed by ReLU, width halving at every layer.
# Layers are added in the same order as before, so auto-generated layer
# names (dense, activation, dense_1, ...) are unchanged.
for units in (128, 64, 32, 16, 8):
    model.add(Dense(units))
    model.add(Activation('relu'))

# output head: one linear unit per action
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               65664     
_________________________________________________________________
activation (Activation)      (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
activation_1 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
activation_2 (Activation)    (None, 32)                0

In [5]:
# set up the agent

# use this line if training is starting from a checkpoint
# model.load_weights("model/test_1_checkpoint.h5f")
# # or from the actual thing
# model.load_weights("model/test_1_weights.h5f")

# set up the memory buffer; window_length must match the model's input window
memory = SequentialMemory(limit=1000000, window_length=window_size)

# create the policy: epsilon-greedy with 'eps' annealed between value_max and
# value_min over nb_steps; value_test is the eps configured for testing
# NOTE(review): the config cell defines NB_STEPS_POL = 500000 but it is not used
# here; the annealing length below is 1000000. Confirm which one is intended
# before swapping the literal for the constant.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(),
                              attr='eps',
                              value_max=1.0,
                              value_min=.1,
                              value_test=.05,
                              nb_steps=1000000)

# create the agent
# warm-up length comes from the config cell instead of a duplicated literal
# (batch size argument was removed earlier; add it back if it turns out to matter)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=NB_STEPS_WARMUP,
               target_model_update=10000, policy=policy, gamma=0.99)

In [6]:
# compile, fit and evaluate the agent
# `learning_rate` is the supported keyword; `lr` is a deprecated alias
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
train_history = dqn.fit(env, nb_steps=NB_STEPS, callbacks=[logger, checkpoint_callback],
                        visualize=False, verbose=2)

# save the weights (weights_filename is defined in the callbacks cell and holds
# the same path that used to be re-built by hand here)
dqn.save_weights(weights_filename, overwrite=True)

Training for 1000000 steps ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
    630/1000000: episode: 1, duration: 1.486s, episode steps: 630, steps per second: 424, episode reward: 65.000, mean reward:  0.103 [ 0.000, 20.000], mean action: 2.476 [0.000, 5.000],  loss: --, mae: --, mean_q: --, mean_eps: --
   1274/1000000: episode: 2, duration: 1.560s, episode steps: 644, steps per second: 413, episode reward: 105.000, mean reward:  0.163 [ 0.000, 30.000], mean action: 2.398 [0.000, 5.000],  loss: --, mae: --, mean_q: --, mean_eps: --
   2001/1000000: episode: 3, duration: 1.652s, episode steps: 727, steps per second: 440, episode reward: 335.000, mean reward:  0.461 [ 0.000, 200.000], mean action: 2.508 [0.000, 5.000],  loss: --, mae: --, mean_q: --, mean_eps: --
   2861/1000000: episode: 4, duration: 1.942s, episode steps: 860, steps per second: 443, episode reward: 225.000, mean reward:  0.262 [ 0.000, 30.000], 

In [7]:
# test the agent
# `learning_rate` is the supported keyword; `lr` is a deprecated alias
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.test(env, nb_episodes=5, visualize=True)
# release the environment (and its render window) once testing is done
env.close()

Testing for 5 episodes ...
Episode 1: reward: 270.000, steps: 729
Episode 2: reward: 270.000, steps: 705
Episode 3: reward: 270.000, steps: 729
Episode 4: reward: 270.000, steps: 726
Episode 5: reward: 270.000, steps: 712


In [8]:
import json

import pandas as pd
import matplotlib.pyplot as plt

# list of the data columns, in the order they should appear in the csv
cols = ["loss", "mae", "mean_q", "mean_eps", "episode_reward", "nb_episode_steps", "nb_steps", "episode", "duration"]

# The training log is a single JSON object of the form {"loss": [...], "mae": [...], ...}.
# Parse it as JSON instead of stripping keys and brackets by hand: values come back
# as numbers (not strings with leading spaces), columns are matched by name rather
# than by position, and the file handle is closed. json.load accepts the NaN
# tokens that appear in the log.
with open(f"training_logs_{file_name}.txt") as log_file:
    log_data = json.load(log_file)

log_df = pd.DataFrame(log_data)

# known columns first in their fixed order, then any extra metrics the log may contain
known_cols = [c for c in cols if c in log_df.columns]
extra_cols = [c for c in log_df.columns if c not in cols]

# missing values (NaN in the log) are written as 0, as before
df = log_df[known_cols + extra_cols].fillna(0)

df.to_csv(f"{file_name}.csv")