In [1]:
# based on Deep Learning Illustrated by Jon Krohn
# https://www.amazon.com/Deep-Learning-Illustrated-Intelligence-Addison-Wesley/dp/0135116694
# in turn based on bit.ly/keonDQN

import os
from collections import deque
import random
import time
import resource
import pickle

import pdb

import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential, model_from_json
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

import plotly.express as px
import plotly.graph_objects as go

# requires python 3.6
# conda install -c akode gym
import gym

# set seeds for reproducibility
# np.random.uniform(0,10000) 4465
random.seed(4465)
np.random.seed(4465)
tf.random.set_seed(4465)


In [2]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = 0.975
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.queue_length=5000
        self.model = self.build_model()
        self.memory = pd.DataFrame(columns=["state", "action", "next_state", "reward", "done"])
        self.memory_size=20000
        
    def build_model(self,
                    n_hidden_layers=2, 
                    hidden_layer_size=32, 
                    activation='relu',
                    reg_penalty=0.001,
                    dropout=0.0675,
                    verbose=True
                   ):
        """return keras NN model per inputs
        input is a state - array of size state_size
        output is an array of action values - array of size action_size
        """

        model = Sequential()

        for i in range(n_hidden_layers):
            if verbose:
                print("layer %d size %d, %s, reg_penalty %.8f, dropout %.3f" % (i + 1, 
                                                                                hidden_layer_size, 
                                                                                activation,
                                                                                reg_penalty,
                                                                                dropout,
                                                                               ))
            # add dropout, but not on inputs, only between hidden layers
            if i and dropout:
                model.add(Dropout(dropout))

            if i==0: # first layer, specify input shape
                model.add(Dense(input_shape=(state_size,),
                                units = hidden_layer_size, 
                                activation = activation,
                                kernel_initializer = keras.initializers.glorot_uniform(),
                                kernel_regularizer=keras.regularizers.l2(reg_penalty),
                                name = "Dense%02d" % i))
            else: #use implicit input shape
                model.add(Dense(units = hidden_layer_size, 
                                activation = activation,
                                kernel_initializer = keras.initializers.glorot_uniform(),
                                kernel_regularizer=keras.regularizers.l2(reg_penalty),
                                name = "Dense%02d" % i))

        model.add(Dense(self.action_size, activation='linear'))

        if verbose:
            print(model.summary())

        model.compile(loss='mse', optimizer=Adam(learning_rate=self.learning_rate))

        return model
        
    def remember(self, state, action, reward, next_state, done):
        # append in place
        self.memory.loc[self.memory.shape[0]]=[state[0], action, next_state[0], reward, done]
            
    def train(self, batch_size):
        # get batch_size observations from memory
        self.memory = self.memory[-self.memory_size:]
        try:
            minibatch = self.memory.sample(n=batch_size)
        except:
            pdb.set_trace()
        
        # target is our best estimate of value of each action
        X_fit = np.concatenate(minibatch['state'].values)
        X_fit = X_fit.reshape((batch_size, self.state_size))
        Y_pred = self.model.predict(X_fit)

        # we don't just fit model against model's own prediction, that would get us nowhere
        # we improve the target by what we learned about the action we actually took
        # value is reward obtained + predicted value of the observed next state
        minibatch['target_observed'] = minibatch['reward']
        # if done, target is the reward 
        # reward by gym env is only 1 for each timestep of survival
        # but we also added a reward of -10 on failure
        # if not done, add gamma discount rate * Q-value prediction for the observed next state
        not_done = minibatch.loc[minibatch['done'] == False]
        X_observed = np.concatenate(not_done['next_state'].values)
        X_observed = X_observed.reshape((not_done.shape[0], self.state_size))
        # run all predictions at once
        # iterates faster but does not train after each prediction
        y_observed_pred = np.amax(self.model.predict(X_observed), axis=1)
        minibatch.loc[minibatch['done'] == False, 'target_observed'] += self.gamma * y_observed_pred
        # vectorized vlookup - update y_pred column specified by action using target_observed
        np.put_along_axis(Y_pred, 
                          minibatch['action'].astype(int).values.reshape(batch_size,1), 
                          minibatch['target_observed'].values.reshape(batch_size,1),
                          axis=1)
        # fit model against improved target
        # arbitrary 8 batch size to reduce variance a little and speed up fit
        self.model.fit(X_fit, Y_pred, epochs=1, batch_size=8, verbose=0)
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])
    
    def load(self, filename, memory=True):
        with open('%s.json' % filename, 'r') as json_file:
            self.model = model_from_json(json_file.read())
        self.model.load_weights("%s.h5" % filename)
        if memory:
            self.memory = pickle.load(open( "%s.p" % filename, "rb"))
        print("loaded model from %s" % filename)

    def save(self, filename, memory=True):
        # serialize model to JSON
        with open("%s.json" % filename, "w") as json_file:
            json_file.write(self.model.to_json())
        # serialize weights to HDF5
        self.model.save_weights("%s.h5" % filename)
        if memory:
            pickle.dump( self.memory, open( "%s.p" % filename, "wb" ) )
        print("saved model to %s" % filename)

        

In [3]:
#https://gym.openai.com/envs/CartPole-v0/
env = gym.make('CartPole-v1')
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
batch_size = 64
n_episodes = 1000
fail_penalty = -20

output_dir = 'model_output/cartpole/'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m



Parameters to load are deprecated.  Call .resolve and .require separately.



In [4]:
agent = DQNAgent(state_size, action_size)

layer 1 size 32, relu, reg_penalty 0.00100000, dropout 0.068
layer 2 size 32, relu, reg_penalty 0.00100000, dropout 0.068
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Dense00 (Dense)              (None, 32)                160       
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
Dense01 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense (Dense)                (None, 2)                 66        
Total params: 1,282
Trainable params: 1,282
Non-trainable params: 0
_________________________________________________________________
None


In [14]:
# prev model
loadmodel = 'model_0010'
agent.load(output_dir + loadmodel)
agent.model.compile(loss='mse', optimizer=Adam(learning_rate=agent.learning_rate))
agent.epsilon = 0.01
agent.save('savemodel')

for e in range(n_episodes):
    print ('Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
    
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    done = False
    timesteps = 0
    
    while not done:
        env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else fail_penalty
        next_state = next_state.reshape([1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("{} episode: {}/{}, score: {}, epsilon: {:.02}"
                  .format(time.strftime("%H:%M:%S"), e, n_episodes, timesteps, agent.epsilon))
        timesteps +=1
    if len(agent.memory) > batch_size*2:
        #pdb.set_trace()
        agent.train(max(batch_size, int(agent.memory.shape[0] *0.05)))
    if e % 10 == 0:
        agent.save(output_dir + "model_%.04d" % e)

loaded model from model_output/cartpole/model_0010
saved model to savemodel
Memory usage: 1213165568 (kb)
13:56:14 episode: 0/1000, score: 138, epsilon: 0.01
saved model to model_output/cartpole/model_0000
Memory usage: 1312010240 (kb)
13:56:25 episode: 1/1000, score: 119, epsilon: 0.01
Memory usage: 1362161664 (kb)
13:56:48 episode: 2/1000, score: 308, epsilon: 0.01
Memory usage: 1524064256 (kb)
13:57:18 episode: 3/1000, score: 392, epsilon: 0.01
Memory usage: 1752883200 (kb)
13:57:20 episode: 4/1000, score: 10, epsilon: 0.01
Memory usage: 1761001472 (kb)
13:57:29 episode: 5/1000, score: 116, epsilon: 0.01
Memory usage: 1829273600 (kb)
13:57:48 episode: 6/1000, score: 239, epsilon: 0.01
Memory usage: 1953976320 (kb)
13:57:54 episode: 7/1000, score: 81, epsilon: 0.01
Memory usage: 2004385792 (kb)
13:58:00 episode: 8/1000, score: 65, epsilon: 0.01
Memory usage: 2037915648 (kb)
13:58:01 episode: 9/1000, score: 9, epsilon: 0.01
Memory usage: 2037915648 (kb)
13:58:09 episode: 10/1000, scor

14:05:38 episode: 93/1000, score: 26, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:05:40 episode: 94/1000, score: 8, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:05:42 episode: 95/1000, score: 28, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:05:43 episode: 96/1000, score: 9, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:05:44 episode: 97/1000, score: 11, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:05:53 episode: 98/1000, score: 110, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:05:59 episode: 99/1000, score: 84, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:06:00 episode: 100/1000, score: 8, epsilon: 0.01
saved model to model_output/cartpole/model_0100
Memory usage: 3989401600 (kb)
14:06:07 episode: 101/1000, score: 88, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:06:15 episode: 102/1000, score: 115, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:06:16 episode: 103/1000, score: 15, epsilon: 0.01
Memory usage: 3989401600 (kb)
14:06:26 episode: 104/1000, score: 133, epsilon

Memory usage: 4279459840 (kb)
14:08:31 episode: 188/1000, score: 10, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:32 episode: 189/1000, score: 10, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:33 episode: 190/1000, score: 9, epsilon: 0.01
saved model to model_output/cartpole/model_0190
Memory usage: 4279459840 (kb)
14:08:35 episode: 191/1000, score: 9, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:36 episode: 192/1000, score: 10, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:37 episode: 193/1000, score: 8, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:38 episode: 194/1000, score: 9, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:39 episode: 195/1000, score: 8, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:40 episode: 196/1000, score: 7, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:41 episode: 197/1000, score: 9, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:42 episode: 198/1000, score: 9, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:08:43 episode:

Memory usage: 4279459840 (kb)
14:10:29 episode: 282/1000, score: 7, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:10:30 episode: 283/1000, score: 8, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:10:31 episode: 284/1000, score: 7, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:10:32 episode: 285/1000, score: 9, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:10:33 episode: 286/1000, score: 9, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:10:34 episode: 287/1000, score: 9, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:10:35 episode: 288/1000, score: 8, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:10:36 episode: 289/1000, score: 9, epsilon: 0.01
Memory usage: 4279459840 (kb)
14:10:58 episode: 290/1000, score: 9, epsilon: 0.01
saved model to model_output/cartpole/model_0290
Memory usage: 4665847808 (kb)
14:10:59 episode: 291/1000, score: 9, epsilon: 0.01
Memory usage: 4665847808 (kb)
14:11:00 episode: 292/1000, score: 9, epsilon: 0.01
Memory usage: 4665847808 (kb)
14:11:02 episode: 29

Memory usage: 4811567104 (kb)
14:16:29 episode: 376/1000, score: 121, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:16:35 episode: 377/1000, score: 95, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:16:43 episode: 378/1000, score: 111, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:16:55 episode: 379/1000, score: 159, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:17:04 episode: 380/1000, score: 129, epsilon: 0.01
saved model to model_output/cartpole/model_0380
Memory usage: 4811567104 (kb)
14:17:05 episode: 381/1000, score: 8, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:17:14 episode: 382/1000, score: 130, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:17:20 episode: 383/1000, score: 72, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:17:21 episode: 384/1000, score: 9, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:17:30 episode: 385/1000, score: 120, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:17:36 episode: 386/1000, score: 66, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:17

Memory usage: 4811567104 (kb)
14:22:50 episode: 470/1000, score: 9, epsilon: 0.01
saved model to model_output/cartpole/model_0470
Memory usage: 4811567104 (kb)
14:22:52 episode: 471/1000, score: 10, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:22:53 episode: 472/1000, score: 9, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:22:54 episode: 473/1000, score: 9, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:22:55 episode: 474/1000, score: 9, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:23:01 episode: 475/1000, score: 76, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:23:02 episode: 476/1000, score: 7, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:23:12 episode: 477/1000, score: 124, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:23:16 episode: 478/1000, score: 59, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:23:17 episode: 479/1000, score: 8, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:23:25 episode: 480/1000, score: 109, epsilon: 0.01
saved model to model_output/cartpole/model_

Memory usage: 4811567104 (kb)
14:25:56 episode: 564/1000, score: 10, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:25:57 episode: 565/1000, score: 8, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:25:58 episode: 566/1000, score: 9, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:25:59 episode: 567/1000, score: 9, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:26:01 episode: 568/1000, score: 9, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:26:02 episode: 569/1000, score: 9, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:26:03 episode: 570/1000, score: 9, epsilon: 0.01
saved model to model_output/cartpole/model_0570
Memory usage: 4811567104 (kb)
14:26:05 episode: 571/1000, score: 7, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:26:06 episode: 572/1000, score: 10, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:26:07 episode: 573/1000, score: 9, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:26:09 episode: 574/1000, score: 8, epsilon: 0.01
Memory usage: 4811567104 (kb)
14:26:10 episode: 

14:31:27 episode: 658/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:29 episode: 659/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:32 episode: 660/1000, score: 44, epsilon: 0.01
saved model to model_output/cartpole/model_0660
Memory usage: 5067739136 (kb)
14:31:34 episode: 661/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:36 episode: 662/1000, score: 14, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:39 episode: 663/1000, score: 46, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:41 episode: 664/1000, score: 10, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:42 episode: 665/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:43 episode: 666/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:44 episode: 667/1000, score: 7, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:45 episode: 668/1000, score: 10, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:31:47 episode: 669/1000, score: 8, epsilon

14:34:07 episode: 752/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:09 episode: 753/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:10 episode: 754/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:11 episode: 755/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:12 episode: 756/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:13 episode: 757/1000, score: 10, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:14 episode: 758/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:16 episode: 759/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:17 episode: 760/1000, score: 9, epsilon: 0.01
saved model to model_output/cartpole/model_0760
Memory usage: 5067739136 (kb)
14:34:18 episode: 761/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:20 episode: 762/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:34:21 episode: 763/1000, score: 8, epsilon: 0.

Memory usage: 5067739136 (kb)
14:42:43 episode: 847/1000, score: 112, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:42:44 episode: 848/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:42:48 episode: 849/1000, score: 42, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:42:49 episode: 850/1000, score: 9, epsilon: 0.01
saved model to model_output/cartpole/model_0850
Memory usage: 5067739136 (kb)
14:42:51 episode: 851/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:42:52 episode: 852/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:43:01 episode: 853/1000, score: 115, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:43:03 episode: 854/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:43:14 episode: 855/1000, score: 133, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:43:15 episode: 856/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:43:17 episode: 857/1000, score: 18, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:43:19 epi

14:47:27 episode: 941/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:28 episode: 942/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:29 episode: 943/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:31 episode: 944/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:32 episode: 945/1000, score: 7, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:33 episode: 946/1000, score: 7, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:34 episode: 947/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:36 episode: 948/1000, score: 9, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:37 episode: 949/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:38 episode: 950/1000, score: 9, epsilon: 0.01
saved model to model_output/cartpole/model_0950
Memory usage: 5067739136 (kb)
14:47:40 episode: 951/1000, score: 8, epsilon: 0.01
Memory usage: 5067739136 (kb)
14:47:41 episode: 952/1000, score: 8, epsilon: 0.0

In [None]:
# saved best model
# training as above does well but not stable, performance sometimes goes off a cliff
# continuing to train sometimes results in forgetting what it learned
# kept a long history and sampled over whole history
# early stopping training when achieved a good model and saved it, run it here without training
# ran repeatedly, when it fell off a cliff restarted using best previous model
loadmodel = 'good'
agent.load(loadmodel, memory=False)
agent.model.compile(loss='mse')

for e in range(n_episodes):
    print ('Memory usage: %s (kb)' % resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
    
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    done = False
    timesteps = 0
    
    while not done:
        env.render()
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        reward = reward if not done else fail_penalty
        next_state = next_state.reshape([1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("{} episode: {}/{}, score: {}, epsilon: {:.02}"
                  .format(time.strftime("%H:%M:%S"), e, n_episodes, timesteps, agent.epsilon))
        timesteps +=1
#   don't bother training
#   if len(agent.memory) > batch_size*2:
#       agent.train(max(batch_size, len(agent.memory *0.05)))
    if e % 10 == 0:
        agent.save(output_dir + "model_%.04d" % e)

loaded model from good
Memory usage: 1957720064 (kb)
10:06:42 episode: 0/1000, score: 499, epsilon: 0.01
saved model to model_output/cartpole/model_0000
Memory usage: 1957720064 (kb)
10:07:27 episode: 1/1000, score: 499, epsilon: 0.01
Memory usage: 1957720064 (kb)
10:08:14 episode: 2/1000, score: 499, epsilon: 0.01
Memory usage: 2015617024 (kb)
10:09:02 episode: 3/1000, score: 499, epsilon: 0.01
Memory usage: 2058985472 (kb)
10:09:43 episode: 4/1000, score: 499, epsilon: 0.01
Memory usage: 2058985472 (kb)
10:10:32 episode: 5/1000, score: 499, epsilon: 0.01
Memory usage: 2434764800 (kb)
10:11:15 episode: 6/1000, score: 499, epsilon: 0.01
Memory usage: 2434764800 (kb)
10:11:59 episode: 7/1000, score: 499, epsilon: 0.01
Memory usage: 2661457920 (kb)
10:12:37 episode: 8/1000, score: 499, epsilon: 0.01
Memory usage: 2661457920 (kb)
10:13:13 episode: 9/1000, score: 499, epsilon: 0.01
Memory usage: 2661457920 (kb)
10:13:59 episode: 10/1000, score: 499, epsilon: 0.01
saved model to model_outpu

In [None]:
# simulate market data

# 2) make data, acceleration toward trend, starts a little off
# 3) add random perturbation
# 4) make that series the acceleration so acceleration is random around a trend
# 5) make data with that acceleration


startindex = 100
amplitude = 1
stockprice = startindex + amplitude
trend = 0.001
forcemult = 0.0001
stocktrend = 0.0

trendindex = startindex
trendseries = []
stockseries = []
date = []
for i in range(1000):
    acceleration = trendindex - stockprice
    stocktrend = stocktrend + acceleration * forcemult
    stockprice *= (1+stocktrend)
    stockseries.append(stockprice)
    
    trendseries.append(trendindex)
    date.append(i)
    trendindex *= (1 + trend)
    
trendseries = pd.DataFrame({'dateindex': date, 'trend' : trendseries, 'stock': stockseries})


fig = go.Figure()
fig.add_trace(go.Scatter(y=trendseries['trend'], x=trendseries['dateindex'],))
fig.add_trace(go.Scatter(y=trendseries['stock'], x=trendseries['dateindex'],))

fig.show()