In [8]:
import tensorflow as tf
import numpy as np
import os
import pandas as pd
import random
from tensorflow.python.platform import gfile

In [9]:
class QLearningDecisionPolicy:
    def __init__(self, actions, input_dim, model_dir):
        # TODO: tuning hyperparameters
        # select action function hyperparameter
        self.epsilon = 0.5
        # q functions hyperparameter
        self.gamma = 0.01
        # neural network hyperparmeter
        self.lr = 0.01

        self.actions = actions
        output_dim = len(actions)

        # neural network input and output placeholder
        self.x = tf.placeholder(tf.float32, [None, input_dim])
        self.y = tf.placeholder(tf.float32, [output_dim])

        # TODO: build your Q-network
        # 2-layer fully connected network
        fc = tf.layers.dense(self.x, 20, activation=tf.nn.relu)
        self.q = tf.layers.dense(fc, output_dim)

        # loss
        loss = tf.square(self.y - self.q)

        # train operation
        self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)

        # session
        self.sess = tf.Session()

        # initalize variables
        init_op = tf.global_variables_initializer()
        self.sess.run(init_op)

        # saver
        self.saver = tf.train.Saver()

        # restore model
        ckpt = tf.train.get_checkpoint_state(model_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("load model: %s" % ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, ckpt.model_checkpoint_path) 

    def select_action(self, current_state, is_training=True):

        if random.random() >= self.epsilon or not is_training:
            action_q_vals = self.sess.run(self.q, feed_dict={self.x: current_state})#########
            action_idx = np.argmax(action_q_vals)
            action = self.actions[action_idx]
        else:  # randomly select action
            action = self.actions[random.randint(0, len(self.actions)-1)]

        return action

    def update_q(self, current_state, action, reward, next_state):
        # Q(s, a)
        action_q_vals = self.sess.run(self.q, feed_dict={self.x: current_state})
        # Q(s', a')
        next_action_q_vals = self.sess.run(self.q, feed_dict={self.x: next_state})
        # a' index
        next_action_idx = np.argmax(next_action_q_vals)
        # create target
        action_q_vals[0, self.actions.index(action)] = reward + self.gamma * next_action_q_vals[0, next_action_idx]
        # delete minibatch dimension
        action_q_vals = np.squeeze(np.asarray(action_q_vals))
        self.sess.run(self.train_op, feed_dict={self.x: current_state, self.y: action_q_vals})

    def save_model(self, output_dir, step):
        if not gfile.Exists(output_dir):
            gfile.MakeDirs(output_dir)

        checkpoint_path = output_dir + '/model'
        self.saver.save(self.sess, checkpoint_path, global_step=step)

In [10]:
def do_action(action, budget, num_stocks, stock_price):
    # TODO: define action's operation
    if action == "Buy" and budget >= stock_price:
        budget -= stock_price
        num_stocks += 1
    elif action == "Sell" and num_stocks > 0:
        budget += stock_price
        num_stocks -= 1
    else:
        action = "Hold"
    return budget, num_stocks, action

def run_simulation(policy, initial_budget, initial_num_stocks, open_prices, close_prices, features):
    budget = initial_budget
    num_stocks = initial_num_stocks
    stock_price = 0

    for i in range(len(open_prices)-1):
        # TODO: define state
        current_state = np.asmatrix(np.hstack((features[i], budget, num_stocks)))

        # calculate current portfolio value
        stock_price = float(open_prices[i])
        current_portfolio = budget + num_stocks * stock_price

        # select action
        action = policy.select_action(current_state, i)

        # update portfolio values based on action
        budget, num_stocks, action = do_action(action, budget, num_stocks, stock_price)

        # calculate new portofolio after taking action
        stock_price = float(close_prices[i])
        new_portfolio = budget + num_stocks * stock_price

        # calculate reward from taking an action at a state
        # TODO: define reward
        reward = new_portfolio - current_portfolio

        # TODO: define state
        next_state = np.asmatrix(np.hstack((features[i+1], budget, num_stocks)))

        # update the policy after experiencing a new action
        policy.update_q(current_state, action, reward, next_state)

    # compute final portfolio worth
    portfolio = budget + num_stocks * stock_price

    print('budget: {}, shares: {}, stock price: {} =>  portfolio: {}'.format(budget, num_stocks, stock_price, portfolio))
    return portfolio

def run_simulations(policy, budget, num_stocks, open_prices, close_prices, features, num_epoch):
    final_portofolios = list()

    for i in range(num_epoch):
        print("simuration no.{}".format(i))
        final_portofolio = run_simulation(policy, budget, num_stocks, open_prices, close_prices, features)
        final_portofolios.append(final_portofolio)
    print(final_portofolios[-1])

In [14]:
def symbol_to_path(symbol, base_dir="data"):
    return os.path.join(base_dir, "{}.csv".format(str(symbol)))

def merge_data(start_date, end_date, symbols):
    dates = pd.date_range(start_date, end_date)

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(symbol_to_path(symbol), index_col="Date", parse_dates=True,
                              usecols=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'], na_values=['nan'])
        df_temp = df_temp.rename(columns={'Open': symbol + '_open', 'High': symbol + '_high', 'Low': symbol + '_low',
                                          'Close': symbol + '_close', 'Volume': symbol + '_volume'})
        df = df.join(df_temp)

    # TODO: cleaning or filling missing value
    df = df.dropna()
    return df

def make_features(start_date, end_date, is_training):

    # TODO: Choose symbols to read
    # symbols = ['Celltrion', 'HyundaiMobis', 'HyundaiMotor', 'KOSPI', 'LGChemical', 'LGH&H', 'POSCO',
    # 'SamsungElectronics', 'SamsungElectronics2', 'ShinhanFinancialGroup', 'SKhynix']
    symbols = ['Celltrion']

    table = merge_data(start_date, end_date, symbols)

    # TODO: select columns to use
    s_close = table['SamsungElectronics_close']
    s_open = table['SamsungElectronics_open']

    # TODO: make features
    input_days = 3

    features = list()
    for a in range(len(s_close)-input_days):
        features.append(s_close[a:a+input_days])

    s_close = s_close[input_days:]
    s_open = s_open[input_days:]

    test_days = 10
    if not is_training:
        return s_open[-test_days:], s_close[-test_days:], features[-test_days:]

    return s_open, s_close, features

if __name__ == "__main__":
    open, close, feature = make_features('2010-01-01', '2019-05-08', False)

KeyError: 'SamsungElectronics_close'

In [12]:
def test(policy, initial_budget, initial_num_stocks, open_prices, close_prices, features):

    budget = initial_budget
    num_stocks = initial_num_stocks

    for i in range(len(open_prices)):
        current_state = np.asmatrix(np.hstack((features[i], budget, num_stocks)))
        action = policy.select_action(current_state, is_training=False)
        stock_price = float(open_prices[i])

        budget, num_stocks, action = do_action(action, budget, num_stocks, stock_price)

    portfolio = budget + num_stocks * close_prices[-1]

    print('Finally, you have')
    print('budget: %f won' % budget)
    print('Shares: %i' % num_stocks)
    print('Share value: %f won' % close_prices[-1])
    print()
    return portfolio

## simulation

In [13]:
open_prices, close_prices, features = make_features('2010-01-01', '2019-05-08', is_training=True)

# TODO: define action
actions = ["Buy", "Sell", "Hold"]

policy = QLearningDecisionPolicy(actions, len(features[0]) + 2, "model")

budget = 100000000.0
num_stocks = 0
num_epoch = 1
run_simulations(policy, budget, num_stocks, open_prices, close_prices, features, num_epoch)

# TODO: fix checkpoint directory name
policy.save_model("LFD_project4_team00", num_epoch)

simuration no.0
budget: 99992850.0, shares: 10, stock price: 44850.0 =>  portfolio: 100441350.0
100441350.0


## test

In [7]:
open_prices, close_prices, features = make_features('2010-01-01', '2019-05-08', is_training=False)

# TODO: define action
actions = ["Buy", "Sell", "Hold"]

budget = 100000000.0
num_stocks = 0

# TODO: fix checkpoint directory name
policy = QLearningDecisionPolicy(actions, len(features[0]) + 2, "LFD_project4_team00") # 수중돈 주식개수 추가가 +2
final_portfolio = test(policy, budget, num_stocks, open_prices, close_prices, features)

print("Final portfolio: %f won" % final_portfolio)

load model: LFD_project4_team00/model-1
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from LFD_project4_team00/model-1


NotFoundError: Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key beta1_power_1 not found in checkpoint
	 [[node save_1/RestoreV2 (defined at <ipython-input-2-3d1d86307ef7>:37) ]]

Caused by op 'save_1/RestoreV2', defined at:
  File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 539, in run_forever
    self._run_once()
  File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 1775, in _run_once
    handle._run()
  File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/usr/local/lib/python3.7/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/usr/local/lib/python3.7/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 781, in inner
    self.run()
  File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/usr/local/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 225, in wrapper
    runner = Runner(result, future, yielded)
  File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 708, in __init__
    self.run()
  File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/usr/local/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/usr/local/lib/python3.7/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2848, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/usr/local/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2874, in _run_cell
    return runner(coro)
  File "/usr/local/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/usr/local/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3049, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3214, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/usr/local/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-f7d6fc057898>", line 10, in <module>
    policy = QLearningDecisionPolicy(actions, len(features[0]) + 2, "LFD_project4_team00") # 수중돈 주식개수 추가가 +2
  File "<ipython-input-2-3d1d86307ef7>", line 37, in __init__
    self.saver = tf.train.Saver()
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 832, in __init__
    self.build()
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 844, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 881, in _build
    build_save=build_save, build_restore=build_restore)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 513, in _build_internal
    restore_sequentially, reshape)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 332, in _AddRestoreOps
    restore_sequentially)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/training/saver.py", line 580, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1572, in restore_v2
    name=name)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.7/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

NotFoundError (see above for traceback): Restoring from checkpoint failed. This is most likely due to a Variable name or other graph key that is missing from the checkpoint. Please ensure that you have not altered the graph expected based on the checkpoint. Original error:

Key beta1_power_1 not found in checkpoint
	 [[node save_1/RestoreV2 (defined at <ipython-input-2-3d1d86307ef7>:37) ]]
