In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from trading_env_bollinger import TradingEnv
import DQNTradingAgent.dqn_agent as dqn_agent
from custom_hyperparameters import hyperparams
from arguments import argparser

In [2]:
# ---------------------------------------------------------------------------
# Run configuration: seeds, device, episode/window sizes and the training data.
# ---------------------------------------------------------------------------
# args = argparser() # device_num, save_num, risk_aversion, n_episodes

# Seed every RNG this notebook imports, not only CUDA, so that a
# "Restart Kernel -> Run All" is as repeatable as the libraries allow.
SEED = 7
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Preferred GPU index. Fall back to CPU when that GPU is not present instead
# of failing later with an opaque CUDA error on the first tensor transfer.
# NOTE(review): assumes dqn_agent can also run on CPU -- confirm.
device_num = 2
if torch.cuda.is_available() and device_num < torch.cuda.device_count():
    device = torch.device("cuda:{}".format(device_num))
else:
    print("cuda:{} is not available, falling back to CPU".format(device_num))
    device = torch.device("cpu")
dqn_agent.set_device(device)

# save_location = 'saves/Original/{}'.format(args.save_num)
# if not os.path.exists(save_location):
#     os.makedirs(save_location)

save_interval  = 1000
print_interval = 1   # print a progress line every `print_interval` episodes

n_episodes   = 1000
sample_len   = 480   # passed to TradingEnv as `sample_len`
obs_data_len = 192   # passed to TradingEnv as `obs_data_len` and to the agent as `obs_len`
step_len     = 1     # passed to TradingEnv as `step_len`
risk_aversion = 1

# Negative rewards are multiplied by this factor before being stored by the
# agent; risk_aversion = 1 gives a multiplier of exactly 1 (no extra penalty).
risk_aversion_multiplier = 0.5 + risk_aversion / 2

# The agent's action space has 2 * n_action_intervals + 1 discrete actions.
n_action_intervals = 5

init_budget = 1

# torch.save(hyperparams, os.path.join(save_location, "hyperparams.pth"))

# Load the training frame and forward-fill gaps. `DataFrame.ffill()` replaces
# the deprecated `fillna(method='ffill', inplace=True)` form.
df = pd.read_hdf('dataset/binance_data_train.h5', 'STW')
df = df.ffill()

In [3]:
# Build the trading environment from the configuration values defined above.
# `initial_budget` now uses the `init_budget` config variable instead of a
# duplicated literal, so changing the config actually changes the environment.
env = TradingEnv(custom_args=None, env_id='custom_trading_env', obs_data_len=obs_data_len, step_len=step_len, sample_len=sample_len,
                       df=df, fee=0.001, initial_budget=init_budget, n_action_intervals=n_action_intervals, deal_col_name='c', sell_at_end=True,
                       feature_names=['o', 'h','l','c','v',
                                      'num_trades', 'taker_base_vol'])

# One reset is needed only to read the feature dimension of an observation
# (the last axis of the returned state).
initial_state = env.reset()
agent = dqn_agent.Agent(action_size=2 * n_action_intervals + 1, obs_len=obs_data_len, num_features=initial_state.shape[-1], **hyperparams)

# `agent.beta` starts at 0.4 and is raised by `beta_inc` after every episode
# (capped at 1 in the training loop), so it reaches 1 after
# `beta_anneal_episodes` episodes.
# NOTE(review): presumably the importance-sampling exponent of a prioritized
# replay buffer -- confirm against dqn_agent.
beta = 0.4
beta_anneal_episodes = 1000
beta_inc = (1 - beta) / beta_anneal_episodes
agent.beta = beta

# Bookkeeping shared with the training cell. `n_epi` lives here so that
# re-running the training cell continues the episode count.
scores_list = []
loss_list = []
n_epi = 0

[2019-08-27 18:11:21,556] Making new env: custom_trading_env
  self.price = self.df_sample[self.price_name].as_matrix()
  self.obs_features = self.df_sample[self.using_feature].as_matrix()


In [4]:

# Training loop: play `n_episodes` episodes, feeding every transition to the
# agent and printing a progress line every `print_interval` episodes.
for i_episode in range(n_episodes):
    # Running episode counter, initialised in the setup cell; it is incremented
    # before use, so it is always >= 1 below.
    n_epi += 1

    state = env.reset()
    score = 0.
    actions = []
    rewards = []
    fee_rates = []

    while True:
        # eps=0. is passed to the agent on every step of training.
        action = int(agent.act(state, eps=0.))
        actions.append(action)
        next_state, reward, done, _, fee_rate = env.step(action)

        rewards.append(reward)
        fee_rates.append(fee_rate)
        # The reported score uses the raw reward; only the value handed to the
        # agent is scaled by the risk-aversion multiplier.
        score += reward
        if reward < 0:
            reward *= risk_aversion_multiplier
        if done:
            # On the terminal step the stored action is overridden with the
            # last action index.
            # NOTE(review): presumably mirrors the env's sell_at_end=True
            # forced liquidation -- confirm against TradingEnv.
            action = 2 * n_action_intervals
        agent.step(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
    # NOTE(review): the original had `else: agent.memory.reset_multisteps()`
    # attached to this `while True:` loop. A while-else body only runs when the
    # loop condition becomes false, which never happens here (the loop exits
    # only via `break`), so that call was unreachable and has been removed.
    # Restore it on a bounded loop if episodes can end without `done`.

    beta = min(1, beta + beta_inc)
    agent.beta = beta

    scores_list.append(score)

    if n_epi % print_interval == 0:
        # `fee_rate` here is the value returned by the final step of the episode.
        print_str = "# of episode: {:d}, avg score: {:.4f}\n  Actions: {} \n Fee rate: {}".format(n_epi, sum(scores_list[-print_interval:]) / print_interval, np.array(actions), fee_rate)
        print(print_str)
#         with open(os.path.join(save_location, "output_log.txt"), mode='a') as f:
#             f.write(print_str + '\n')


# of episode: 1, avg score: -0.0124
  Actions: [ 6  9  0 10 10 10  9  9 10 10 10 10 10 10  9 10  9  8 10  7  9 10  8 10
 10  9  9  9  5  9 10  9  4  9  5  7 10 10 10  9 10 10  9  9  5  1  7 10
  9  9  9  9  9 10  9  9  6 10 10  9  9 10 10  5  5 10 10  9 10  9 10  8
  9  7  8 10  9  9  0  8 10 10  9  9  7  9 10  9 10  9  6  7 10 10 10  9
  0 10  9  9  7  6 10 10 10  8  0  9  6  9  9 10  9 10  6  9  9 10  7 10
 10  6 10 10  9 10  9 10  8  7 10 10 10  8  6 10  1 10  9  7  9 10 10 10
  9  9  9  7  8 10  0  9  9  9  9 10  9 10  9  7  4 10 10  4  7  0 10  5
  9 10  9  0  9  8  6 10  7  8  7  8  9  9  6  9  9  9 10  0 10  9  7  9
  7  9  9  9  9  7 10  0 10 10  9 10  9 10 10 10  7  9 10 10  7  6  7  5
  9  6  9  0 10 10  9 10  8 10 10  6 10 10 10  7  0 10 10 10 10 10  9 10
  9 10  9  9 10 10  5  6  0 10  8  9 10  6 10 10  9  9  8  9 10 10 10  0
 10  9  0 10  9  9  7  6  0  9 10 10  7 10  8 10  6  9  6 10  9 10  6] 
 Fee rate: 0.0009973223153581742
# of episode: 2, avg score: -0.0242
  Actions

# of episode: 10, avg score: -0.0368
  Actions: [10  9 10 10  6  9  9  7 10 10  1  0  9 10  1  9  8 10 10  5  9  6  9 10
 10  9 10 10  9  6  7  9  9  9  7 10 10  5  9 10 10  9 10 10  9  9  0  5
  9  0 10 10  0 10 10 10  6 10 10  6 10  6 10  7  9  0  9  7 10  9 10  5
  9  9 10  5  8  0  9  6  9  1 10 10  9  9 10  0  7 10 10  9 10 10 10  0
  9 10 10  0  8  6 10  9  7 10  9  8  9 10  0  1  6  0  7  0  9 10  9  9
  9  0  9 10 10 10 10  8 10 10 10 10  9 10  6  7  9  0  7 10  9  9  5 10
  9  9 10 10 10 10  6  9 10  9 10  9  0 10  9 10 10  0 10  9 10  9  9 10
  5  8  5 10 10  0 10  8 10 10  9  7  9  8  5  0 10  9  9  8 10  8  9  7
  5 10  0 10 10  9 10 10  5 10  7 10  7 10  8 10  7  9  9  7  7 10 10  7
 10  9  7  8  9  6  1  8 10  0  9 10 10  7 10 10  0  1  9  9  9 10  1  9
  9 10  9  8 10  9 10  9  7 10 10  7  5  9 10  9  7  7  9  0  9  8  0 10
  6  9  6  9  9 10  7  6  6  7  7 10 10  6  9  9  5 10 10  7  9  8 10] 
 Fee rate: 0.005171149466382671
# of episode: 11, avg score: -0.0766
  Action

KeyboardInterrupt: 