In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from custom_trading_env import TradingEnv
from utils import device
import DQNTradingAgent.dqn_agent as dqn_agent
from custom_hyperparameters import hyperparams


In [2]:
class Args:
    """Run configuration standing in for command-line arguments.

    The single instance, ``args``, is read by later cells and is also handed
    to ``TradingEnv`` as ``custom_args``.
    """

    def __init__(self):
        self.device_num = 3      # CUDA device index used to build the torch device
        self.save_num = 1        # suffix of the save directory 'saves/Original/<save_num>'
        self.risk_aversion = 1   # feeds risk_aversion_multiplier = 0.5 + risk_aversion / 2
        self.n_episodes = 100    # number of training episodes
        self.fee = 0.001         # fee forwarded to the trading environment


# The class is deliberately not called ``args``: binding the instance to the
# class's own name would make this cell fail on a second run
# ("'args' object is not callable").
args = Args()
# device_num, save_num, risk_aversion, n_episodes, fee

# Use the requested GPU when it exists; otherwise fall back to the CPU so the
# notebook still runs on machines with fewer (or no) CUDA devices.
if torch.cuda.is_available() and args.device_num < torch.cuda.device_count():
    device = torch.device("cuda:{}".format(args.device_num))
else:
    print("cuda:{} is not available, falling back to CPU".format(args.device_num))
    device = torch.device("cpu")
dqn_agent.set_device(device)

# Directory that receives the hyperparameters, scores and checkpoints of this run.
save_location = 'saves/Original/{}'.format(args.save_num)

# exist_ok=True removes the check-then-create race and keeps the cell re-runnable.
os.makedirs(save_location, exist_ok=True)

In [3]:
# --- Checkpoint / logging cadence, both counted in episodes ---
save_interval  = 200
print_interval = 1

# --- Episode and environment settings (forwarded to TradingEnv below) ---
n_episodes   = args.n_episodes
sample_len   = 480
obs_data_len = 192
step_len     = 1
fee          = args.fee
sell_at_end  = False

# Factor applied to negative rewards before they are handed to the agent;
# it evaluates to 1.0 for args.risk_aversion == 1 (no extra penalty).
risk_aversion_multiplier = 0.5 + args.risk_aversion / 2

# The agent is created with 2 * n_action_intervals + 1 discrete actions.
n_action_intervals = 5

init_budget = 1

# Store the hyperparameters alongside the checkpoints of this run.
torch.save(hyperparams, os.path.join(save_location, "hyperparams.pth"))

# Load the training frame and forward-fill gaps. DataFrame.ffill() replaces
# the deprecated fillna(method='ffill', inplace=True) and avoids in-place mutation.
df = pd.read_hdf('dataset/binance_data_train.h5', 'STW')
df = df.ffill()

In [4]:
# Build the trading environment on the training frame. The starting budget
# comes from the `init_budget` constant instead of a duplicated literal.
env = TradingEnv(
    custom_args=args,
    env_id='custom_trading_env',
    obs_data_len=obs_data_len,
    step_len=step_len,
    sample_len=sample_len,
    df=df,
    fee=fee,
    initial_budget=init_budget,
    n_action_intervals=n_action_intervals,
    deal_col_name='c',
    sell_at_end=sell_at_end,
    feature_names=['o', 'h', 'l', 'c', 'v',
                   'num_trades', 'taker_base_vol'],
)

# The feature count is taken from the last axis of an initial observation,
# so it always matches what the environment actually emits.
agent = dqn_agent.Agent(
    action_size=2 * n_action_intervals + 1,
    obs_len=obs_data_len,
    num_features=env.reset().shape[-1],
    **hyperparams
)

# `beta` is raised by `beta_inc` after every episode and capped at 1 in the
# training loop. NOTE(review): presumably the prioritized-replay
# importance-sampling exponent -- confirm against dqn_agent.
beta = 0.4
beta_inc = (1 - beta) / 1000
agent.beta = beta

scores_list = []  # one raw (unshaped) score per finished episode
loss_list = []


[2019-08-28 22:29:07,706] Making new env: custom_trading_env
  self.price = self.df_sample[self.price_name].as_matrix()
  self.obs_features = self.df_sample[self.using_feature].as_matrix()


In [6]:
# Train for `n_episodes` episodes; each episode runs until the environment
# reports `done`.
for n_epi in range(n_episodes):

    # Disabled curriculum (kept for reference):
    # if (n_epi + 1) % 500 == 0:
    #     sample_len += 480
    #     env.sample_len = sample_len

    state = env.reset()
    score = 0.
    actions = []
    rewards = []

    # No `else` clause on this loop: with `while True` the only exit is
    # `break`, so an `else` body (e.g. agent.memory.reset_multisteps())
    # could never execute.
    while True:
        action = int(agent.act(state, eps=0.))
        next_state, reward, done, _ = env.step(action)

        rewards.append(reward)
        # The score accumulates the raw reward; the risk-aversion scaling
        # below only affects what the agent is given to learn from.
        score += reward
        if reward < 0:
            reward *= risk_aversion_multiplier
        if sell_at_end and done:
            # Record the final step under the highest action index.
            action = 2 * n_action_intervals
        actions.append(action)
        agent.step(state, action, reward, next_state, done)
        state = next_state
        if done:
            break

    # Raise beta by one increment per episode, capped at 1.
    beta = min(1, beta + beta_inc)
    agent.beta = beta

    scores_list.append(score)

    if n_epi % print_interval == 0 and n_epi != 0:
        print_str = "# of episode: {:d}, avg score: {:.4f}\n  Actions: {}".format(n_epi, sum(scores_list[-print_interval:]) / print_interval, np.array(actions))
        print(print_str)
        # with open(os.path.join(save_location, "output_log.txt"), mode='a') as f:
        #     f.write(print_str + '\n')

    # Checkpoint periodically AND on the last episode. Without the second
    # condition a run shorter than `save_interval` would only ever persist the
    # weights from episode 0.
    is_last_episode = n_epi == n_episodes - 1
    if n_epi % save_interval == 0 or is_last_episode:
        torch.save(agent.qnetwork_local.state_dict(), os.path.join(save_location, 'TradingGym_Rainbow_{:d}.pth'.format(n_epi)))
        torch.save(scores_list, os.path.join(save_location, 'scores.pth'))

del env


# of episode: 1, avg score: -0.1308
  Actions: [ 2  2  4  5  4  2  2  2  4  2  7  2  9  5  2  7  7  2  6  2  2  5  2  2
  0  2  4  4  5  4  4  2  2  4  9  2  2  7  2  4  2  1  5  2  4  4  4  4
  3  2  2  2  2  2  5  2  0  2  2  2  2  2  4  4  2  2  4  3  4  0  2  2
  5  4  4  7  4  5  2  7  6  4  2  2  2  3  4  0  2  4  2  5  2  2  7  2
  6  4  4  2  2  4  2  2  2  2  2  2  5  2  9  7  2  4  4  2  2  2  2  4
  2  4  2  2  2  2  4  2  2  4  2  2  2  4  2  9  9  2  0  2  2  2  2  4
  5  9  2  4  4  4  2  2  2  2  2  2  4  4  5  2  4  2  2  4  2  2  2  4
  2  2  2  0  2  5  4  2  4  4  9  0  2  2  2  4  4  2  5  4  2  4  1  2
  2  5  5  2  2  4  9  9  4  5  2  2  4  2  2  9  5  2  2 10  4  2  9  5
  5  2  2  2  2  2  5  5  4  9  2  2  2  2  2  2  4  2  9  2  0  2 10  2
  2  2  4  2  2  2  2  1  2  9  5  0  4  2  2  2  4  4  5  4  4  4  4  2
  4  2  5  2  2  2  2  4  2  2  9  0  4  2  2  2  9  5  2  5  2  6  2]
# of episode: 2, avg score: -0.0229
  Actions: [ 2  4  2  5  5  1  2  2  2  2  

KeyboardInterrupt: 