In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

import trading_env
from utils import collect_trajectories, device, clipped_surrogate
from PPOTradingAgent.model import CNNTradingAgent
from PPO.common.multiprocessing_env import  SubprocVecEnv

In [2]:
df = pd.read_hdf('dataset/SGXTWsample.h5', 'STW')
df.fillna(method='ffill', inplace=True)

In [3]:
# Hyperparameters
class args:
    def __init__(self,no_short):
        self.no_short = no_short
args = args(True)
device = device
learning_rate = 0.001
discount = 0.995
eps = 0.05
K_epoch = 3
num_steps = 128
beta = 0.4
num_envs = 16

In [None]:
def make_env():
    def _thunk():
        env = trading_env.make(custom_args= args, env_id='training_v1', obs_data_len=256, step_len=16,
                               df=df, fee=0.0, max_position=5, deal_col_name='Price',
                               feature_names=['Price', 'Volume',
                                              'Ask_price', 'Bid_price',
                                              'Ask_deal_vol', 'Bid_deal_vol',
                                              'Bid/Ask_deal', 'Updown'])

        return env

    return _thunk

In [None]:

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# if torch.cuda.is_available():
#     torch.set_default_tensor_type('torch.cuda.FloatTensor')
save_interval = 100

envs = [make_env() for _ in range(num_envs)]
envs = SubprocVecEnv(envs)
model = CNNTradingAgent().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print_interval = 10

scores_list = []
loss_list = []
for n_epi in range(10000):  # 게임 1만판 진행
    n_epi +=1
    loss = 0.0
    log_probs, states, actions, rewards, next_state, masks, values = collect_trajectories(envs,model,num_steps)

    # raise Exception("True" if torch.any(torch.isnan(torch.stack(states))) else "False")
    if beta>0.01:
        beta*=discount
    for _ in range(K_epoch):
        L = -clipped_surrogate(envs,model, log_probs, states, actions, rewards, discount, eps, beta)

        optimizer.zero_grad()
        L.backward()
        optimizer.step()

        loss+=L.item()
        del L


    score = np.asarray(rewards).sum(axis=0).mean()
    scores_list.append(score)
    loss_list.append(loss)

    if n_epi % print_interval == 0 and n_epi != 0:
        print("# of episode :{}, avg score : {:.4f}, loss : {:.6f}".format(
            n_epi, score / print_interval, loss / print_interval))
        print("actions : ", torch.cat(actions))
        

    if n_epi % save_interval ==0:
        torch.save(model.state_dict(), f'TradingGym_{n_epi}.pth')
        torch.save(scores_list, f"plot/{n_epi}_scores.pth")
        plt.plot(scores_list)
        plt.title("Reward")
        plt.grid(True)
        plt.savefig(f'plot/{n_epi}_ppo.png')
        plt.close()

envs.close()


[2019-07-11 13:56:22,308] Making new env: training_v1
[2019-07-11 13:56:22,313] Making new env: training_v1
[2019-07-11 13:56:22,317] Making new env: training_v1
[2019-07-11 13:56:22,323] Making new env: training_v1
[2019-07-11 13:56:22,328] Making new env: training_v1
[2019-07-11 13:56:22,333] Making new env: training_v1
[2019-07-11 13:56:22,338] Making new env: training_v1
[2019-07-11 13:56:22,343] Making new env: training_v1
[2019-07-11 13:56:22,348] Making new env: training_v1
[2019-07-11 13:56:22,353] Making new env: training_v1
[2019-07-11 13:56:22,364] Making new env: training_v1
[2019-07-11 13:56:22,370] Making new env: training_v1
[2019-07-11 13:56:22,358] Making new env: training_v1
[2019-07-11 13:56:22,376] Making new env: training_v1
[2019-07-11 13:56:22,382] Making new env: training_v1
[2019-07-11 13:56:22,388] Making new env: training_v1
  self.price = self.df_sample[self.price_name].as_matrix()
  self.price = self.df_sample[self.price_name].as_matrix()
  self.price = sel

  self.obs_features = self.df_sample[self.using_feature].as_matrix()


# of episode :10, avg score : -0.0824, loss : -0.125099
actions :  tensor([1, 1, 2,  ..., 0, 1, 1], device='cuda:2')
# of episode :20, avg score : 0.1651, loss : -0.119348
actions :  tensor([2, 1, 0,  ..., 2, 0, 1], device='cuda:2')
# of episode :30, avg score : -0.1132, loss : -0.113250
actions :  tensor([2, 1, 0,  ..., 2, 2, 1], device='cuda:2')
# of episode :40, avg score : -0.0331, loss : -0.108172
actions :  tensor([1, 1, 0,  ..., 2, 0, 1], device='cuda:2')
# of episode :50, avg score : 0.1204, loss : -0.102430
actions :  tensor([2, 0, 2,  ..., 2, 0, 0], device='cuda:2')
# of episode :60, avg score : -0.1065, loss : -0.097657
actions :  tensor([0, 1, 0,  ..., 0, 2, 1], device='cuda:2')
# of episode :70, avg score : 0.1535, loss : -0.092856
actions :  tensor([0, 0, 0,  ..., 0, 2, 0], device='cuda:2')
# of episode :80, avg score : 0.1595, loss : -0.088346
actions :  tensor([1, 1, 1,  ..., 2, 2, 2], device='cuda:2')
# of episode :90, avg score : -0.0102, loss : -0.084171
actions :  t

# of episode :710, avg score : -0.0383, loss : -0.003238
actions :  tensor([0, 2, 0,  ..., 0, 2, 2], device='cuda:2')
# of episode :720, avg score : 0.0582, loss : -0.003575
actions :  tensor([2, 1, 0,  ..., 0, 0, 0], device='cuda:2')
# of episode :730, avg score : 0.0821, loss : -0.003508
actions :  tensor([0, 1, 1,  ..., 0, 2, 0], device='cuda:2')
# of episode :740, avg score : -0.0076, loss : -0.003328
actions :  tensor([1, 1, 2,  ..., 0, 0, 1], device='cuda:2')
# of episode :750, avg score : -0.1140, loss : -0.002735
actions :  tensor([0, 1, 1,  ..., 0, 1, 1], device='cuda:2')
# of episode :760, avg score : -0.0424, loss : -0.002440
actions :  tensor([0, 2, 0,  ..., 1, 0, 2], device='cuda:2')
# of episode :770, avg score : -0.0494, loss : -0.003053
actions :  tensor([0, 1, 0,  ..., 0, 1, 0], device='cuda:2')
# of episode :780, avg score : -0.0453, loss : -0.003249
actions :  tensor([0, 1, 2,  ..., 2, 0, 1], device='cuda:2')
# of episode :790, avg score : -0.0367, loss : -0.003179
a

# of episode :1410, avg score : -0.0395, loss : -0.001139
actions :  tensor([0, 1, 0,  ..., 2, 2, 2], device='cuda:2')
# of episode :1420, avg score : 0.0061, loss : -0.001629
actions :  tensor([1, 0, 0,  ..., 0, 2, 0], device='cuda:2')
# of episode :1430, avg score : 0.0574, loss : -0.003096
actions :  tensor([0, 1, 2,  ..., 2, 0, 2], device='cuda:2')
# of episode :1440, avg score : 0.0066, loss : -0.002826
actions :  tensor([0, 0, 0,  ..., 2, 0, 2], device='cuda:2')
# of episode :1450, avg score : 0.0114, loss : -0.003004
actions :  tensor([0, 1, 2,  ..., 1, 1, 2], device='cuda:2')
# of episode :1460, avg score : -0.0163, loss : -0.002691
actions :  tensor([2, 0, 1,  ..., 0, 0, 2], device='cuda:2')
# of episode :1470, avg score : -0.0519, loss : -0.003061
actions :  tensor([2, 0, 0,  ..., 2, 2, 1], device='cuda:2')
# of episode :1480, avg score : -0.0232, loss : -0.003041
actions :  tensor([0, 2, 0,  ..., 2, 0, 2], device='cuda:2')
# of episode :1490, avg score : -0.0113, loss : -0.0