In [2]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical

import trading_env
from utils import collect_trajectories, device, clipped_surrogate
from PPOTradingAgent.model import CNNTradingAgent
from common.multiprocessing_env import  SubprocVecEnv

In [3]:
df = pd.read_hdf('dataset/SGXTWsample.h5', 'STW')
df.fillna(method='ffill', inplace=True)

In [4]:
# Hyperparameters
class args:
    def __init__(self,no_short):
        self.no_short = no_short
args = args(True)
device = device
learning_rate = 0.001
discount = 0.995
eps = 0.05
K_epoch = 3
num_steps = 128
beta = 0.4
num_envs = 16

In [5]:
def make_env():
    def _thunk():
        env = trading_env.make(custom_args= args, env_id='training_v1', obs_data_len=256, step_len=16,
                               df=df, fee=0.0, max_position=5, deal_col_name='Price',
                               feature_names=['Price', 'Volume',
                                              'Ask_price', 'Bid_price',
                                              'Ask_deal_vol', 'Bid_deal_vol',
                                              'Bid/Ask_deal', 'Updown'])

        return env

    return _thunk

In [6]:

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# if torch.cuda.is_available():
#     torch.set_default_tensor_type('torch.cuda.FloatTensor')
save_interval = 100

envs = [make_env() for _ in range(num_envs)]
envs = SubprocVecEnv(envs)
model = CNNTradingAgent().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

print_interval = 10

scores_list = []
loss_list = []
for n_epi in range(10000):  # Progress 10,000 rounds
    n_epi +=1
    loss = 0.0
    log_probs, states, actions, rewards, next_state, masks, values = collect_trajectories(envs,model,num_steps)

    # raise Exception("True" if torch.any(torch.isnan(torch.stack(states))) else "False")
    if beta>0.01:
        beta*=discount
    for _ in range(K_epoch):
        L = -clipped_surrogate(envs,model, log_probs, states, actions, rewards, discount, eps, beta)

        optimizer.zero_grad()
        L.backward()
        optimizer.step()

        loss+=L.item()
        del L


    score = np.asarray(rewards).sum(axis=0).mean()
    scores_list.append(score)
    loss_list.append(loss)

    if n_epi % print_interval == 0 and n_epi != 0:
        print("# of episode :{}, avg score : {:.4f}, loss : {:.6f}".format(
            n_epi, score / print_interval, loss / print_interval))
        print("actions : ", torch.cat(actions))
        

    if n_epi % save_interval ==0:
        torch.save(model.state_dict(), f'TradingGym_{n_epi}.pth')
        torch.save(scores_list, f"plot/{n_epi}_scores.pth")
        plt.plot(scores_list)
        plt.title("Reward")
        plt.grid(True)
        plt.savefig(f'plot/{n_epi}_ppo.png')
        plt.close()

envs.close()


[2019-07-29 15:54:55,303] Making new env: training_v1
[2019-07-29 15:54:55,307] Making new env: training_v1
[2019-07-29 15:54:55,312] Making new env: training_v1
[2019-07-29 15:54:55,317] Making new env: training_v1
[2019-07-29 15:54:55,322] Making new env: training_v1
[2019-07-29 15:54:55,327] Making new env: training_v1
[2019-07-29 15:54:55,331] Making new env: training_v1
[2019-07-29 15:54:55,336] Making new env: training_v1
[2019-07-29 15:54:55,342] Making new env: training_v1
[2019-07-29 15:54:55,351] Making new env: training_v1
[2019-07-29 15:54:55,347] Making new env: training_v1
[2019-07-29 15:54:55,370] Making new env: training_v1
[2019-07-29 15:54:55,374] Making new env: training_v1
[2019-07-29 15:54:55,356] Making new env: training_v1
[2019-07-29 15:54:55,360] Making new env: training_v1
[2019-07-29 15:54:55,365] Making new env: training_v1


# of episode :10, avg score : 0.8390, loss : -0.128687
actions :  tensor([2, 1, 0,  ..., 2, 0, 0], device='cuda:2')
# of episode :20, avg score : -0.4637, loss : -0.112435
actions :  tensor([2, 0, 2,  ..., 1, 2, 2], device='cuda:2')


Process Process-9:
Process Process-5:
Process Process-8:


# of episode :30, avg score : -0.4068, loss : -0.105584
actions :  tensor([1, 0, 1,  ..., 0, 2, 2], device='cuda:2')


Process Process-13:
Process Process-16:
Process Process-15:
Process Process-14:
Process Process-1:
Process Process-2:
Process Process-6:
Process Process-3:
Process Process-11:
Process Process-12:
Process Process-4:
Process Process-10:
Process Process-7:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (m

  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessing/connection.py", line 250, in recv
    buf = self._recv_bytes()
  File "/home/jeffrey/anaconda3/envs/RL/lib/python3.7/multiprocessi

KeyboardInterrupt: 