In [43]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from stable_baselines3 import A2C, PPO, DDPG
from stable_baselines3.common.env_util import make_vec_env
from environment import tradeEnv
from tqdm import tqdm


def set_seeds(seed):
    """Seed every global random number generator this notebook may touch.

    Seeds Python's built-in ``random`` module, NumPy's legacy global
    generator and torch (CPU, plus every CUDA device when CUDA is
    available) so that a run can be repeated with the same random draws.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Local import: the notebook's import cell does not import `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

In [44]:
# Load the training split and the evaluation split.
# Each CSV's first column is read as the index and then discarded in favour
# of a fresh 0..n-1 RangeIndex.
# NOTE: `df_test` currently holds the validation split; to evaluate on the
# held-out test split, replace "./dataset/val.csv" with "./dataset/test.csv".
df_train, df_test = (
    pd.read_csv(csv_path, index_col=0).reset_index(drop=True)
    for csv_path in ("./dataset/train.csv", "./dataset/val.csv")
)

In [8]:
# df_train = pd.read_csv("./dataset/yahoo_finance_train.csv")
# df_test = pd.read_csv("./dataset/yahoo_finance_test.csv")

In [45]:
# Ten candidate DDPG configurations for the hyperparameter sweep.
# Each dict is unpacked as keyword arguments into the algorithm constructor
# by train_test(), so every key must be a valid DDPG constructor argument.
# gamma steps down from 0.99 to 0.90 across the entries; the other values
# are varied by hand.
ddpg_hyperparameters_sets = [
    {'learning_rate': 0.001, 'batch_size': 64, 'gamma': 0.99, 'buffer_size': 1000000, 'tau': 0.005},
    {'learning_rate': 0.0005, 'batch_size': 128, 'gamma': 0.98, 'buffer_size': 500000, 'tau': 0.01},
    {'learning_rate': 0.0001, 'batch_size': 256, 'gamma': 0.97, 'buffer_size': 100000, 'tau': 0.02},
    {'learning_rate': 0.002, 'batch_size': 32, 'gamma': 0.96, 'buffer_size': 500000, 'tau': 0.005},
    {'learning_rate': 0.00025, 'batch_size': 64, 'gamma': 0.95, 'buffer_size': 1000000, 'tau': 0.01},
    {'learning_rate': 0.00075, 'batch_size': 128, 'gamma': 0.94, 'buffer_size': 500000, 'tau': 0.005},
    {'learning_rate': 0.0002, 'batch_size': 256, 'gamma': 0.93, 'buffer_size': 100000, 'tau': 0.02},
    {'learning_rate': 0.0015, 'batch_size': 32, 'gamma': 0.92, 'buffer_size': 500000, 'tau': 0.01},
    {'learning_rate': 0.0003, 'batch_size': 64, 'gamma': 0.91, 'buffer_size': 1000000, 'tau': 0.015},
    {'learning_rate': 0.0008, 'batch_size': 128, 'gamma': 0.90, 'buffer_size': 500000, 'tau': 0.01}
]

# Ten candidate A2C configurations for the hyperparameter sweep.
# Each dict is unpacked as keyword arguments into the algorithm constructor
# by train_test(), so every key must be a valid A2C constructor argument.
# gamma steps down from 0.99 to 0.90 across the entries; n_steps is one of
# 5, 10, 15 or 20.
a2c_hyperparameters_sets = [
    {'learning_rate': 0.0007, 'gamma': 0.99, 'gae_lambda': 0.92, 'vf_coef': 0.5, 'ent_coef': 0.01, 'max_grad_norm': 0.5, 'n_steps': 5},
    {'learning_rate': 0.0005, 'gamma': 0.98, 'gae_lambda': 0.95, 'vf_coef': 0.25, 'ent_coef': 0.02, 'max_grad_norm': 1, 'n_steps': 10},
    {'learning_rate': 0.0001, 'gamma': 0.97, 'gae_lambda': 0.9, 'vf_coef': 0.5, 'ent_coef': 0.02, 'max_grad_norm': 0.5, 'n_steps': 20},
    {'learning_rate': 0.001, 'gamma': 0.96, 'gae_lambda': 0.85, 'vf_coef': 0.3, 'ent_coef': 0.01, 'max_grad_norm': 1, 'n_steps': 5},
    {'learning_rate': 0.0003, 'gamma': 0.95, 'gae_lambda': 0.8, 'vf_coef': 0.4, 'ent_coef': 0.03, 'max_grad_norm': 0.8, 'n_steps': 15},
    {'learning_rate': 0.0002, 'gamma': 0.94, 'gae_lambda': 0.93, 'vf_coef': 0.2, 'ent_coef': 0.01, 'max_grad_norm': 0.6, 'n_steps': 10},
    {'learning_rate': 0.0008, 'gamma': 0.93, 'gae_lambda': 0.88, 'vf_coef': 0.6, 'ent_coef': 0.005, 'max_grad_norm': 0.7, 'n_steps': 20},
    {'learning_rate': 0.0004, 'gamma': 0.92, 'gae_lambda': 0.87, 'vf_coef': 0.7, 'ent_coef': 0.02, 'max_grad_norm': 0.9, 'n_steps': 5},
    {'learning_rate': 0.0006, 'gamma': 0.91, 'gae_lambda': 0.89, 'vf_coef': 0.3, 'ent_coef': 0.015, 'max_grad_norm': 1, 'n_steps': 15},
    {'learning_rate': 0.0009, 'gamma': 0.90, 'gae_lambda': 0.86, 'vf_coef': 0.4, 'ent_coef': 0.01, 'max_grad_norm': 0.8, 'n_steps': 10}
]


# Ten candidate PPO configurations for the hyperparameter sweep.
# Each dict is unpacked as keyword arguments into the algorithm constructor
# by train_test(), so every key must be a valid PPO constructor argument.
# In every entry n_steps is an exact multiple of batch_size (train_test()
# builds a single environment, so the rollout length equals n_steps).
# NOTE(review): several entries set n_steps to 1024 or 2048 while train_test()
# trains for 10000 timesteps, so only a handful of rollouts are collected per
# configuration — confirm this budget is intended.
ppo_hyperparameters_sets = [
    {'learning_rate': 0.00025, 'n_steps': 2048, 'batch_size': 64, 'n_epochs': 10, 'gamma': 0.99, 'gae_lambda': 0.95, 'clip_range': 0.2, 'ent_coef': 0.01},
    {'learning_rate': 0.0003, 'n_steps': 1024, 'batch_size': 32, 'n_epochs': 4, 'gamma': 0.98, 'gae_lambda': 0.92, 'clip_range': 0.3, 'ent_coef': 0.0},
    {'learning_rate': 0.0002, 'n_steps': 512, 'batch_size': 128, 'n_epochs': 20, 'gamma': 0.99, 'gae_lambda': 0.9, 'clip_range': 0.1, 'ent_coef': 0.02},
    {'learning_rate': 0.0001, 'n_steps': 256, 'batch_size': 64, 'n_epochs': 15, 'gamma': 0.97, 'gae_lambda': 0.94, 'clip_range': 0.2, 'ent_coef': 0.05},
    {'learning_rate': 0.0005, 'n_steps': 2048, 'batch_size': 32, 'n_epochs': 10, 'gamma': 0.95, 'gae_lambda': 0.95, 'clip_range': 0.25, 'ent_coef': 0.01},
    {'learning_rate': 0.00005, 'n_steps': 1024, 'batch_size': 64, 'n_epochs': 5, 'gamma': 0.99, 'gae_lambda': 0.98, 'clip_range': 0.15, 'ent_coef': 0.03},
    {'learning_rate': 0.0002, 'n_steps': 2048, 'batch_size': 128, 'n_epochs': 4, 'gamma': 0.98, 'gae_lambda': 0.93, 'clip_range': 0.2, 'ent_coef': 0.02},
    {'learning_rate': 0.00015, 'n_steps': 512, 'batch_size': 32, 'n_epochs': 8, 'gamma': 0.96, 'gae_lambda': 0.95, 'clip_range': 0.1, 'ent_coef': 0.04},
    {'learning_rate': 0.00035, 'n_steps': 2048, 'batch_size': 64, 'n_epochs': 12, 'gamma': 0.97, 'gae_lambda': 0.96, 'clip_range': 0.2, 'ent_coef': 0.01},
    {'learning_rate': 0.0001, 'n_steps': 1024, 'batch_size': 128, 'n_epochs': 6, 'gamma': 0.95, 'gae_lambda': 0.97, 'clip_range': 0.25, 'ent_coef': 0.02}
]


In [46]:
def train_test(
    hyperparameters: dict,
    algo=A2C,
    total_timesteps: int = 10000,
    seed: int = 6885,
    model_path: str = "algo_stock_trading",
):
    """Train one agent on ``df_train`` and replay it deterministically on ``df_test``.

    The global data frames ``df_train`` and ``df_test`` are read directly.
    The trained model is written to ``model_path`` and reloaded from there
    before evaluation, so the checkpoint on disk is the one being evaluated.

    Args:
        hyperparameters: Keyword arguments forwarded unchanged to the
            ``algo`` constructor.
        algo: Stable-Baselines3 algorithm class (e.g. ``A2C``, ``PPO``, ``DDPG``).
        total_timesteps: Training budget passed to ``model.learn``.
        seed: Seed handed to ``set_seeds`` before any environment is built.
        model_path: Path used to save and reload the trained model.

    Returns:
        A tuple ``(portfolio_weights, hyperparameters, portfolio_returns)``:
        ``portfolio_weights`` is an array with one row per evaluation step
        holding the softmax-normalised action; ``hyperparameters`` is the
        input dict returned unchanged; ``portfolio_returns`` is a Series with
        the weighted sum of ``df_test.pct_change()`` for each row.
    """
    set_seeds(seed)

    # --- Training -------------------------------------------------------
    # Keep the raw env under its own name: the factory lambda looks the name
    # up when it is called, so it must never be rebound to the vectorised
    # wrapper returned by make_vec_env.
    raw_train_env = tradeEnv(df=df_train)
    train_env = make_vec_env(lambda: raw_train_env, n_envs=1)

    model = algo("MlpPolicy", train_env, verbose=0, **hyperparameters)
    model.learn(total_timesteps=total_timesteps)
    model.save(model_path)

    # --- Evaluation -----------------------------------------------------
    raw_test_env = tradeEnv(df=df_test)
    test_env = make_vec_env(lambda: raw_test_env, n_envs=1)

    model = algo.load(model_path, env=test_env)
    obs = test_env.reset()
    returns = df_test.pct_change()

    portfolio_weights = []
    for _step in range(len(df_test)):
        action, _state = model.predict(obs, deterministic=True)
        # Softmax turns the raw action into weights that are positive and
        # sum to 1 along the last axis.
        action_normalized = F.softmax(torch.tensor(action), dim=-1).numpy()
        # Index 0 selects the single environment of the vectorised wrapper.
        portfolio_weights.append(action_normalized[0])
        # The environment is stepped with the raw (un-normalised) action.
        obs, _, _, _ = test_env.step(action)

    portfolio_weights = np.array(portfolio_weights)
    # NOTE(review): this requires the action dimension to equal the number of
    # columns in df_test — confirm against tradeEnv's action space.
    # pct_change() leaves the first row NaN; the row-wise sum skips NaN, so
    # the first entry of the result is 0.0.
    portfolio_returns = returns.multiply(portfolio_weights).sum(axis=1)

    return portfolio_weights, hyperparameters, portfolio_returns

In [47]:
# Sweep all A2C configurations; each configuration contributes one column
# (keyed by the string form of its hyperparameter dict) of per-step returns.
results = {}  # wide dataframe
# Iterate the list directly so tqdm knows the total and can show progress/ETA.
for params in tqdm(a2c_hyperparameters_sets):
    _, p, ret = train_test(params, algo=A2C)
    results[str(p)] = ret
# Make sure the output directory exists so the results of the whole sweep
# are not lost to a missing-directory error at write time.
os.makedirs("./re3", exist_ok=True)
pd.DataFrame(results).to_csv("./re3/a2c.csv")

  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  o

In [48]:
# Sweep all PPO configurations; each configuration contributes one column
# (keyed by the string form of its hyperparameter dict) of per-step returns.
results = {}  # wide dataframe
# Iterate the list directly so tqdm knows the total and can show progress/ETA.
for params in tqdm(ppo_hyperparameters_sets):
    _, p, ret = train_test(params, algo=PPO)
    results[str(p)] = ret
# Make sure the output directory exists so the results of the whole sweep
# are not lost to a missing-directory error at write time.
os.makedirs("./re3", exist_ok=True)
pd.DataFrame(results).to_csv("./re3/ppo.csv")

  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
10it [00:59,  5.92s/it]


In [49]:
# Sweep all DDPG configurations; each configuration contributes one column
# (keyed by the string form of its hyperparameter dict) of per-step returns.
results = {}  # wide dataframe
# Iterate the list directly so tqdm knows the total and can show progress/ETA.
for params in tqdm(ddpg_hyperparameters_sets):
    _, p, ret = train_test(params, algo=DDPG)
    results[str(p)] = ret
# Make sure the output directory exists so the results of the whole sweep
# are not lost to a missing-directory error at write time.
os.makedirs("./re3", exist_ok=True)
pd.DataFrame(results).to_csv("./re3/ddpg.csv")

  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  o