In [19]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from stable_baselines3 import DDPG
from stable_baselines3.common.env_util import make_vec_env

from environment import tradeEnv

# Read the train and test CSVs into DataFrames; the paths are relative to the
# notebook's working directory.
df_train, df_test = map(
    pd.read_csv,
    ("./dataset/yahoo_finance_train.csv", "./dataset/yahoo_finance_test.csv"),
)

def set_seeds(seed):
    """Seed every random number generator this notebook can draw from.

    Seeds Python's built-in ``random`` module, NumPy's global RNG and PyTorch
    (CPU, plus every CUDA device when CUDA is available).

    Args:
        seed: Integer seed applied to each generator.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def train_test(
    hyperparameters: dict,
    algo=DDPG,
    seed: int = 6885,
    total_timesteps: int = 10000,
    model_path: str = "ppo_stock_trading",
):
    """Train ``algo`` on the training data, then roll the saved policy out on the test data.

    The model is trained on a ``tradeEnv`` built from ``df_train``, saved to
    ``model_path``, reloaded against a ``tradeEnv`` built from ``df_test`` and
    queried deterministically once per row of ``df_test``.

    Args:
        hyperparameters: Keyword arguments forwarded to the ``algo`` constructor.
        algo: Stable-Baselines3 algorithm class to train (DDPG by default).
        seed: Seed passed to ``set_seeds`` before anything is built.
        total_timesteps: Training budget passed to ``model.learn``.
        model_path: Where the trained model is saved and reloaded from. The
            default name is kept for backward compatibility even though the
            default algorithm is DDPG, not PPO.

    Returns:
        Tuple ``(hyperparameters, portfolio_returns)``. ``portfolio_returns`` is
        the row-wise sum of ``df_test.pct_change()`` multiplied element-wise by
        the softmax-normalised actions recorded at each step.
    """
    set_seeds(seed)

    # ---- Training ----
    # Build the env inside the factory so the lambda does not close over a
    # name that is rebound to the vectorised env on the same line.
    train_env = make_vec_env(lambda: tradeEnv(df=df_train), n_envs=1)

    model = algo("MlpPolicy", train_env, verbose=0, **hyperparameters)
    model.learn(total_timesteps=total_timesteps)
    model.save(model_path)

    # ---- Testing ----
    test_env = make_vec_env(lambda: tradeEnv(df=df_test), n_envs=1)

    # Reload from disk so evaluation uses exactly what was persisted.
    model = algo.load(model_path, env=test_env)
    obs = test_env.reset()
    returns = df_test.pct_change()

    portfolio_weights = []
    for _ in range(len(df_test)):
        action, _states = model.predict(obs, deterministic=True)
        # Softmax turns the raw action into weights that sum to 1 up to float
        # rounding. NOTE(review): presumably mirrors a normalisation done
        # inside tradeEnv - confirm against environment.py.
        action_normalized = F.softmax(torch.tensor(action), dim=-1).numpy()
        # n_envs=1, so index 0 is the single environment's weight vector.
        portfolio_weights.append(action_normalized[0])
        # NOTE(review): the done flag is ignored; confirm the env's episode
        # spans len(df_test) steps, otherwise later weights come from a reset env.
        obs, _reward, _done, _info = test_env.step(action)

    weights = np.array(portfolio_weights)
    portfolio_returns = returns.multiply(weights).sum(axis=1)

    return hyperparameters, portfolio_returns

  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator


In [7]:
# Candidate DDPG configurations: each row below becomes one dict of constructor
# keyword arguments, with keys in the order given by the tuple of names.
ddpg_hyperparameters_sets = [
    dict(zip(("learning_rate", "batch_size", "gamma", "buffer_size", "tau"), row))
    for row in (
        (0.001, 64, 0.99, 1_000_000, 0.005),
        (0.0005, 128, 0.98, 500_000, 0.01),
        (0.0001, 256, 0.97, 100_000, 0.02),
        (0.002, 32, 0.96, 500_000, 0.005),
        (0.00025, 64, 0.95, 1_000_000, 0.01),
        (0.00075, 128, 0.94, 500_000, 0.005),
        (0.0002, 256, 0.93, 100_000, 0.02),
        (0.0015, 32, 0.92, 500_000, 0.01),
        (0.0003, 64, 0.91, 1_000_000, 0.015),
        (0.0008, 128, 0.90, 500_000, 0.01),
    )
]


In [21]:
# Train and evaluate one model per hyperparameter set. Each entry maps the
# stringified hyperparameter dict to that run's test returns, so the dict can
# later be turned into a wide DataFrame (one column per configuration).
results = {}
# Iterate directly: the enumerate() index was unused, and binding it to `_` at
# notebook top level shadows IPython's "last output" variable.
for params in ddpg_hyperparameters_sets:
    p, ret = train_test(params)
    results[str(p)] = ret

  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  o

In [22]:
# Persist the collected returns as CSV, one column per key of `results`.
# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing.
os.makedirs("./results", exist_ok=True)
pd.DataFrame(results).to_csv("./results/ddpg_returns.csv")

In [24]:
# Build a DataFrame from `results`: one column per key of the dict.
df = pd.DataFrame(data=results)