In [1]:
import os
print(os.getcwd())
from stable_baselines3 import A2C
from stable_baselines3.common.env_util import make_vec_env
from environment import tradeEnv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F

/Users/tiantian/Library/CloudStorage/GoogleDrive-tc3216@columbia.edu/My Drive/project


In [2]:
# Read the train and test CSV splits (in that order) from the local dataset folder.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "./dataset/yahoo_finance_train.csv",
        "./dataset/yahoo_finance_test.csv",
    )
)


Using cpu device
-------------------------------------
| time/                 |           |
|    fps                | 2393      |
|    iterations         | 100       |
|    time_elapsed       | 0         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -11.3     |
|    explained_variance | -4.49e-05 |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | -449      |
|    std                | 0.995     |
|    value_loss         | 2.81e+05  |
-------------------------------------


  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator


-------------------------------------
| time/                 |           |
|    fps                | 2370      |
|    iterations         | 200       |
|    time_elapsed       | 0         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -11.3     |
|    explained_variance | -0.00039  |
|    learning_rate      | 0.0007    |
|    n_updates          | 199       |
|    policy_loss        | -1.86e+04 |
|    std                | 0.992     |
|    value_loss         | 5.02e+06  |
-------------------------------------
-------------------------------------
| rollout/              |           |
|    ep_len_mean        | 1.41e+03  |
|    ep_rew_mean        | 1.15e+05  |
| time/                 |           |
|    fps                | 2345      |
|    iterations         | 300       |
|    time_elapsed       | 0         |
|    total_timesteps    | 1500      |
| train/                |           |
|    entropy_loss       | -11.3     |
|    explain

In [54]:
def train_test(hyperparameters: dict,
               total_timesteps: int = 10000,
               model_path: str = "a2c_stock_trading",
               seed=None):
    """Train an A2C agent on ``df_train`` and evaluate it on ``df_test``.

    Reads the module-level frames ``df_train`` and ``df_test``. The trained
    model is written to ``model_path`` and reloaded before evaluation, so each
    call overwrites whatever a previous call saved under the same path.

    Parameters
    ----------
    hyperparameters : dict
        Keyword arguments forwarded to the ``A2C`` constructor. The dict is
        not modified.
    total_timesteps : int, default 10000
        Training budget passed to ``model.learn``.
    model_path : str, default "a2c_stock_trading"
        Location used for ``model.save`` / ``A2C.load``.
    seed : int or None, default None
        When given (and ``hyperparameters`` has no ``"seed"`` of its own), it
        is forwarded to ``A2C`` so a run can be repeated.

    Returns
    -------
    tuple
        ``(hyperparameters, return_stocks_a2c)`` where the second item is the
        row-wise sum of ``df_test.pct_change()`` multiplied by the softmaxed
        actions collected over ``len(df_test)`` deterministic steps.
    """
    # Copy so defaults can be merged without touching the caller's dict, and
    # so a caller-supplied "verbose"/"seed" does not collide with ours.
    model_kwargs = dict(hyperparameters)
    model_kwargs.setdefault("verbose", 0)
    if seed is not None:
        model_kwargs.setdefault("seed", seed)

    # ---- Train ----------------------------------------------------------
    # Distinct names for the raw env and the vectorized wrapper: the factory
    # lambda must not close over a name that is rebound to the wrapper.
    raw_train_env = tradeEnv(df=df_train)
    train_env = make_vec_env(lambda: raw_train_env, n_envs=1)
    model = A2C("MlpPolicy", train_env, **model_kwargs)
    model.learn(total_timesteps=total_timesteps)
    model.save(model_path)

    # ---- Test -----------------------------------------------------------
    raw_test_env = tradeEnv(df=df_test)
    test_env = make_vec_env(lambda: raw_test_env, n_envs=1)
    model = A2C.load(model_path, env=test_env)
    obs = test_env.reset()
    returns = df_test.pct_change()

    # Roll the policy forward one step per row of df_test, recording the
    # softmax-normalized action of the single vectorized env each time.
    portfolio_weights = []
    for _step in range(len(df_test)):
        action, _states = model.predict(obs, deterministic=True)
        action_normalized = F.softmax(torch.tensor(action), dim=-1).numpy()
        portfolio_weights.append(action_normalized[0])
        obs, _reward, _done, _info = test_env.step(action)

    portfolio_weights_a2c = np.array(portfolio_weights)

    # NOTE(review): row i of `returns` is the change from row i-1 to row i,
    # while weights[i] is chosen from the observation at step i. Depending on
    # what tradeEnv exposes in its observation this may be a look-ahead;
    # confirm the alignment against the environment before trusting results.
    return_stocks_a2c = returns.multiply(portfolio_weights_a2c).sum(axis=1)

    return hyperparameters, return_stocks_a2c

In [55]:
# Candidate A2C configurations, written as one value row per configuration.
# Column order below is the key order of every resulting dict.
_HP_KEYS = (
    'learning_rate', 'gamma', 'gae_lambda', 'vf_coef',
    'ent_coef', 'max_grad_norm', 'n_steps',
)
_HP_ROWS = (
    (0.0007, 0.99, 0.92, 0.5,  0.01,  0.5, 5),
    (0.0005, 0.98, 0.95, 0.25, 0.02,  1,   10),
    (0.0001, 0.97, 0.9,  0.5,  0.02,  0.5, 20),
    (0.001,  0.96, 0.85, 0.3,  0.01,  1,   5),
    (0.0003, 0.95, 0.8,  0.4,  0.03,  0.8, 15),
    (0.0002, 0.94, 0.93, 0.2,  0.01,  0.6, 10),
    (0.0008, 0.93, 0.88, 0.6,  0.005, 0.7, 20),
    (0.0004, 0.92, 0.87, 0.7,  0.02,  0.9, 5),
    (0.0006, 0.91, 0.89, 0.3,  0.015, 1,   15),
    (0.0009, 0.90, 0.86, 0.4,  0.01,  0.8, 10),
)
hyperparameters_sets = [dict(zip(_HP_KEYS, row)) for row in _HP_ROWS]

In [56]:
# Map each configuration (keyed by the str() of its dict) to the return
# series produced by training and testing with it.
results = {}
for hp_set in hyperparameters_sets:
    used_params, strategy_returns = train_test(hp_set)
    results[str(used_params)] = strategy_returns

  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator
  numerator = np.exp(actions)
  denominator = np.sum(np.exp(actions))
  output = numerator/denominator


In [65]:
# Wide frame: one column per entry of `results`, labelled by its key.
cum = pd.DataFrame.from_dict(results, orient="columns")
# Optional export, currently disabled: cum.to_csv("./results/a2c.csv")