In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')
from finrl.config_tickers import DOW_30_TICKER,NAS_100_TICKER,HSI_50_TICKER,DAX_30_TICKER,SP_500_TICKER
import datetime
import seaborn as sns
import pickle
%matplotlib inline
from finrl.config_tickers import DOW_30_TICKER
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import data_split
from stock_env import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent,DRLEnsembleAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline,trx_plot
from feature_engineer import FeatureEngineer
from models import DRLAgent
from portfolio_optimization_env import PortfolioOptimizationEnv
from pprint import pprint
import scienceplots
import sys
sys.path.append("../FinRL-Library")
import quantstats as qs
plt.style.use('science')
import os
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)

check_and_make_directories([DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR])

def DRL_prediction(model, environment,time_window, deterministic=True):
    """make a prediction and get results"""
    test_env, test_obs = environment.get_sb_env()
    account_memory = None  # This help avoid unnecessary list creation
    actions_memory = None  # optimize memory consumption
    # state_memory=[] #add memory pool to store states

    test_env.reset()
    max_steps = len(environment._df.index.unique()) - time_window - 1

    for i in range(len(environment._df.index.unique())):
        action, _states = model.predict(test_obs, deterministic=deterministic)
        # account_memory = test_env.env_method(method_name="save_asset_memory")
        # actions_memory = test_env.env_method(method_name="save_action_memory")
        test_obs, rewards, dones, info = test_env.step(action)
        if i == max_steps:  # more descriptive condition for early termination to clarify the logic
            date_list = environment._date_memory
            portfolio_return = environment._portfolio_return_memory
            # print(len(date_list))
            # print(len(asset_list))
            df_account_value = pd.DataFrame(
                {"date": date_list, "daily_return": portfolio_return,'account' :  environment._asset_memory["final"],'weights':environment._final_weights}
            )
            df_date = pd.DataFrame(date_list)
            df_date.columns = ["date"]

            action_list = environment._actions_memory
            df_actions = pd.DataFrame(action_list)
            tiks = environment._tic_list
            df_actions.columns = np.insert(tiks,0,'POS')
            df_actions.index = df_date.date
            # df_actions = pd.DataFrame({'date':date_list,'actions':action_list})
            account_memory = df_account_value
            actions_memory = df_actions
        # add current state to state memory
        # state_memory=test_env.env_method(method_name="save_state_memory")

        if dones[0]:
            print("hit end!")
            break
    return account_memory, actions_memory,test_obs
TRAIN_START_DATE = '2000-01-01'
TRAIN_END_DATE = '2022-12-30'
TEST_START_DATE = '2023-01-01'
TEST_END_DATE = '2024-10-01'
from pandas import read_csv


df_dow =read_csv('./data/dow.csv')
df_nasdaq =read_csv('./data/nasdaq.csv')
df_hsi = read_csv('./data/hsi.csv')
df_dax = read_csv('./data/dax.csv')
df_sp500 = read_csv('./data/sp500.csv')

In [2]:

INDICATORS = [
    "close_5_ema",
]
fe = FeatureEngineer(use_technical_indicator=False,
                     tech_indicator_list = INDICATORS,
                     use_turbulence=True,
                     user_defined_feature =True)

processed = fe.preprocess_data(df_sp500)
processed = processed.fillna(0)
processed= processed.replace(np.inf,0)
train_data= data_split(processed, TRAIN_START_DATE, TRAIN_END_DATE)
test_data = data_split(processed, TEST_START_DATE, TEST_END_DATE)


Successfully added turbulence index
Successfully added user defined features


In [24]:
train_data_sample = train_data.copy()
test_data_sample = test_data.copy()
np.random.seed(42)

In [None]:
train_data_sample = train_data.copy()
test_data_sample = test_data.copy()
columns = np.random.choice(train_data.tic.unique(),10,replace=True)
train_data_sample = train_data_sample[train_data_sample['tic'].isin(columns)]
test_data_sample = test_data_sample[train_data_sample['tic'].isin(columns)]


In [3]:
final_result = []
models = [
    {'name': 'ppo', 'args': {
        "n_steps": 2048,
        "ent_coef": 0.01,
        "learning_rate": 0.0003,
        "batch_size": 128,
    }},
    {'name': 'a2c', 'args': None}
]
env_kwargs = {
    "initial_amount": 100_0000,
    "normalize_df": None,
    "features": ["close", "return","turbulence"],
    'comission_fee_pct': 0.001,
    'time_window': 1
}
ticks = []
number_of_stocks = [3, 6 ,10]
for index, number_of_stock in enumerate(number_of_stocks):
    result = {}

    train_data_sample = train_data.copy()
    test_data_sample = test_data.copy()

    sample_tics = np.random.choice(
        train_data.tic.unique(), number_of_stock, replace=True)
    train_data_sample = train_data_sample[train_data_sample['tic'].isin(
        sample_tics)]
    test_data_sample = test_data_sample[train_data_sample['tic'].isin(sample_tics)]

    train_environment = PortfolioOptimizationEnv(df=train_data, **env_kwargs)
    test_environment = PortfolioOptimizationEnv(df=test_data, **env_kwargs)
    agent = DRLAgent(env=train_environment)
    for i, m in enumerate(models):
        model = agent.get_model(m['name'], model_kwargs=m['args'])
        ppo_model = agent.train_model(model=model,
                                      tb_log_name = m['name'],
                                      total_timesteps=100_000)
        training_summary = pd.DataFrame(
            {
                "date": train_environment._date_memory,
                "actions": train_environment._actions_memory,
                "weights": train_environment._final_weights,
                "returns": train_environment._portfolio_return_memory,
                "rewards": train_environment._portfolio_reward_memory,
                "portfolio_values": train_environment._asset_memory["final"],
            }
        )
        prediction_summary = DRL_prediction(ppo_model, test_environment, 1)
        result["train"] = training_summary
        result["test"] = prediction_summary
        result["name"] = m['name']
        result["sample_size"] = sample_tics
        final_result.append(result)

{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.0003, 'batch_size': 128}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
--------------------------------------
| time/              |               |
|    fps             | 5             |
|    iterations      | 1             |
|    time_elapsed    | 377           |
|    total_timesteps | 2048          |
| train/             |               |
|    reward          | -0.0029867147 |
--------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 5            |
|    iterations           | 2            |
|    time_elapsed         | 749          |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 2.6570768    |
|    clip_fraction        | 0.863        |
|    clip_range           | 0.2          |
|    entropy_loss         | -488         |
|

KeyboardInterrupt: 

In [5]:
with open('./models/test.pkl','wb') as f:
    pickle.dump(final_result,f)

In [None]:
from pypfopt import expected_returns

df = df.sort_values(['date', 'tic'], ignore_index=True)
df.index = df.date.factorize()[0]
cov_list = []
mu= []
# look back is one year
lookback = 252
for i in range(lookback, len(df.index.unique())):
    data_lookback = df.loc[i-lookback:i, :]
    price_lookback = data_lookback.pivot_table(
        index='date', columns='tic', values='close')
    return_lookback = price_lookback.pct_change().dropna()
    covs = return_lookback.cov().values
    mu.append(expected_returns.mean_historical_return(price_lookback))
    cov_list.append(covs)
df_cov = pd.DataFrame(
{'time': df.date.unique()[lookback:], 'cov_list': cov_list,'returns':mu})
df = df.merge(df_cov, left_on='date',right_on='time')

In [None]:
from agents.evn_mvo import StockPortfolioEnv
from agents.mvo_agent import MarkowitzAgent

test_df = data_split(
df,
start=TEST_START_DATE,
end=TEST_END_DATE
)

stock_dimension = len(test_df.tic.unique())
state_space = stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
"hmax": 100,
"initial_amount": 1000000,
"transaction_cost_pct": 0.001,
"state_space": state_space,
"stock_dim": stock_dimension,
"tech_indicator_list": INDICATORS,
"action_space": stock_dimension,
"reward_scaling": 1e-4

}
e_test_gym = StockPortfolioEnv(df=test_df, **env_kwargs)
agent = MarkowitzAgent(e_test_gym)
markowitz_history_df = agent.prediction(e_test_gym)
markowitz_history_df["method"] = "markowitz"

stock_dimension = len(test_df.tic.unique())
state_space = stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

env_kwargs = {
"hmax": 100,
"initial_amount": 1000000,
"transaction_cost_pct": 0.001,
"state_space": state_space,
"stock_dim": stock_dimension,
"tech_indicator_list": INDICATORS,
"action_space": stock_dimension,
"reward_scaling": 1e-4

}
e_test_gym = StockPortfolioEnv(df=test_df, **env_kwargs)
agent = MarkowitzAgent(e_test_gym,objective='sharp')
shrop_mvo = agent.prediction(e_test_gym)
shrop_mvo["method"] = "markowitz"
