# Deep Reinforcement Learning for Portfolio Optimization

This experiment demonstrates the application of Deep Reinforcement Learning (DRL) algorithms (`A2C`, `PPO`, `SAC`) for portfolio optimization.  
The workflow includes:  
- Fetching and preprocessing Dow 30 market data using FinRL’s `YahooDownloader` and `FeatureEngineer`.  
- Splitting the data into training (2010–2023) and trading (2024–present) datasets.  
- Defining a Gym environment (`StockPortfolioEnv`) for portfolio allocation.  
- Training three DRL agents (`A2C`, `PPO`, `SAC`) for 50k timesteps each.  
- Backtesting each agent’s daily returns and computing performance statistics.  
- Constructing a minimum-variance portfolio with `PyPortfolioOpt` and simulating its evolution.  
- Retrieving DJIA benchmark returns for comparison.  
- Plotting cumulative returns of all strategies side by side.  
- Outputting detailed performance metrics for each approach.  

## Dependencies

In [None]:
# ! pip install git+https://github.com/AI4Finance-Foundation/FinRL.git
# ! conda install -n portfolio_opt ipykernel --update-deps --force-reinstall
# ! pip install pandas_market_calendars quantstats gymnasium -q

In [None]:
! pip install pandas numpy matplotlib \
               stable-baselines3 \
               PyPortfolioOpt \
               pandas_market_calendars quantstats gymnasium \
               git+https://github.com/AI4Finance-Foundation/FinRL.git -q

In [4]:
# Standard library imports
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

# Stable Baselines3 imports
from stable_baselines3 import A2C, PPO, SAC

# FinRL imports
from finrl import config
from finrl import config_tickers
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, get_daily_return, get_baseline, convert_daily_return_to_pyfolio_ts

# PyPortfolioOpt imports
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models

%matplotlib inline

## Load data

In [5]:
# Download daily market data for the tickers in FinRL's DOW_30_TICKER list,
# from a fixed start date up to yesterday.
ticker_list = config_tickers.DOW_30_TICKER
start_date = '2005-01-01'
end_date = (datetime.now() - pd.Timedelta(days=1)).strftime("%Y-%m-%d")

# The f-string interpolates both dates on its own; no extra .format() call
# is needed on top of it.
display(f"Downloading data from {start_date} to {end_date}")
display(f"Tickers: {ticker_list}")

df = YahooDownloader(start_date=start_date, end_date=end_date, ticker_list=ticker_list).fetch_data()

'Downloading data from 2005-01-01 to 2025-04-23'

"Tickers: ['AXP', 'AMGN', 'AAPL', 'BA', 'CAT', 'CSCO', 'CVX', 'GS', 'HD', 'HON', 'IBM', 'INTC', 'JNJ', 'KO', 'JPM', 'MCD', 'MMM', 'MRK', 'MSFT', 'NKE', 'PG', 'TRV', 'UNH', 'CRM', 'VZ', 'V', 'WBA', 'WMT', 'DIS', 'DOW']"

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

Shape of DataFrame:  (148857, 8)


## Preprocess data and append technical indicators

In [None]:
# Run FinRL's FeatureEngineer over the downloaded frame with technical
# indicators enabled and the turbulence feature disabled.
print("Preprocessing data...")
fe = FeatureEngineer(use_technical_indicator=True, use_turbulence=False)
df_tech = fe.preprocess_data(df)

# Bare expression: shown as the cell's output when run in a notebook.
df_tech.shape

## Compute covariance matrices and return lists for state representation

In [None]:
# Build, for every trading day that has a full look-back window behind it,
# the matrix of daily returns over that window and its covariance matrix,
# then attach both to the feature frame.
print("Computing covariance matrices...")

lookback = 252

# Re-index rows by the ordinal position of their date so that a window of
# days can be selected with a plain integer label slice.
df_sorted = df_tech.sort_values(['date', 'tic'], ignore_index=True)
day_codes, _ = df_sorted.date.factorize()
df_sorted.index = day_codes
unique_dates = df_sorted.date.unique()

cov_list = []
return_list = []
for last_day in range(lookback, len(unique_dates)):
    first_day = last_day - lookback
    # .loc slices by label and includes both ends, so the window spans
    # lookback + 1 days (the current day included).
    window = df_sorted.loc[first_day:last_day, :]
    price_mat = window.pivot_table(index='date', columns='tic', values='close')
    # The first row has no previous price; dropna() removes it (and any
    # other row containing a missing return).
    ret_mat = price_mat.pct_change().dropna()
    return_list.append(ret_mat)
    cov_list.append(ret_mat.cov().values)

# One row per date that has a window, carrying that date's matrices.
df_cov = pd.DataFrame(
    {
        'date': unique_dates[lookback:],
        'cov_list': cov_list,
        'return_list': return_list,
    }
)
df_merged = pd.merge(df_tech, df_cov, on='date', how='left')

# The left join leaves the earliest dates without a covariance matrix;
# keep only the rows that received one.
has_cov = df_merged['cov_list'].notna()
df_final = df_merged[has_cov].reset_index(drop=True)

display(df_final.shape)
display(df_final.columns)

## Train/Test split

In [None]:
# Date boundaries for the two datasets.
# Alternative shorter training window: '2021-01-01' to '2024-01-01'.
train_start = '2010-01-01'
train_end = '2024-01-01'
trade_start = '2024-01-01'
trade_end = end_date

# NOTE(review): train_end and trade_start are the same date; confirm that
# data_split treats its end bound as exclusive so the two sets do not overlap.
train_data = data_split(df_final, train_start, train_end)
trade_data = data_split(df_final, trade_start, trade_end)

## Environment setup

In [None]:
# Derive the environment dimensions from the training data; the state space
# is sized to the number of distinct tickers.
stock_dim = len(train_data.tic.unique())
state_space = stock_dim
tech_indicators = config.INDICATORS

# Keyword arguments shared by the training and the trading environment.
env_kwargs = dict(
    stock_dim=stock_dim,
    hmax=100,
    initial_amount=1e6,
    transaction_cost_pct=0.001,
    reward_scaling=1e-4,
    state_space=state_space,
    action_space=stock_dim,
    tech_indicator_list=tech_indicators,
)
print(f"Stock Dim: {stock_dim}, State Space: {state_space}, Indicators: {tech_indicators}")

# Training environment, plus the wrapped version returned by get_sb_env()
# (its second return value is not needed here).
e_train = StockPortfolioEnv(df=train_data, **env_kwargs)
env_train, _ = e_train.get_sb_env()

# Trading environment, used later for backtesting.
e_trade = StockPortfolioEnv(df=trade_data, **env_kwargs)

## Train DRL agents

In [None]:
# Train one agent per algorithm on the shared training environment and keep
# the trained models keyed by algorithm name.
os.makedirs('results', exist_ok=True)

# Shorter alternative: algos = ["a2c", "ppo"]
algos = ["a2c", "ppo", "sac"]

trained_models = {}
for algo in algos:
    print(f"Training {algo.upper()}...")
    # A fresh DRLAgent wrapper is created for every algorithm.
    agent = DRLAgent(env=env_train)
    trained_models[algo] = agent.train_model(
        model=agent.get_model(algo),
        tb_log_name=algo,
        total_timesteps=50_000,
    )

## Save trained models

In [None]:
# Persist every trained model under models_dir, one file per algorithm.
models_dir = "results/models"

# exist_ok=True makes this a no-op when the directory is already present,
# so no separate existence check is needed.
os.makedirs(models_dir, exist_ok=True)

# Save trained models
for algo, model in trained_models.items():
    # Build the path from models_dir so changing the directory in one place
    # is enough. The load cell looks these files up with a ".zip" suffix.
    save_path = f"{models_dir}/{algo}_trained_model"
    model.save(save_path)
    print(f"Model {algo} saved to {save_path}")

## Load saved models

In [None]:
# Restore the trained models from disk, but only when this session does not
# already hold a non-empty `trained_models` mapping.
if 'trained_models' in locals() and trained_models:
    print("Trained models are already set up.")
else:
    # Expected location of each saved model.
    model_paths = {
        "a2c": "results/models/a2c_trained_model.zip",
        "ppo": "results/models/ppo_trained_model.zip",
        "sac": "results/models/sac_trained_model.zip",
    }
    # Class whose load() restores each algorithm's model.
    model_classes = {"a2c": A2C, "ppo": PPO, "sac": SAC}

    # Load nothing unless every file is present.
    every_file_present = all(map(os.path.exists, model_paths.values()))
    if not every_file_present:
        print("One or more model files are missing. Please ensure all models are saved correctly.")
    else:
        trained_models = {
            name: model_cls.load(model_paths[name])
            for name, model_cls in model_classes.items()
        }
        print("Models loaded successfully.")

## Backtest DRL strategies

In [None]:
# Run every trained model through the trading environment and collect its
# daily returns together with the backtest statistics.
results = {}
initial_capital = env_kwargs['initial_amount']

for algo, model in trained_models.items():
    print(f"Backtesting {algo.upper()}...")
    # DRL_prediction returns two values; only the first (daily returns) is used.
    df_ret, _ = DRLAgent.DRL_prediction(model=model, environment=e_trade)

    # Compound the daily returns into an account-value curve that starts
    # from the environment's initial amount.
    growth = (1 + df_ret['daily_return']).cumprod()
    df_ret['account_value'] = growth * initial_capital

    results[algo] = {
        'df': df_ret,
        'stats': backtest_stats(df_ret, value_col_name='account_value'),
    }

## Calculate minimum-variance portfolio

In [None]:
# Simulate a daily-rebalanced minimum-variance portfolio over the trading
# period: on each day solve for the weights, buy at that day's close and
# value the holdings at the next day's close.
print("Calculating minimum-variance portfolio...")

dates = trade_data.date.unique()
min_var_vals = [env_kwargs['initial_amount']]

# Walk over consecutive (current day, next day) pairs.
for today, next_day in zip(dates[:-1], dates[1:]):
    df_curr = trade_data[trade_data.date == today].reset_index(drop=True)
    df_next = trade_data[trade_data.date == next_day].reset_index(drop=True)

    # The covariance matrix was merged in per date, so every row of the day
    # carries the same one; take the first.
    cov_mat = np.array(df_curr.cov_list.values[0])
    ef = EfficientFrontier(None, cov_mat, weight_bounds=(0, 1))
    ef.min_volatility()
    w = ef.clean_weights()

    # NOTE(review): assumes the row order of df_curr/df_next matches the
    # asset order of cov_mat - confirm against how cov_list was built.
    capital = min_var_vals[-1]
    target_weights = np.array(list(w.values()))
    shares = target_weights * capital / df_curr.close.values
    min_var_vals.append(np.dot(shares, df_next.close.values))

min_var_df = pd.DataFrame({'date': dates, 'account_value': min_var_vals})

## Fetch DJIA benchmark

In [None]:
# Fetch the benchmark series for ticker "^DJI" over the trading window and
# derive its daily returns from the "close" column.
print("Fetching DJIA benchmark...")
baseline = get_baseline(ticker="^DJI", start=trade_start, end=trade_end)
# NOTE(review): the plotting cell pairs baseline["date"] with these returns
# positionally - confirm get_daily_return keeps one value per baseline row.
baseline_ret = get_daily_return(baseline, "close")

## Plot cumulative returns

In [None]:
# Plot the cumulative return of every backtested DRL agent next to the
# minimum-variance portfolio and the DJIA benchmark, then save the figure.
plt.figure(figsize=(12, 6))

# Iterate over `results` rather than `algos`: `algos` is only defined by the
# training cell, which is skipped when the models are loaded from disk.
for algo, res in results.items():
    df_ret = res["df"]
    cump = (df_ret["daily_return"] + 1).cumprod() - 1
    # Parse the dates so all curves share a real time axis; unparsed string
    # dates would be drawn as categorical labels.
    plt.plot(pd.to_datetime(df_ret["date"]), cump, label=algo.upper())

# Min-var and DJIA
# pct_change() leaves the first value NaN (baseline_ret may likewise start
# with NaN); a missing return is treated as 0 so each curve starts at zero
# instead of dropping its first point.
c_min = (min_var_df["account_value"].pct_change().fillna(0) + 1).cumprod() - 1
plt.plot(pd.to_datetime(min_var_df["date"]), c_min, label="MIN_VAR")
c_dji = (baseline_ret.fillna(0) + 1).cumprod() - 1
plt.plot(pd.to_datetime(baseline["date"]), c_dji, label="DJIA")

plt.legend()
plt.title("Cumulative Return Comparison")
plt.xlabel("Date")
plt.ylabel("Cumulative Return")
plt.savefig("results/cumulative_return_comparison.png")
plt.show()

## Review performance stats for each algorithm

In [None]:
# Collect each strategy's backtest statistics into one table, one column per
# algorithm. Built from `results` rather than `algos`, because `algos` is
# only defined when the training cell has been run in this session.
perf_stats = pd.DataFrame(
    {algo.upper(): res['stats'] for algo, res in results.items()}
)
display(perf_stats)