In [1]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.covariance import LedoitWolf
from pypfopt.expected_returns import mean_historical_return
from pypfopt.efficient_frontier import EfficientFrontier

In [2]:
# Sector ETF universe, listed as (ticker, sector) pairs; only the ticker
# symbols are kept, in the order given here.
sector_tickers = [
    ticker
    for ticker, _sector in (
        ("XLF", "Financials"),
        ("XLK", "Technology"),
        ("XLV", "Health Care"),
        ("XLY", "Consumer Discretionary"),
        ("XLP", "Consumer Staples"),
        ("XLE", "Energy"),
        ("XLI", "Industrials"),
        ("XLU", "Utilities"),
        ("XLB", "Materials"),
        ("XLRE", "Real Estate"),
        ("XLC", "Communication Services"),
    )
]

In [None]:
# Load the returns, prices and "vola" tables from their parquet files.
df_ret, df_prices, df_vol = (
    pd.read_parquet(path)
    for path in (
        "../data/returns.parquet",
        "../data/prices.parquet",
        "../data/vola.parquet",
    )
)

In [32]:
# Quick look at the loaded returns frame: as the last expression of the cell
# it is rendered as the cell's output.
df_ret

Ticker,XLF,XLK,XLV,XLY,XLP,XLE,XLI,XLU,XLB,XLRE,XLC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2006-01-04,-0.000932,0.011664,0.009917,0.000909,0.003407,0.002843,0.002526,0.001561,0.006126,,
2006-01-05,0.003413,0.004627,-0.002470,0.000908,-0.005115,-0.012759,-0.001578,-0.006574,-0.001608,,
2006-01-06,0.005869,0.015576,0.006471,0.008134,0.004265,0.024235,0.003783,0.010622,0.010568,,
2006-01-09,0.002768,0.003176,0.004596,0.010150,0.004670,-0.001123,0.008148,-0.006234,-0.000318,,
2006-01-10,0.000921,-0.000906,-0.003675,0.000297,-0.002544,0.010803,-0.002813,0.001562,-0.006394,,
...,...,...,...,...,...,...,...,...,...,...,...
2021-12-23,0.005165,0.005566,0.004101,0.013776,0.002262,0.000547,0.011712,-0.000144,0.010015,-0.003000,0.008873
2021-12-27,0.010251,0.021226,0.010498,0.009697,0.010575,0.021623,0.010530,0.004865,0.014948,0.020227,0.009683
2021-12-28,0.000510,-0.005222,-0.002632,0.000439,0.005638,-0.000713,0.005034,0.008952,0.005784,0.004310,-0.000507
2021-12-29,-0.001275,0.000853,0.005682,0.001995,0.004566,-0.006442,0.001798,0.005361,0.004095,0.006041,-0.004960


In [None]:
# Backtest outline:
# 1. loop over the trading days
# 2. calculate the current portfolio value (100_000 at the start)
# 3. calculate the optimal weights
# 4. rebalance the portfolio to those weights

In [None]:
# Mean-variance optimization (MVO) backtest, based on the paper by
# Sood et al. (2023).
#
# For every business day between start_date and end_date that exists in the data:
#   1. mark the current holdings to market,
#   2. estimate expected returns (simple mean) and a Ledoit-Wolf covariance
#      from the `lookback` daily returns preceding the evaluation date,
#   3. solve for the max-Sharpe weights,
#   4. rebalance into whole shares; whatever is left over stays in cash.
# Days on which the optimization is skipped or fails leave the holdings
# untouched and are not written to the history.

# TODO : deal with NaN values for tickers
# XLC was created June 2018
# XLRE was created Oct 2015 ( as split off from XLF )

# TODO : understand why solver sometimes fails
# maybe write own solver
# maybe eigenvals too small therefore unstable
# why does mu < riskfree rate lead to problems ?

# Parameters
lookback = 60
initial_cash = 100_000
start_date = pd.to_datetime("2019-01-01")
end_date = pd.to_datetime("2020-01-01")

# Candidate evaluation dates: all business days from start to end date
date_range = pd.bdate_range(start=start_date, end=end_date)

# Initialize portfolio: everything in cash, no shares held
portfolio_value = initial_cash
portfolio_history = []

cash = initial_cash
shares = {t: 0 for t in sector_tickers}

for eval_date in date_range:
    # Both frames are indexed by this date below, so require it in both;
    # otherwise get_loc on the prices index would raise a KeyError.
    if eval_date not in df_ret.index or eval_date not in df_prices.index:
        print(f"Date {eval_date} not in data, skipping.")
        continue

    ret_idx = df_ret.index.get_loc(eval_date)
    prices_idx = df_prices.index.get_loc(eval_date)

    if ret_idx < lookback:
        print(f"Not enough data for {eval_date}, skipping.")
        continue

    # The `lookback` returns strictly before eval_date (eval_date itself excluded)
    return_window = df_ret.iloc[ret_idx - lookback : ret_idx]

    # Update portfolio value with current prices. Before the first rebalance
    # the value is simply the initial cash, so nothing needs to be recomputed.
    prices = df_prices.iloc[prices_idx].to_dict()
    if portfolio_history:
        portfolio_value = sum(shares[t] * prices[t] for t in sector_tickers) + cash

    # Estimate expected returns over the lookback period as a simple average
    mu = return_window.mean()
    # max-Sharpe with a zero risk-free rate needs at least one asset with a
    # positive expected return; keep the current holdings otherwise.
    if (mu < 0).all():
        print(f"All expected returns are negative for {eval_date}, skipping.")
        continue

    # Estimate covariance using Ledoit-Wolf shrinkage
    lw = LedoitWolf()
    lw.fit(return_window)
    cov_matrix = lw.covariance_

    # Fix potential negative eigenvalues (make PSD)
    eigvals, eigvecs = np.linalg.eigh(cov_matrix)
    eigvals[eigvals < 0] = 0
    cov_psd = eigvecs @ np.diag(eigvals) @ eigvecs.T

    # Calculate optimal weights by optimizing the Sharpe ratio
    ef = EfficientFrontier(mu, cov_psd)

    try:
        weights_raw = ef.max_sharpe(risk_free_rate=0)  # ordered dict
    except Exception as e:
        # Best effort: report the failing inputs and keep the current holdings.
        print('= '*20)
        print(f"Error in max_sharpe for {eval_date}: {e}")
        print("mu", mu)
        print("cov_psd", cov_psd)
        print("eigvals", eigvals)
        print('= '*20)
        continue

    # The solver can return tiny negative weights (e.g. -1e-17) for assets that
    # sit at the zero bound. np.floor would turn such a value into -1 share,
    # i.e. an unintended short position, so clip at zero before sizing.
    target_weights = {t: max(float(weights_raw[t]), 0.0) for t in sector_tickers}

    # Whole shares only
    asset_cash = {t: target_weights[t] * portfolio_value for t in sector_tickers}
    shares = {t: np.floor(asset_cash[t] / prices[t]) for t in sector_tickers}
    # Realized weights after rounding, to compare with DRL agents later
    weights = {t: shares[t] * prices[t] / portfolio_value for t in sector_tickers}
    # The rest is cash
    invested = sum(shares[t] * prices[t] for t in sector_tickers)
    cash = portfolio_value - invested
    w_c = cash / portfolio_value

    # Save portfolio history
    portfolio_history.append(
        {"date": eval_date, "cash": cash, "portfolio_value": portfolio_value}
    )

# Convert to DataFrame
portfolio_df = pd.DataFrame(portfolio_history)
portfolio_df

Date 2019-01-01 00:00:00 not in data, skipping.
All expected returns are negative for 2019-01-03 00:00:00, skipping.
All expected returns are negative for 2019-01-04 00:00:00, skipping.
All expected returns are negative for 2019-01-07 00:00:00, skipping.
All expected returns are negative for 2019-01-08 00:00:00, skipping.
= = = = = = = = = = = = = = = = = = = = 
Error in max_sharpe for 2019-01-09 00:00:00: ('Please check your objectives/constraints or use a different solver.', 'Solver status: infeasible')
mu Ticker
XLF    -0.001775
XLK    -0.001681
XLV    -0.000896
XLY    -0.000588
XLP    -0.000403
XLE    -0.003303
XLI    -0.001971
XLU    -0.000152
XLB    -0.000595
XLRE    0.000079
XLC    -0.000550
dtype: float64
cov_psd [[2.44651658e-04 2.61272208e-04 1.82948769e-04 2.20974312e-04
  1.13006876e-04 2.15626453e-04 2.13517977e-04 5.08496601e-05
  1.92952663e-04 1.00356381e-04 2.10742212e-04]
 [2.61272208e-04 4.91141779e-04 2.75580912e-04 3.60027638e-04
  1.32496382e-04 2.79573871e-04 3.1

Unnamed: 0,date,cash,portfolio_value
0,2019-01-02,2.226646,100000.000000
1,2019-01-10,4.034065,102730.174278
2,2019-01-11,332.985609,102923.327230
3,2019-01-14,234.998915,102602.678427
4,2019-01-15,46.800020,103759.864977
...,...,...,...
242,2019-12-24,85.223967,116308.414335
243,2019-12-26,172.916212,116504.060637
244,2019-12-27,142.438513,116524.396559
245,2019-12-30,94.370335,115872.151022
