# Investment Return Optimization with AutoML for [M6 Competition](https://m6competition.com/)

<br>__[Aleksei Mashlakov](https://aleksei-mashlakov.github.io/)__ 
<br>
<br>
<a href='https://www.buymeacoffee.com/amashlakov' target='_blank'><img height='50' style='border:0px;height:50px;' src='https://www.buymeacoffee.com/assets/img/guidelines/download-assets-2.svg' border='0' alt='Buy Me a Coffee' /></a>

> Based on [Riskfolio-Lib](https://riskfolio-lib.readthedocs.io/en/latest/) Tutorial 34: Comparing Covariance Estimators Methods and [Optuna](https://optuna.org/) for Hyperparameter Tuning

> :warning: **NO INVESTMENT ADVICE** :warning: This notebook is for educational/informational purposes only. The author is not responsible for any losses incurred as a result of using this notebook.

In [None]:
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format='retina'

import os
import numpy as np
import pandas as pd
import yfinance as yf
import warnings
import datetime
import riskfolio as rp
from pathlib import Path
import plotly.express as px
import plotly.io as pio
import chart_studio.plotly as py
# pio.renderers.default = "notebook"
pio.templates.default = "simple_white"

np.random.seed(42)
warnings.filterwarnings("ignore")
# pd.options.display.float_format = '{:.4%}'.format

In [None]:
import chart_studio

# Chart Studio credentials are read from the environment instead of being typed
# into the notebook, so they never end up in version control.
username = os.environ.get("CHART_STUDIO_USERNAME", "")
api_key = os.environ.get("CHART_STUDIO_API_KEY", "")

# Only register the credentials when both are provided, so that empty
# placeholders are never passed to `set_credentials_file`.
if username and api_key:
    chart_studio.tools.set_credentials_file(username=username,
                                            api_key=api_key)

In [None]:
%reload_ext autoreload

In [None]:
# Use the parent of the start-up directory as working directory.
# The root is resolved only once per kernel session: re-running this cell must not
# climb one directory higher each time (`Path.cwd().parent` changes after `os.chdir`).
if "wd_path" not in globals():
    wd_path = Path.cwd().parent
os.chdir(wd_path)
data_path = wd_path / "data"

### Downloading the train and target data:

In [None]:
# download the price data
# !curl -s https://m6competition.com/api/data/assets/assets_m6.csv -o ../data/raw/assets_m6.csv

# download the ticker data
# !wget https://storage.googleapis.com/m6-bucket-webapp-public/M6_Universe.csv  -O ../data/template/M6_Universe.csv -q

In [None]:
# Date range of the price download (passed as `start`/`end` to `yf.download` below)
start = '2021-01-01'
end = '2023-02-03'
# Alternative: download up to today.
# end = datetime.datetime.now().date().strftime("%Y-%m-%d")

# The M6 asset universe: 100 ticker symbols (FB is already listed under its new name META)
assets = [
  "ABBV","ACN","AEP","AIZ","ALLE","AMAT","AMP","AMZN","AVB","AVY",
  "AXP","BDX","BF-B","BMY","BR","CARR","CDW","CE","CHTR","CNC",
  "CNP","COP","CTAS","CZR","DG","DPZ","DRE","DXC","META","FTV",
  "GOOG","GPC","HIG","HST","JPM","KR","OGN","PG","PPL","PRU",
  "PYPL","RE","ROL","ROST","UNH","URI","V","VRSK","WRK","XOM",
  "IVV","IWM","EWU","EWG","EWL","EWQ","IEUS","EWJ","EWT","MCHI",
  "INDA","EWY","EWA","EWH","EWZ","EWC","IEMG","LQD","HYG","SHY",
  "IEF","TLT","SEGA.L","IEAA.L","HIGH.L","JPEA.L","IAU","SLV","GSG","REET",
  "ICLN","IXN","IGF","IUVL.L","IUMO.L","SPMV.L","IEVL.L","IEFM.L","MVEU.L","XLK",
  "XLF","XLV","XLE","XLY","XLI","XLC","XLU","XLP","XLB","VXX"]

# Fetch the prices from Yahoo Finance and keep only the adjusted close,
# then forward-fill gaps and exclude DRE from the training data.
adj_close = yf.download(assets, start=start, end=end, ignore_tz=True)["Adj Close"]
train_data = adj_close.ffill().drop(columns=["DRE"])

# Replace the timestamp index by plain `datetime.date` objects.
train_data.index = pd.to_datetime(train_data.index).date
train_data.head()

> Note that the DRE stock has been removed from the downloaded price data.

In [None]:
# Load the asset prices provided by the M6 submission platform (long format:
# one row per date and symbol), parse the dates and rename FB to META.

path = data_path / "raw" / "assets_m6.csv"
m6_price_data = pd.read_csv(path).assign(
    date=lambda frame: pd.to_datetime(frame["date"]),
    symbol=lambda frame: frame["symbol"].replace("FB", "META"),
)
# m6_price_data = m6_price_data.pivot(index="date", columns="symbol", values="price")
m6_price_data.head()

In [None]:
# Fill missing values for DRE: every date that exists for META but not for DRE
# gets a DRE row carrying the last DRE price found in the file.
is_dre = m6_price_data["symbol"] == "DRE"
dre_date = m6_price_data.loc[is_dre, "date"]
meta_date = m6_price_data.loc[m6_price_data["symbol"] == "META", "date"]

# Last DRE price in file order (assumes the file is sorted by date -- TODO confirm).
price = m6_price_data.loc[is_dre, "price"].values[-1]
dates = meta_date[~meta_date.isin(dre_date)].reset_index(drop=True)

# Add all missing rows with a single concat (`DataFrame.append` was removed in
# pandas 2.0). Nothing is added when DRE is already complete, so the cell can be
# re-run safely.
if not dates.empty:
    dre_padding = pd.DataFrame({"date": dates, "symbol": "DRE", "price": price})
    m6_price_data = pd.concat([m6_price_data, dre_padding], ignore_index=True)

In [None]:
# Sanity check: after padding, the most recent DRE row must carry the expected price.
# The column is selected by name and compared with a tolerance, because the value
# comes from parsing a CSV file.
dre_last_price = (
    m6_price_data.loc[m6_price_data["symbol"] == "DRE"]
    .sort_values(by="date")["price"]
    .iloc[-1]
)
assert np.isclose(dre_last_price, 48.2), f"Unexpected last DRE price: {dre_last_price}"

In [None]:
# Transform the price data into 20-day percentage returns (column by column).
# NOTE: this cell overwrites `train_data` (prices -> returns) and is therefore not
# idempotent: re-running it without re-running the download cell applies the
# transformation to the returns again.
from src.ticker_features import calculate_pct_returns

train_data = train_data.apply(calculate_pct_returns, periods=20, axis=0).dropna()
train_data.head()

In [None]:
# Load the submission template and rename the FB ticker to META in its ID column.
df_submission = pd.read_csv(data_path / "template/template.csv").assign(
    ID=lambda frame: frame["ID"].replace("FB", "META")
)
df_submission.head()

> Let's plot the data to see the trends

In [None]:
# Start dates of the twelve 28-day competition months (2022-03-04 up to, but not
# including, 2023-02-03). Dedicated loop names are used so that the global
# `start` / `end` download range defined earlier is not overwritten.
m6_dates = list(
    pd.date_range(start="2022-03-04", end="2023-02-03", freq='28D', inclusive="left")
)
for month_no, month_start in enumerate(m6_dates, start=1):
    month_end = month_start + pd.Timedelta(days=28)
    print(f"Competition Month {month_no:2d}: {month_start.date()} -- {month_end.date()}")

In [None]:
# Line chart of the returns with a dashed vertical marker and an "M<n>" label
# for every competition month.
dashed_black = dict(line_width=1, line_dash="dash", line_color="black")

fig = px.line(train_data, title="20 days returns")
for month_no, month_start in enumerate(m6_dates, start=1):
    fig.add_vline(x=month_start, **dashed_black)
    # The label is placed two weeks after the start of the month.
    fig.add_annotation(
        x=month_start + datetime.timedelta(days=14),
        y=0.6,
        text=f"M{month_no}",
        showarrow=False,
        yshift=10,
    )
fig.add_hline(y=1.0, **dashed_black)
fig.update_traces(line=dict(width=1))
# py.plot(fig, filename=f"m6_competition_assets", auto_open = True)
fig.show()

> See the seasonality? Me too. 

### Risk measures available

```
    - 'MV': Standard Deviation.
    - 'MAD': Mean Absolute Deviation.
    - 'MSV': Semi Standard Deviation.
    - 'FLPM': First Lower Partial Moment (Omega Ratio).
    - 'SLPM': Second Lower Partial Moment (Sortino Ratio).
    - 'CVaR': Conditional Value at Risk.
    - 'EVaR': Entropic Value at Risk.
    - 'WR': Worst Realization (Minimax)
    - 'MDD': Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
    - 'ADD': Average Drawdown of uncompounded cumulative returns.
    - 'CDaR': Conditional Drawdown at Risk of uncompounded cumulative returns.
    - 'EDaR': Entropic Drawdown at Risk of uncompounded cumulative returns.
    - 'UCI': Ulcer Index of uncompounded cumulative returns.
```

In [None]:
import optuna
import os

# Turn off optuna log notes.
optuna.logging.set_verbosity(optuna.logging.WARN)

import pyrootutils

# root = pyrootutils.setup_root("..", dotenv=True, pythonpath=True)

from src.portfolio_options import RMS, kellys, method_covs, mus, rms_short
from src.portfolio_tuning import (
    PortfolioConfig,
    PortfolioOptConfig,
    backtest_M6_ir,
    logging_callback,
)


class Objective:
    """Optuna objective that evaluates one portfolio configuration with `backtest_M6_ir`.

    The data required by the backtest is stored on the instance, so the instance
    itself can be handed to `study.optimize` as the objective callable.
    """

    def __init__(self, m6_price_data, m6_returns_data, m6_submission, m6_dates):
        # Backtest inputs; of `m6_dates` only the first and the last entry are read.
        self.price_data, self.returns_data = m6_price_data, m6_returns_data
        self.submission_template, self.dates = m6_submission, m6_dates

    def __call__(self, trial):
        """Sample a configuration from `trial` and return the value of `backtest_M6_ir`."""
        # Searched hyperparameters: the risk measure first, then the long ratio.
        risk_measure = trial.suggest_categorical("rms", RMS)  # or rms_short
        long_ratio = round(trial.suggest_float("upperlng", 0.1, 1.0, step=0.1), 2)

        # Everything else is fixed. Candidates for tuning would be alpha (for
        # CVaR / EVaR / CDaR / EDaR), the look-back in weeks, the objective
        # ("MinRisk" vs "Sharpe"), `kellys`, `mus` and `method_covs`.
        port_params = PortfolioConfig(
            sht=True,
            uppersht=round(1.0 - long_ratio, 2),  # short cap is the remainder of the long cap
            upperlng=long_ratio,
            alpha=0.05,
        )
        print(risk_measure)
        opt_config = PortfolioOptConfig(
            model="Classic",
            rm=risk_measure,
            obj="Sharpe",
            mu="hist",
            cov="hist",
            weeks_lookback=10,
            kelly=False,
        )

        first_date, last_date = self.dates[0], self.dates[-1]
        return backtest_M6_ir(
            port_params=port_params,
            opt_config=opt_config,
            m6_price_data=self.price_data,
            returns_data=self.returns_data,
            df_submission=self.submission_template,
            start=first_date,
            end=last_date,
        )


study_name = "test"

# Always start from an empty study database. The results directory is created if
# needed and a missing database file is not an error, so the cell also works on
# a fresh checkout where no previous study exists.
study_db_path = data_path / f"results/{study_name}.db"
study_db_path.parent.mkdir(parents=True, exist_ok=True)
study_db_path.unlink(missing_ok=True)

study = optuna.create_study(
    direction="maximize",
    # Fixed seed for reproducible sampling.
    sampler=optuna.samplers.TPESampler(seed=42, n_startup_trials=50),
    storage="sqlite:///" + str(data_path) + f"/results/{study_name}.db",
    study_name=study_name,
    load_if_exists=False,
)

objective = Objective(
    m6_price_data=m6_price_data, m6_returns_data=train_data, m6_submission=df_submission, m6_dates=m6_dates
)

# Only a handful of trials is run here; increase `n_trials` for a real search.
study.optimize(
    objective, n_trials=5, callbacks=[logging_callback], show_progress_bar=True
)

In [None]:
# Summary of the best trial: the trial object, its objective value and its parameters.
best_summary = (
    f"Best trial: \n{study.best_trial}\n",
    f"Best value: {study.best_value}\n",
    f"Best params: {study.best_params}\n",
)
print(*best_summary, sep="\n")

In [None]:
# print(study.trials)

from optuna.visualization import plot_contour
from optuna.visualization import plot_edf
from optuna.visualization import plot_intermediate_values
from optuna.visualization import plot_optimization_history
from optuna.visualization import plot_parallel_coordinate
from optuna.visualization import plot_param_importances
from optuna.visualization import plot_slice

import plotly.io as pio
# From here on figures use the notebook renderer and the dark template
# (this replaces the "simple_white" template selected at the top of the notebook).
pio.renderers.default = "notebook"
pio.templates.default = "plotly_dark"

# The contour figure is not the last expression of the cell, so it has to be shown
# explicitly; otherwise it is built and silently discarded.
plot_contour(study).show()
# plot_intermediate_values(study)
fig = plot_parallel_coordinate(study)

In [None]:
# Render `fig`, which the preceding cell assigns from `plot_parallel_coordinate(study)`.
fig.show()

In [None]:
import os

# Export the study: all trials as CSV plus the parallel-coordinate and contour
# plots as stand-alone HTML files, under <current working directory>/data/results.
study_name = "IR"
results_directory = Path().cwd() / "data/results"
results_directory.mkdir(parents=True, exist_ok=True)

trials_table = study.trials_dataframe()
trials_table.to_csv(f"{results_directory}/{study_name}.csv", index=False)

parallel_figure = plot_parallel_coordinate(study)
parallel_figure.write_html(f"{results_directory}/{study_name}_parallel.html")

contour_figure = plot_contour(study)
contour_figure.write_html(f"{results_directory}/{study_name}_contour.html")

### Estimating optimized Portfolios 

In [None]:

# Building the portfolio object
port = rp.Portfolio(returns=train_data)
start = train_data.index[-1]
# NOTE(review): `asset_data_fit` (the returns of the last 35 weeks) is computed but not
# used by the code below; the portfolio above is built on the full `train_data`.
# Confirm which window is intended.
asset_data_fit = train_data[(train_data.index>(start - pd.Timedelta(days=7*35)))].copy()

# Select method and estimate input parameters:

method_mu='hist' # Method to estimate expected returns based on historical data.
method_cov='hist' # Method to estimate covariance matrix based on historical data.
port.assets_stats(method_mu=method_mu, method_cov=method_cov, d=0.95)

# Configuring short weights options

port.sht = True # Allows to use Short Weights
port.uppersht = 0.3 # Maximum value of sum of short weights in absolute value
port.upperlng = 0.7 # Maximum value of sum of positive weights
port.budget = port.upperlng - port.uppersht # Net budget: long cap minus short cap

# Estimate optimal portfolio:
model='Classic' # Could be Classic (historical), BL (Black Litterman) or FM (Factor Model)
rm = 'EVaR' # Risk measure used: Entropic Value at Risk
obj = 'Sharpe' # Objective function, could be MinRisk, MaxRet, Utility or Sharpe
hist = False # Use historical scenarios for risk measures that depend on scenarios
rf = 0 # Risk free rate
l = 0 # Risk aversion factor, only useful when obj is 'Utility'

w = port.optimization(model=model, rm=rm, obj=obj, rf=rf, l=l, hist=hist)

# Fail with a clear message instead of an AttributeError on `None` further down
# when the optimization does not produce a weights table.
if w is None:
    raise RuntimeError(
        f"Portfolio optimization returned no weights (model={model!r}, rm={rm!r}, obj={obj!r})."
    )

display(w.T)


# Gross exposure, total long weight and total short weight.
print(w.abs().sum())
print(w[w>0].sum())
print(w[w<0].sum())

In [None]:
# DRE was dropped from the training data, so the optimized weights have no DRE row;
# add it with a zero weight.
w.loc["DRE","weights"] = 0.0

In [None]:
# Show the weights as percentages with a yellow-green background gradient.
w.style.format("{:.2%}").background_gradient(cmap='YlGn')

In [None]:
# Plotting the composition of the portfolio. The title is built from the objective
# and risk measure actually used (`obj`, `rm`) rather than a fixed "Mean Variance" label.
ax = rp.plot_pie(w=w, title=f"{obj} portfolio ({rm})", others=0.05, nrow=25, cmap = "tab20",
                 height=6, width=10, ax=None)
# One minus the sum of the absolute (rounded) weights.
1 - w.round(5).abs().sum()

In [None]:
import matplotlib.pyplot as plt

# Bar chart of the asset weights of the optimized portfolio.
# A new figure is created explicitly instead of reusing whatever figure happens
# to be current (`plt.gcf()`), so the bars are never drawn onto a leftover plot.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(14, 6))

w.plot.bar(ax=ax)

In [None]:
# Plotting the risk composition of the portfolio.
# The significance level is taken from the portfolio object so that the risk
# contributions are computed with the same alpha as the optimization, not a
# separately hard-coded value.
ax = rp.plot_risk_con(w, cov=port.cov, returns=port.returns, rm=rm, rf=0, alpha=port.alpha,
                      color="tab:blue", height=6, width=10, ax=None)

In [None]:
# Write the optimized weights into the last column of the submission file.
# The file is addressed through `data_path`, like every other results file in this
# notebook, instead of a path relative to the current working directory.
submission_path = data_path / "results" / "submission_sub12.csv"
df_submission = pd.read_csv(submission_path, index_col=0)
# df_submission = pd.read_csv("../data/template/template.csv", index_col=0)
# df_submission['ID'] = df_submission['ID'].replace('FB', 'META')

# Align the weights with the asset order of the submission file.
w = w.reindex(index=df_submission.index.values)

rounded_weights = w["weights"].round(5)
# Scalar remainder between 1 and the sum of the absolute rounded weights; it is added
# to the first asset. Computed on the column (not the frame) so it is a plain number.
residual = 1 - rounded_weights.abs().sum()
decision_values = rounded_weights.to_numpy(copy=True)
decision_values[0] += residual
df_submission.iloc[:, -1] = np.round(decision_values, 5)

df_submission.reset_index().to_csv(submission_path, index=False)
# Column-wise sum of absolute values as a final check of the written file.
(df_submission.abs()).sum(axis=0)

### Check submissions for Q2 

In [None]:
# Plotting the composition of the portfolio
# Pie chart of the "Decision" column stored in the tab-separated M4 results file.
M4 = pd.read_csv(data_path / "results/M4.csv", sep="\t", index_col=0)
month4_decision = M4[["Decision"]]
ax = rp.plot_pie(
    w=month4_decision,
    title='Portfolio for Month 4',
    others=0.05,
    nrow=25,
    cmap="tab20",
    height=6,
    width=10,
    ax=None,
)

In [None]:
# Plotting the composition of the portfolio
# Pie chart of the "Decision" column stored in the tab-separated M5 results file.
M5 = pd.read_csv(data_path / "results/M5.csv", sep="\t", index_col=0)
month5_decision = M5[["Decision"]]
ax = rp.plot_pie(
    w=month5_decision,
    title='Portfolio for Month 5',
    others=0.05,
    nrow=25,
    cmap="tab20",
    height=6,
    width=10,
    ax=None,
)

In [None]:
# Plotting the composition of the portfolio
# Pie chart of the "Decision" column stored in the tab-separated M6 results file.
M6 = pd.read_csv(data_path / "results/M6.csv", sep="\t", index_col=0)
month6_decision = M6[["Decision"]]
ax = rp.plot_pie(
    w=month6_decision,
    title='Portfolio for Month 6',
    others=0.05,
    nrow=25,
    cmap="tab20",
    height=6,
    width=10,
    ax=None,
)