# Portfolio Construction

In [1]:
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import xgboost as xgb
import cvxpy as cp
from sklearn.covariance import LedoitWolf

In [2]:
# Where the persisted artefacts live.
model_folder = "model"
data_folder = "data"

# Resolve every file path up front, then load in the same order as before.
is_name = os.path.join(model_folder, "df_is.h5")
os_name = os.path.join(model_folder, "df_os.h5")
model_name = os.path.join(model_folder, "xgb.json")
adjp_name = os.path.join(data_folder, "data_adjp.h5")

# The two sample frames stored next to the model
# (presumably in-sample / out-of-sample -- confirm against the training notebook).
df_is, df_os = (pd.read_hdf(path) for path in (is_name, os_name))

# Restore the XGBoost regressor from its JSON dump.
xgb_model = xgb.XGBRegressor()
xgb_model.load_model(model_name)

# Price panel used later for return / covariance estimation
# (presumably adjusted prices, one column per asset -- confirm).
df_adjp = pd.read_hdf(adjp_name)

In [3]:
# Pull the 'Date' level out of each sample frame as a plain column ...
df_is_dts, df_os_dts = (frame.reset_index()['Date'] for frame in (df_is, df_os))

# ... and keep each date once, in order of first appearance.
df_is_dt = df_is_dts.drop_duplicates().to_list()
df_os_dt = df_os_dts.drop_duplicates().to_list()

In [4]:
# Stack the first sample on top of the second, keeping only the columns
# both frames share (join='inner').
df_isos = pd.concat([df_is, df_os], join='inner')

# Unique dates of the combined sample, in order of first appearance.
df_isos_dts = df_isos.reset_index()['Date']
df_isos_dt = df_isos_dts.drop_duplicates().to_list()

In [5]:
# portfolio optimization
# Builds one long-short mean-variance portfolio per date in df_os_dt.
# Each entry of portfolio_weights is a tuple (date, universe, weights).
portfolio_weights = []

TRADING_DAYS_PER_YEAR = 252
COV_LOOKBACK_DAYS = TRADING_DAYS_PER_YEAR * 3   # rows of price history per estimate
DAYS_PER_PERIOD = TRADING_DAYS_PER_YEAR / 12    # daily -> monthly horizon

# loop-invariant work, done once instead of once per date
adjp_dates = df_adjp.index.to_list()
r_a_values = np.logspace(-2, 3, num=100)        # risk-aversion search grid

for date in tqdm(df_os_dt):
    # expected return
    df_temp = df_os.loc[date].copy()
    universe = df_temp.index.to_list()
    # every column except the last one is fed to the model
    # (presumably the last column is the target -- confirm)
    features = list(df_temp.columns)[:-1]
    df_features = df_temp[features].copy()
    df_features['ER'] = xgb_model.predict(df_features)

    # covariance estimation with Ledoit-Wolf shrinkage
    idx = adjp_dates.index(date)
    # clamp at 0: a negative start would make iloc count from the END of the
    # frame and silently return the wrong (or an empty) window
    start = max(idx - COV_LOOKBACK_DAYS, 0)
    df_p = df_adjp[universe].iloc[start:idx+1]
    df_ret = df_p.pct_change().iloc[1:]
    # fill a missing return with that day's cross-sectional mean return
    df_ret = df_ret.T.fillna(df_ret.mean(axis=1)).T
    cov = LedoitWolf().fit(df_ret)
    # Covariance scales linearly with the horizon (only volatility scales with
    # its square root), so daily -> monthly multiplies by 252 / 12.
    # NOTE(review): assumes 'ER' is a one-month return forecast -- confirm.
    df_cov = pd.DataFrame(cov.covariance_ * DAYS_PER_PERIOD,
                          index=universe, columns=universe)

    # long-short mean variance optimization
    # define variables
    x = cp.Variable(len(universe))
    var = cp.quad_form(x, df_cov.to_numpy())
    e_r = df_features['ER'].to_numpy() @ x

    # define parameters
    r_a = cp.Parameter(nonneg=True, name='ra')
    ub = cp.Parameter(nonneg=True, name='ub')
    lb = cp.Parameter(nonneg=True, name='lb')
    net = cp.Parameter(nonneg=True, name='net')
    gross = cp.Parameter(nonneg=True, name='gross')

    # define objective, constraints and problem
    objective = cp.Maximize(e_r - r_a * var)
    constraints = [cp.sum(x) == net,         # net exposure
                   cp.norm(x, 1) <= gross,   # gross exposure cap
                   x >= -lb,                 # per-asset short limit
                   x <= ub]                  # per-asset long limit
    problem = cp.Problem(objective, constraints)

    # assign value to non-searching parameters
    ub.value = 0.1
    lb.value = 0.1
    net.value = 0.0
    gross.value = 1.0

    # sweep the risk aversion; keep only the solves that produced a solution
    portfolios = []
    for r_a_value in r_a_values:
        r_a.value = r_a_value
        problem.solve()
        if problem.status not in ("optimal", "optimal_inaccurate"):
            # e_r / var / x carry no usable values after a failed solve
            continue
        portfolios.append((r_a_value, e_r.value,
                           var.value, x.value.copy()))

    # portfolio with max sharpe ratio (assume r_f = 0)
    # A non-positive variance has no defined Sharpe ratio; it is marked NaN and
    # excluded, because plain np.argmax would return the first NaN it meets.
    sharpe_ratios = np.array([p[1] / np.sqrt(p[2]) if p[2] > 0 else np.nan
                              for p in portfolios], dtype=float)
    valid = np.isfinite(sharpe_ratios)
    if not valid.any():
        raise RuntimeError(
            f"no solved portfolio with positive variance for {date}")
    max_sharpe_idx = int(np.nanargmax(np.where(valid, sharpe_ratios, np.nan)))
    optimal_portfolio = portfolios[max_sharpe_idx]
    portfolio_weights.append((date, universe,
                              optimal_portfolio[3]))

    # TODO: run again with transaction cost

100%|██████████████████████████████████████████████████████████████████████████████████| 34/34 [00:40<00:00,  1.18s/it]


In [6]:
# Returns over the holding period of portfolio i: from its own date up to
# (and including) the date of the next portfolio.
# Note: i must be smaller than len(portfolio_weights) - 1, since i+1 is read.
i = 0
date = portfolio_weights[i][0]
date_1 = portfolio_weights[i+1][0]
weight = pd.Series(portfolio_weights[i][2], index=portfolio_weights[i][1])
# Position of `date` in the price index; not used in the lines visible here,
# kept in case later code relies on it.
idx = df_adjp.index.to_list().index(date)
# Select the assets of THIS portfolio. The global `universe` left behind by
# the optimisation loop belongs to the last date processed there, so using it
# could give price columns that do not line up with `weight`.
df_p = df_adjp[list(weight.index)].loc[date:date_1]
df_ret = df_p.pct_change().iloc[1:]