# Import Dependencies

In [141]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

from matplotlib import pyplot as plt

from pathlib import Path
from tqdm import tqdm

import warnings

warnings.filterwarnings('ignore')

# Load Data

In [142]:
# load 4 dataframes (returns, nb industries, avg firm size, Sum of BE / Sum of ME)
df = pd.read_csv(str(Path().absolute()) + "/data/48_Industry_Portfolios.CSV") 

# split these dataframes
df_list = np.split(df, df[df.isnull().all(1)].index, axis = 0) 

# clean data and convert date column to index
for i in range(len(df_list)):
    df_list[i] = pd.DataFrame(df_list[i])  
    df_list[i] = df_list[i].dropna()  
    df_list[i].loc[:, "Date"] = df_list[i].loc[:, "Date"].astype("int")  
    df_list[i] = df_list[i].set_index("Date")  
    
    # last data frame has yearly data
    if i == (len(df_list) - 1): 
        df_list[i].index = pd.to_datetime(df_list[i].index, format = "%Y")
        df_list[i].index = df_list[i].index + pd.DateOffset(months = 6)
    else:
        df_list[i].index = pd.to_datetime(df_list[i].index, format = "%Y%m")

# create a dataframe of excess returns, nb of industries and avg sizes
df = df_list[0] / 100
mask = (df <= -0.99)
df[mask] = np.nan

nb_industries = df_list[1]
nb_industries[mask] = np.nan

avg_size = df_list[2]
avg_size[mask] = np.nan

be_over_me = df_list[3]
be_over_me[mask] = np.nan

In [143]:
# market cap of each industry over time
mkt_cap = nb_industries * avg_size
print(mkt_cap.shape)

# book value to market value
be_over_me = be_over_me.resample("1MS").ffill()
print(be_over_me.shape)

# momentum with monthly data
momentum = df.rolling(12).mean()
print(momentum.shape)

(1182, 48)
(1177, 48)
(1182, 48)


# Normalize Data

In [144]:
mkt_cap_ = mkt_cap.loc['1927-06-01':'1973-12-01']
mkt_cap_norm = (mkt_cap_ - mkt_cap_.mean()) / mkt_cap_.std()
print(mkt_cap_norm.shape)

be_over_me_ = be_over_me.loc['1927-06-01':'1973-12-01']
be_over_me_norm = (be_over_me_ - be_over_me_.mean()) / be_over_me_.std()
print(be_over_me_norm.shape)

momentum_ = momentum.loc['1927-06-01':'1973-12-01']
momentum_norm = (momentum_ - momentum_.mean()) / momentum_.std()
print(momentum_norm.shape)

df_in = df.loc['1927-06-01':'1973-12-01']
print(df_in.shape)

(559, 48)
(559, 48)
(559, 48)
(559, 48)


In [145]:
def CRRA(wealth, gamma= 5):
    """"Constant Relative Risk Aversion Utility Function"""

    if gamma == 1:
        return np.log(wealth)
    else:
        return ((1 + wealth) ** (1 - gamma)) / (1 - gamma)


In [146]:
characteristics = np.stack([mkt_cap_norm, be_over_me_norm, momentum_norm], axis= -1)
weights = mkt_cap.iloc[-1]/ mkt_cap.iloc[-1].sum()
theta = np.array([-1.451, 3.606, 1.772])

# Optimization

## Objective

In [147]:
def objective(theta:np.ndarray, x:np.ndarray, rets:pd.DataFrame, weights:np.ndarray):
    accrued_wealth = 0.0
    wealth = 0.0
    for t in range(x.shape[0]):
        # finding the number of Nt per row
        rets_valid = ~pd.isna(rets.iloc[t+1, :])
        x_valid = ~np.isnan(x[t, :, :]).any(axis=1)
        Nt = np.sum(rets_valid & x_valid)
        
        for i in range(x.shape[1]):
            if (np.isnan(rets.iloc[t+1,i])) or (np.isnan(x[t,i,:]).any()):
                continue
            wealth += (weights[i] + theta.T @ x[t,i,:] / Nt) * rets.iloc[t+1,i]
        accrued_wealth += CRRA(wealth)
    
    return - accrued_wealth / x.shape[0]

In [148]:
init = np.array([-1.398e+01,  4.366e+01, 2.770e+01]) # local solution found
# init = theta
response = minimize(objective, x0= init, args= (characteristics, df, weights), method= 'SLSQP')
response

 message: Optimization terminated successfully
 success: True
  status: 0
     fun: 6.494698443757973e-06
       x: [-1.398e+01  4.366e+01  2.770e+01]
     nit: 1
     jac: [ 5.138e-07 -4.033e-06  5.735e-06]
    nfev: 4
    njev: 1

# Find New Weights

In [149]:
next_w = np.zeros(weights.shape)
for i in range(len(weights)):
    next_w[i] = weights[i] + (response.x @ characteristics[-1,i,:]) / (np.count_nonzero(~np.isnan(characteristics[-1,:,:]))/3)

next_w

array([-0.15392515, -1.63226366, -2.58114127, -2.3781702 , -2.01743452,
       -1.73431925, -2.48397976, -2.06254471, -2.80009338, -1.48297405,
        1.91309059, -2.8094114 , -2.42926598, -0.22485848, -1.61746641,
       -1.46347063, -0.9922424 , -1.85080179,  0.1245534 , -1.54020126,
       -1.33436336, -1.30295921, -0.76151226, -0.1109189 , -0.20995246,
        1.19035235,  0.43982617, -0.34550187, -0.7077866 , -1.19528758,
       -0.97132281, -0.50712861, -1.87492782, -2.09387426, -2.02236567,
       -1.11023043, -1.26536404, -0.7541866 , -1.89966462, -1.4040221 ,
       -2.04357981, -2.08440924, -2.84235124, -2.03303118, -1.6115431 ,
       -2.03087226, -2.68606177, -4.46803079])

In [150]:
# apply equation 16
def long_only_constraint(weights):
    w_pos = np.clip(weights, 0, None) # set all negative to 0
    return w_pos / np.sum(w_pos)

l_w = long_only_constraint(next_w)
l_w

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.52158756, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.0339584 , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.32453925, 0.11991479, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        ])

# Out-of-Sample Testing

### 0. Load Out-of-Sample Data

In [151]:
mkt_cap_ = mkt_cap.loc['1963-12-01':'2024-01-01']
mkt_cap_norm = (mkt_cap_ - mkt_cap_.mean()) / mkt_cap_.std()
print(mkt_cap_norm.shape)

be_over_me_ = be_over_me.loc['1963-12-01':'2024-01-01']
be_over_me_norm = (be_over_me_ - be_over_me_.mean()) / be_over_me_.std()
print(be_over_me_norm.shape)

momentum_ = momentum.loc['1963-12-01':'2024-01-01']
momentum_norm = (momentum_ - momentum_.mean()) / momentum_.std()
print(momentum_norm.shape)

df_out = df.loc['1963-12-01':'2024-01-01']
print(df_out.shape)


(722, 48)
(722, 48)
(722, 48)
(722, 48)


In [152]:
x_hat = np.stack([mkt_cap_norm, be_over_me_norm, momentum_norm], axis= -1)
w_bar = mkt_cap.loc['1973-12-01']/ mkt_cap.loc['1973-12-01'].sum()
theta = response.x
theta

array([-13.98,  43.66,  27.7 ])

### Rolling Window

In [153]:
window_size = 120 # 10 years x 12 months = 120 timesteps
next_w = np.zeros(w_bar.shape[0])
monthly_rets = []

for t in tqdm(range(window_size, x_hat.shape[0])): 
    x_hat_subset = x_hat[t-window_size:t, :, :]
    df_out_subset = df_out.iloc[t-window_size:t+1]
    w_bar = mkt_cap_.iloc[t]/ mkt_cap_.iloc[t].sum()

    
    # 1. Estimate Theta
    res = minimize(objective, x0= theta, args= (x_hat_subset, df_out_subset, w_bar), method= 'SLSQP')
    theta = res.x
    # 2. Estimate Weights
    for i in range(len(weights)):
        if np.isnan(x_hat_subset[-1,i,:].any()):
            next_w[i] = 0
            continue
        next_w[i] = weights[i] + (theta @ x_hat_subset[-1,i,:]) / (np.count_nonzero(~np.isnan(x_hat_subset[-1,:,:]))/3)

    long_weights = long_only_constraint(next_w)
    # 3. Estimate Next Month Returns
    rets_clean = np.nan_to_num(df_out.iloc[t+1], nan=0)
    # 4. Record Return
    monthly_rets.append(long_weights @ rets_clean)
    print(f'Date: {df_out.index[t]}, Monthly Return: {long_weights @ rets_clean}')


  0%|          | 1/602 [00:02<22:45,  2.27s/it]

Date: 1973-12-01 00:00:00, Monthly Return: 0.03606207912968548


  0%|          | 2/602 [00:04<22:26,  2.24s/it]

Date: 1974-01-01 00:00:00, Monthly Return: 0.02703442197569069


  0%|          | 3/602 [00:06<22:40,  2.27s/it]

Date: 1974-02-01 00:00:00, Monthly Return: -0.026967588902543332


  1%|          | 4/602 [00:08<22:11,  2.23s/it]

Date: 1974-03-01 00:00:00, Monthly Return: -0.05127246342686814


  1%|          | 5/602 [00:11<21:55,  2.20s/it]

Date: 1974-04-01 00:00:00, Monthly Return: -0.06583462342433374


  1%|          | 6/602 [00:13<22:03,  2.22s/it]

Date: 1974-05-01 00:00:00, Monthly Return: -0.02861744040778523


  1%|          | 7/602 [00:15<21:56,  2.21s/it]

Date: 1974-06-01 00:00:00, Monthly Return: -0.04064070114176098


  1%|▏         | 8/602 [00:17<22:23,  2.26s/it]

Date: 1974-07-01 00:00:00, Monthly Return: -0.07593464832837406


  1%|▏         | 9/602 [00:20<22:29,  2.28s/it]

Date: 1974-08-01 00:00:00, Monthly Return: -0.09392547407819936


  2%|▏         | 10/602 [00:22<22:17,  2.26s/it]

Date: 1974-09-01 00:00:00, Monthly Return: 0.1199118546795156


  2%|▏         | 11/602 [00:24<22:16,  2.26s/it]

Date: 1974-10-01 00:00:00, Monthly Return: -0.036578015942278955


  2%|▏         | 12/602 [00:26<21:59,  2.24s/it]

Date: 1974-11-01 00:00:00, Monthly Return: -0.04348742799443352


  2%|▏         | 13/602 [00:29<22:06,  2.25s/it]

Date: 1974-12-01 00:00:00, Monthly Return: 0.21794703031848206


  2%|▏         | 14/602 [01:42<3:53:18, 23.81s/it]

Date: 1975-01-01 00:00:00, Monthly Return: 0.07144760565473998


  2%|▏         | 15/602 [02:28<4:58:36, 30.52s/it]

Date: 1975-02-01 00:00:00, Monthly Return: 0.07111265589442697


  3%|▎         | 16/602 [03:43<7:07:28, 43.77s/it]

Date: 1975-03-01 00:00:00, Monthly Return: 0.0512758107555711


  3%|▎         | 17/602 [04:07<6:09:49, 37.93s/it]

Date: 1975-04-01 00:00:00, Monthly Return: 0.06141028546768074


  3%|▎         | 18/602 [04:37<5:44:06, 35.35s/it]

Date: 1975-05-01 00:00:00, Monthly Return: 0.06262793384765562


  3%|▎         | 19/602 [05:44<7:16:47, 44.95s/it]

Date: 1975-06-01 00:00:00, Monthly Return: -0.057285890165148914


  3%|▎         | 20/602 [05:46<5:11:53, 32.15s/it]

Date: 1975-07-01 00:00:00, Monthly Return: -0.03790955916785445


  3%|▎         | 21/602 [06:13<4:56:57, 30.67s/it]

Date: 1975-08-01 00:00:00, Monthly Return: -0.04537865636305784


  4%|▎         | 22/602 [06:16<3:34:17, 22.17s/it]

Date: 1975-09-01 00:00:00, Monthly Return: 0.048217183933838496


  4%|▍         | 23/602 [06:51<4:12:15, 26.14s/it]

Date: 1975-10-01 00:00:00, Monthly Return: 0.0365710104549154


  4%|▍         | 24/602 [06:54<3:03:19, 19.03s/it]

Date: 1975-11-01 00:00:00, Monthly Return: -0.009291683638376343


  4%|▍         | 25/602 [07:09<2:52:49, 17.97s/it]

Date: 1975-12-01 00:00:00, Monthly Return: 0.15417424481060435


  4%|▍         | 26/602 [07:38<3:25:04, 21.36s/it]

Date: 1976-01-01 00:00:00, Monthly Return: 0.04132164343807565


  4%|▍         | 27/602 [07:41<2:29:58, 15.65s/it]

Date: 1976-02-01 00:00:00, Monthly Return: 0.01333441298759471


  5%|▍         | 28/602 [07:43<1:51:10, 11.62s/it]

Date: 1976-03-01 00:00:00, Monthly Return: -0.012160370790200735


  5%|▍         | 29/602 [07:45<1:24:01,  8.80s/it]

Date: 1976-04-01 00:00:00, Monthly Return: -0.019478151717870387


  5%|▍         | 30/602 [08:48<3:59:23, 25.11s/it]

Date: 1976-05-01 00:00:00, Monthly Return: 0.0479829161542938


  5%|▌         | 31/602 [08:51<2:53:38, 18.25s/it]

Date: 1976-06-01 00:00:00, Monthly Return: -0.0057406574557064514


  5%|▌         | 32/602 [09:56<5:07:57, 32.42s/it]

Date: 1976-07-01 00:00:00, Monthly Return: -0.011659770866180912


  5%|▌         | 33/602 [10:39<5:36:59, 35.54s/it]

Date: 1976-08-01 00:00:00, Monthly Return: 0.024555511645722217


  6%|▌         | 34/602 [11:43<6:56:46, 44.03s/it]

Date: 1976-09-01 00:00:00, Monthly Return: -0.022383951469705322


  6%|▌         | 35/602 [11:45<4:57:22, 31.47s/it]

Date: 1976-10-01 00:00:00, Monthly Return: 0.016158785980682162


  6%|▌         | 36/602 [11:47<3:34:13, 22.71s/it]

Date: 1976-11-01 00:00:00, Monthly Return: 0.07129956592238931


  6%|▌         | 37/602 [11:49<2:36:00, 16.57s/it]

Date: 1976-12-01 00:00:00, Monthly Return: -0.03175402048357731


  6%|▋         | 38/602 [12:25<3:29:30, 22.29s/it]

Date: 1977-01-01 00:00:00, Monthly Return: -0.016045968213097887


  6%|▋         | 39/602 [13:01<4:06:21, 26.25s/it]

Date: 1977-02-01 00:00:00, Monthly Return: -0.005339276428498579


  7%|▋         | 40/602 [13:03<2:58:25, 19.05s/it]

Date: 1977-03-01 00:00:00, Monthly Return: 0.011628461569829766


  7%|▋         | 41/602 [13:05<2:10:54, 14.00s/it]

Date: 1977-04-01 00:00:00, Monthly Return: -0.008514860104914375


  7%|▋         | 42/602 [13:07<1:37:50, 10.48s/it]

Date: 1977-05-01 00:00:00, Monthly Return: 0.05806657833835874


  7%|▋         | 43/602 [14:13<4:11:41, 27.01s/it]

Date: 1977-06-01 00:00:00, Monthly Return: -0.009789119140085354


  7%|▋         | 44/602 [15:39<6:56:43, 44.81s/it]

Date: 1977-07-01 00:00:00, Monthly Return: -0.0056918097792153805


  7%|▋         | 45/602 [15:49<5:19:30, 34.42s/it]

Date: 1977-08-01 00:00:00, Monthly Return: 0.018768884716791154


  7%|▋         | 45/602 [16:18<3:21:53, 21.75s/it]


KeyboardInterrupt: 