# Import Dependencies

In [237]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

from matplotlib import pyplot as plt

from pathlib import Path
from tqdm import tqdm

import warnings

warnings.filterwarnings('ignore')

# Load Data

In [238]:
# load daily industry returns
# load 4 dataframes (returns, nb industries, avg firm size, Sum of BE / Sum of ME)
df = pd.read_csv(str(Path().absolute()) + "/data/48_Industry_Portfolios.CSV") 

# split these dataframes
df_list = np.split(df, df[df.isnull().all(1)].index, axis = 0) 

df_list

[          Date  Agric  Food   Soda   Beer   Smoke  Toys   Fun    Books  Hshld  \
 0     192607.0   2.37   0.12 -99.99  -5.19   1.29   8.65   2.50  50.21  -0.48   
 1     192608.0   2.23   2.68 -99.99  27.03   6.50  16.81  -0.76  42.98  -3.58   
 2     192609.0  -0.57   1.58 -99.99   4.02   1.26   8.33   6.42  -4.91   0.73   
 3     192610.0  -0.46  -3.68 -99.99  -3.31   1.06  -1.40  -5.09   5.37  -4.68   
 4     192611.0   6.75   6.26 -99.99   7.29   4.55   0.00   1.82  -6.40  -0.54   
 ...        ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
 1177  202408.0   2.32   5.09   5.38   0.88   7.96  -1.92   5.98   1.69   6.30   
 1178  202409.0   2.76   0.95   0.77   1.32  -1.31   3.37   4.29  -2.33   1.05   
 1179  202410.0   4.39  -4.28  -7.69  -5.15   8.46   0.29   4.02   0.61  -5.81   
 1180  202411.0   3.02   0.63   0.28  -0.40   2.15   4.04  14.14   5.40   7.01   
 1181  202412.0  -6.95  -5.70  -3.29  -6.04  -8.25  -6.95  -1.51  -6.17  -6.14   
 
       ...  Bo

In [239]:
# load 4 dataframes (returns, nb industries, avg firm size, Sum of BE / Sum of ME)
df = pd.read_csv(str(Path().absolute()) + "/data/48_Industry_Portfolios.CSV") 

# split these dataframes
df_list = np.split(df, df[df.isnull().all(1)].index, axis = 0) 

# clean data and convert date column to index
for i in range(len(df_list)):
    df_list[i] = pd.DataFrame(df_list[i])  
    df_list[i] = df_list[i].dropna()  
    df_list[i].loc[:, "Date"] = df_list[i].loc[:, "Date"].astype("int")  
    df_list[i] = df_list[i].set_index("Date")  
    
    # last data frame has yearly data
    if i == (len(df_list) - 1): 
        df_list[i].index = pd.to_datetime(df_list[i].index, format = "%Y")
        df_list[i].index = df_list[i].index + pd.DateOffset(months = 6)
    else:
        df_list[i].index = pd.to_datetime(df_list[i].index, format = "%Y%m")

# create a dataframe of excess returns, nb of industries and avg sizes
df = df_list[0] / 100
mask = (df <= -0.99)
df[mask] = 0

nb_industries = df_list[1]
nb_industries[mask] = 0

avg_size = df_list[2]
avg_size[mask] = 0

be_over_me = df_list[3]
be_over_me[mask] = 0

In [240]:
# market cap of each industry over time
mkt_cap = nb_industries * avg_size
print(mkt_cap.shape)

# book value to market value
be_over_me = be_over_me.resample("1MS").ffill()
print(be_over_me.shape)

# momentum with monthly data
momentum = df.rolling(12).mean()
print(momentum.shape)

(1182, 48)
(1177, 48)
(1182, 48)


# Normalize Data

In [241]:
mkt_cap_ = mkt_cap.loc['1927-06-01':'1973-12-01']
mkt_cap_norm = (mkt_cap_ - mkt_cap_.mean()) / mkt_cap_.std()
print(mkt_cap_norm.shape)

be_over_me_ = be_over_me.loc['1927-06-01':'1973-12-01']
be_over_me_norm = (be_over_me_ - be_over_me_.mean()) / be_over_me_.std()
print(be_over_me_norm.shape)

momentum_ = momentum.loc['1927-06-01':'1973-12-01']
momentum_norm = (momentum_ - momentum_.mean()) / momentum_.std()
print(momentum_norm.shape)

(559, 48)
(559, 48)
(559, 48)


In [242]:
def CRRA(wealth, gamma= 5):
    """"Constant Relative Risk Aversion Utility Function"""

    if gamma == 1:
        return np.log(wealth)
    else:
        return (wealth ** (1 - gamma)) / (1 - gamma)


In [249]:
characteristics = np.stack([mkt_cap_norm, be_over_me_norm, momentum_norm], axis= -1)
weights = np.ones(characteristics.shape[1]) / characteristics.shape[1]
theta = np.ones(shape=(3,1)) / 3

type(characteristics)

numpy.ndarray

In [244]:
def objective(theta:np.array, x:np.ndarray, rets:pd.DataFrame):
    accrued_wealth = 0
    wealth = 0
    weights = np.zeros(x.shape[1])
    for t in range(x.shape[0]):
        for i in range(x.shape[1]):
            wealth += (weights[i] + theta.T @ x[t,i,:]/ x.shape[1]) * rets.iloc[t+1,i]
        accrued_wealth += CRRA(wealth)
    
    return - accrued_wealth / characteristics.shape[0]

In [247]:
init = np.array([2.241e+02, 6.872e+02,-4.545e+02])
response = minimize(objective, x0= init, args= (characteristics, df), method= 'SLSQP')
response

 message: Optimization terminated successfully
 success: True
  status: 0
     fun: 1.273870560173743e-06
       x: [ 2.241e+02  6.872e+02 -4.545e+02]
     nit: 1
     jac: [-2.224e-08  4.481e-08  6.799e-08]
    nfev: 4
    njev: 1

In [259]:
for i in range(len(weights)):
    weights[i] = weights[i] + response.x @ characteristics[-1,i,:]

weights

array([ 472.86706507, 1168.8094588 , 2747.34995874,  380.34656057,
        609.28205171,  805.83703229, 1007.40356382,  866.06891231,
        495.11224484, 2514.04100461, 8449.89699891,  692.02203191,
        666.80534287,  926.74885336, 1142.82305468, 1171.32993905,
       1121.74071742,  971.78269503,  305.78919993, 1970.46692939,
        703.25960952,  641.68168671, 1420.76575284, 1590.20026871,
       -315.19373001, 3431.73974453,  -71.36750633,  475.16943647,
       -335.87147788,  118.78159501,  890.44324016,  977.9716028 ,
       1633.50897275,  813.66666386,  729.24139634,  921.09799838,
        835.65827133,  719.85699991,  406.82008499,  327.51066724,
        939.35552413, 1162.11996905,  872.71305959, 1261.90584703,
       1762.66181407, 1146.84520559, 1633.94983442, 2285.12752779])