# Import Dependencies

In [23]:
import pandas as pd
import numpy as np
from scipy.optimize import minimize

from matplotlib import pyplot as plt

from pathlib import Path
from tqdm import tqdm

import warnings

warnings.filterwarnings('ignore')

# Load Data

In [24]:
# Load the industry portfolio file. Dates are YYYYMM, i.e. the first table is monthly.
# The CSV stacks 4 tables (returns, nb industries, avg firm size, Sum of BE / Sum of ME)
# separated by rows in which every column is NaN.
df = pd.read_csv(Path().absolute() / "data" / "48_Industry_Portfolios.CSV")

# Split the tables by slicing between the separator rows. This replaces
# np.split(df, ...), which relies on the deprecated DataFrame.swapaxes.
# read_csv yields a default RangeIndex, so the separator labels are also row
# positions. As with np.split, every table after the first still starts with
# its all-NaN separator row.
separator_rows = df.index[df.isnull().all(axis=1)].tolist()
bounds = [0, *separator_rows, len(df)]
df_list = [df.iloc[start:stop] for start, stop in zip(bounds[:-1], bounds[1:])]

df_list

[          Date  Agric  Food   Soda   Beer   Smoke  Toys   Fun    Books  Hshld  \
 0     192607.0   2.37   0.12 -99.99  -5.19   1.29   8.65   2.50  50.21  -0.48   
 1     192608.0   2.23   2.68 -99.99  27.03   6.50  16.81  -0.76  42.98  -3.58   
 2     192609.0  -0.57   1.58 -99.99   4.02   1.26   8.33   6.42  -4.91   0.73   
 3     192610.0  -0.46  -3.68 -99.99  -3.31   1.06  -1.40  -5.09   5.37  -4.68   
 4     192611.0   6.75   6.26 -99.99   7.29   4.55   0.00   1.82  -6.40  -0.54   
 ...        ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
 1177  202408.0   2.32   5.09   5.38   0.88   7.96  -1.92   5.98   1.69   6.30   
 1178  202409.0   2.76   0.95   0.77   1.32  -1.31   3.37   4.29  -2.33   1.05   
 1179  202410.0   4.39  -4.28  -7.69  -5.15   8.46   0.29   4.02   0.61  -5.81   
 1180  202411.0   3.02   0.63   0.28  -0.40   2.15   4.04  14.14   5.40   7.01   
 1181  202412.0  -6.95  -5.70  -3.29  -6.04  -8.25  -6.95  -1.51  -6.17  -6.14   
 
       ...  Bo

In [None]:
# Re-read the raw CSV here so the cell can be re-run on its own (`df` is rebound
# to the returns table at the bottom of this cell).
df = pd.read_csv(Path().absolute() / "data" / "48_Industry_Portfolios.CSV")

# The file stacks 4 tables (returns, nb industries, avg firm size, Sum of BE / Sum of ME)
# separated by rows in which every column is NaN. Slice between those rows with
# iloc instead of np.split, which relies on the deprecated DataFrame.swapaxes.
# read_csv yields a default RangeIndex, so the separator labels are also row
# positions. Every table after the first starts with its separator row, which
# dropna() removes below.
separator_rows = df.index[df.isnull().all(axis=1)].tolist()
bounds = [0, *separator_rows, len(df)]
df_list = [df.iloc[start:stop] for start, stop in zip(bounds[:-1], bounds[1:])]

# clean data and convert date column to index
yearly_position = len(df_list) - 1  # last data frame has yearly data
for i, table in enumerate(df_list):
    table = table.dropna()

    # Dates are read as floats (e.g. 192607.0). Convert float -> int -> str so
    # that to_datetime receives "192607" / "1926" and the explicit format applies.
    date_text = table["Date"].astype("int").astype(str)
    if i == yearly_position:
        # stamp each year six months after 1 January, i.e. on 1 July
        dates = pd.to_datetime(date_text, format="%Y") + pd.DateOffset(months=6)
    else:
        dates = pd.to_datetime(date_text, format="%Y%m")

    table = table.drop(columns="Date")
    table.index = pd.DatetimeIndex(dates, name="Date")
    df_list[i] = table

# Placeholder the raw file uses for missing observations (-0.9999 once the
# returns are divided by 100, hence the -0.99 cut-off below).
MISSING_SENTINEL = -99.99


def _blank_out(table, missing):
    """Return a copy of `table` with unavailable cells set to NaN.

    A cell is blanked when `missing` is True at the same date/column, or when
    the table itself holds the missing-value placeholder. `missing` is aligned
    to the table's index and columns first; labels absent from `missing` are
    treated as "not missing". The input table is left untouched.
    """
    aligned = missing.reindex(
        index=table.index, columns=table.columns, fill_value=False
    ).astype(bool)
    return table.mask(aligned | (table <= MISSING_SENTINEL))


# Returns in decimals, with placeholder observations turned into NaN.
# NOTE(review): the original comment calls these "excess returns" — confirm
# whether a risk-free rate has been subtracted upstream; nothing here does it.
df = df_list[0] / 100
mask = (df <= -0.99)
df = df.mask(mask)

# Companion tables. Blank the cells where the return is missing, and also each
# table's own placeholders so they cannot leak into later means and standard
# deviations. New frames are created; the entries of df_list are not modified.
nb_industries = _blank_out(df_list[1], mask)
avg_size = _blank_out(df_list[2], mask)
be_over_me = _blank_out(df_list[3], mask)

In [28]:
# Market capitalisation per industry and date: the count table times the
# average-size table, element by element.
mkt_cap = nb_industries.mul(avg_size)

# Book-to-market: upsample to month-start frequency, carrying each value
# forward until the next observation.
# NOTE(review): the result stops at the last stamp of the input table, so it
# may end earlier than the monthly tables — confirm this is intended.
be_over_me = be_over_me.resample("MS").ffill()

# Momentum: mean return over a trailing window of 12 rows, current row included.
# NOTE(review): if this signal is paired with same-row returns it uses
# information from that row — confirm whether a lag is wanted.
momentum = df.rolling(window=12).mean()

# Normalize Data

In [None]:
def standardize(frame):
    """Z-score every column of `frame` over its rows.

    Subtracts the column mean and divides by the column standard deviation
    (pandas defaults: NaNs skipped, sample std with ddof=1).

    Parameters
    ----------
    frame : pandas.DataFrame
        Table to standardise.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as `frame`. A column with zero standard
        deviation comes out as NaN.
    """
    return (frame - frame.mean()) / frame.std()


# NOTE(review): the statistics are taken per column over the whole sample, i.e.
# through time for each industry. If the characteristics are meant to be
# standardised across industries at each date, use axis=1 instead — confirm.
mkt_cap_norm = standardize(mkt_cap)
be_over_me_norm = standardize(be_over_me)
momentum_norm = standardize(momentum)

# Only the last expression of a cell is rendered, so the intermediate
# describe()/head() calls were dropped.
momentum_norm.head()



Unnamed: 0_level_0,Agric,Food,Soda,Beer,Smoke,Toys,Fun,Books,Hshld,Clths,...,Boxes,Trans,Whlsl,Rtail,Meals,Banks,Insur,RlEst,Fin,Other
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1926-07-01,-0.007748,0.36262,,3.056935,0.801121,0.868483,-0.059437,4.945518,-0.078766,0.655348,...,1.393274,-0.163369,1.466124,-0.248168,-0.152528,-0.258599,-0.686453,-0.20712,-0.018053,-0.199339
1926-08-01,-0.007748,0.36262,,3.056935,0.801121,0.868483,-0.059437,4.945518,-0.078766,0.655348,...,1.393274,-0.163369,1.466124,-0.248168,-0.152528,-0.258599,-0.686453,-0.20712,-0.018053,-0.199339
1926-09-01,-0.007748,0.36262,,3.056935,0.801121,0.868483,-0.059437,4.945518,-0.078766,0.655348,...,1.393274,-0.163369,1.466124,-0.248168,-0.152528,-0.258599,-0.686453,-0.20712,-0.018053,-0.199339
1926-10-01,-0.007748,0.36262,,3.056935,0.801121,0.868483,-0.059437,4.945518,-0.078766,0.655348,...,1.393274,-0.163369,1.466124,-0.248168,-0.152528,-0.258599,-0.686453,-0.20712,-0.018053,-0.199339
1926-11-01,-0.007748,0.36262,,3.056935,0.801121,0.868483,-0.059437,4.945518,-0.078766,0.655348,...,1.393274,-0.163369,1.466124,-0.248168,-0.152528,-0.258599,-0.686453,-0.20712,-0.018053,-0.199339


In [None]:
def CRRA(wealth, gamma=5):
    """Constant Relative Risk Aversion utility function.

    Computes ``log(wealth)`` when ``gamma == 1`` and
    ``wealth ** (1 - gamma) / (1 - gamma)`` otherwise.

    Parameters
    ----------
    wealth : scalar, numpy.ndarray or pandas object
        Wealth level(s). Array-like inputs are evaluated element-wise.
    gamma : float, default 5
        Risk-aversion coefficient.

    Returns
    -------
    Utility with the same shape/type as `wealth`.
    """
    if gamma == 1:
        return np.log(wealth)

    # Use a float exponent: with an integer `gamma` and integer NumPy wealth,
    # `wealth ** (1 - gamma)` raises "Integers to negative integer powers are
    # not allowed".
    exponent = 1.0 - gamma
    return (wealth ** exponent) / exponent


In [None]:
# NOTE(review): work in progress — the loop body at the bottom of this cell ends
# in a dangling `+`, so the cell does not parse. TODO: finish the expression.

# One entry per column of momentum_norm, each equal to 1 / (number of columns).
weights = np.ones(momentum_norm.shape[1])/momentum_norm.shape[1]
# theta starts out as the same equal-valued vector as `weights`.
theta = np.ones(momentum_norm.shape[1])/momentum_norm.shape[1]

# Standardised momentum as a plain NumPy array (rows and columns as in momentum_norm).
x_hat = momentum_norm.to_numpy()

# Iterates over the column positions of x_hat; the body is unfinished.
for i in range(x_hat.shape[1]):
    weights + 

Agric     11
Food      11
Soda     455
Beer      11
Smoke     11
Toys      11
Fun       11
Books     11
Hshld     11
Clths     11
Hlth     527
MedEq     11
Drugs     11
Chems     11
Rubbr     82
Txtls     11
BldMt     11
Cnstr     11
Steel     11
FabPr    455
Mach      11
ElcEq     11
Autos     11
Aero      11
Ships     11
Guns     455
Gold     455
Mines     11
Coal      11
Oil       11
Util      11
Telcm     11
PerSv     23
BusSv     11
Comps     11
Chips     11
LabEq     11
Paper     47
Boxes     11
Trans     11
Whlsl     11
Rtail     11
Meals     11
Banks     11
Insur     11
RlEst     11
Fin       11
Other     11
dtype: int64