# Barra Factor Models: Momentum, Beta and Residual Volatility #

In [17]:
# Import Libraries

# Data Management
import pandas as pd
import numpy as np
import pickle

# Plots
import matplotlib.pyplot as plt

# Handle Files
import sys
import os

# Import Local Functions
sys.path.append(os.path.abspath("../source"))
from capm_toolkit import rolling_calc_rstr
from factors_toolkit import iterative_standardize_winsorize

In [18]:
# Load the dictionary of inputs/factors produced by the earlier notebooks.
# The path is built with os.path.join so the cell also runs on non-Windows
# systems (a literal '..\additional_data\...' only resolves on Windows).
# NOTE: pickle.load can execute arbitrary code while deserializing; only open
# pickles that were written by this project.
factors_path = os.path.join('..', 'additional_data', 'factors.pkl')

with open(factors_path, 'rb') as f:
    factors_dict = pickle.load(f)

factors_dict

{'mask':               A  AAPL  ABBV  ABT  ADBE  ADI  ADM  ADP  ADSK  AEE  ...  WST  \
 Date                                                              ...        
 2015-01-02  1.0   1.0   1.0  1.0   1.0  1.0  1.0  1.0   1.0  1.0  ...  0.0   
 2015-01-05  1.0   1.0   1.0  1.0   1.0  1.0  1.0  1.0   1.0  1.0  ...  0.0   
 2015-01-06  1.0   1.0   1.0  1.0   1.0  1.0  1.0  1.0   1.0  1.0  ...  0.0   
 2015-01-07  1.0   1.0   1.0  1.0   1.0  1.0  1.0  1.0   1.0  1.0  ...  0.0   
 2015-01-08  1.0   1.0   1.0  1.0   1.0  1.0  1.0  1.0   1.0  1.0  ...  0.0   
 ...         ...   ...   ...  ...   ...  ...  ...  ...   ...  ...  ...  ...   
 2024-10-22  1.0   1.0   1.0  1.0   1.0  1.0  1.0  1.0   1.0  1.0  ...  1.0   
 2024-10-23  1.0   1.0   1.0  1.0   1.0  1.0  1.0  1.0   1.0  1.0  ...  1.0   
 2024-10-24  1.0   1.0   1.0  1.0   1.0  1.0  1.0  1.0   1.0  1.0  ...  1.0   
 2024-10-25  1.0   1.0   1.0  1.0   1.0  1.0  1.0  1.0   1.0  1.0  ...  1.0   
 2024-10-28  1.0   1.0   1.0  1.0   1.0  1.0

In [19]:
# List the entries available in the loaded dictionary
factors_dict.keys()

dict_keys(['mask', 'mktcap', 'size_factor', 'value_factor'])

In [20]:
# Pull the universe mask and the market-cap frame out of the dictionary
mask, mktcap_df = factors_dict['mask'], factors_dict['mktcap']

In [21]:
# Print the (rows, columns) shape of each frame, one per line
print(mask.shape, mktcap_df.shape, sep='\n')

(2472, 399)
(2472, 399)


In [22]:
# Raw CSV column -> short name used in the rest of the notebook.
# Keeping both in one mapping guarantees the two lists stay aligned.
column_map = {
    'Open Price': 'open',
    'High Price': 'high',
    'Low Price': 'low',
    'Close Price': 'close',
    'Adjusted_close': 'adj_close',
}

variables = list(column_map.keys())
names = list(column_map.values())

In [23]:
# Build one price DataFrame per ticker listed in the mask.
#   universe: ticker -> DataFrame with the columns in `names`, from 2015 onwards
#   drops:    tickers whose CSV could not be loaded or parsed
universe = {}
drops = []

for ticker in mask.columns:
    # Portable path (the original backslash literal only resolves on Windows)
    csv_path = os.path.join('..', 'stocks', f'{ticker}.csv')

    try:
        df = pd.read_csv(csv_path)
        df = df.set_index('Date')                               # Set Index
        df.index = pd.to_datetime(df.index)                     # Index to date time
        df = df[variables]                                      # Use the variables we need
        df.columns = names                                      # Rename columns

        # Store Dataframe
        universe[ticker] = df.loc['2015':]

    except Exception as exc:
        # Best-effort load: keep going, but record the ticker and show why it
        # failed. `Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit stop the loop.
        drops.append(ticker)
        print("No data for", ticker, f"({type(exc).__name__}: {exc})")

    else:
        # Only report success when the ticker was actually stored
        print(f'{ticker} is ready.')

A is ready.
AAPL is ready.
ABBV is ready.
ABT is ready.
ADBE is ready.
ADI is ready.
ADM is ready.
ADP is ready.
ADSK is ready.
AEE is ready.
AEP is ready.
AES is ready.
AFL is ready.
AIG is ready.
AIZ is ready.
AJG is ready.
AKAM is ready.
ALB is ready.
ALGN is ready.
ALL is ready.
AMAT is ready.
AMD is ready.
AME is ready.
AMGN is ready.
AMP is ready.
AMT is ready.
AMZN is ready.
ANET is ready.
ANSS is ready.
AOS is ready.
APA is ready.
APD is ready.
APH is ready.
APO is ready.
ARE is ready.
AWK is ready.
BALL is ready.
BDX is ready.
BK is ready.
BKNG is ready.
BLDR is ready.
BMY is ready.
BR is ready.
BRO is ready.
BSX is ready.
BWA is ready.
BX is ready.
BXP is ready.
C is ready.
CAG is ready.
CAH is ready.
CBRE is ready.
CCI is ready.
CDNS is ready.
CDW is ready.
CE is ready.
CF is ready.
CFG is ready.
CHD is ready.
CHRW is ready.
CHTR is ready.
CI is ready.
CINF is ready.
CL is ready.
CLX is ready.
CMCSA is ready.
CME is ready.
CMG is ready.
CMI is ready.
CMS is ready.
CNC is rea

### Get Data ###

In [26]:
# Collect the adjusted close of every stock, each Series named after its ticker
close_series = [
    frame['adj_close'].rename(ticker)
    for ticker, frame in universe.items()
]

# Side-by-side concat: one column per ticker
close_df = pd.concat(close_series, axis=1)
close_df

Unnamed: 0_level_0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-02,40.56,27.3325,65.89,44.90,72.340,55.540,52.02,83.290,59.530,45.99,...,52.16,33.327526,146.89,36.12,92.83,38.08,52.023557,109.226149,77.43,43.31
2015-01-05,39.80,26.5625,64.65,44.91,71.980,54.525,50.22,83.230,58.660,45.53,...,51.44,33.327526,144.39,35.71,90.29,35.71,50.966548,113.300665,76.34,43.05
2015-01-06,39.18,26.5650,64.33,44.40,70.530,53.250,49.23,82.535,57.500,45.00,...,50.56,33.290609,141.31,35.90,89.81,35.50,50.340971,112.340243,75.79,42.63
2015-01-07,39.70,26.9375,66.93,44.76,71.110,53.810,49.97,83.260,57.380,45.35,...,51.38,33.392133,145.46,36.21,90.72,35.78,52.009176,115.134198,77.72,43.51
2015-01-08,40.89,27.9725,67.63,45.68,72.915,54.760,49.24,85.170,58.800,45.51,...,52.39,33.752081,147.21,36.63,92.23,36.05,52.915184,116.356553,79.38,44.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-22,133.46,235.8600,188.76,116.12,493.110,224.620,55.69,291.340,288.480,87.76,...,286.40,31.650000,99.40,63.68,120.70,131.37,133.360000,104.260000,372.00,189.51
2024-10-23,133.03,230.7600,187.88,117.01,485.030,226.850,56.08,290.040,286.295,89.20,...,286.44,31.850000,97.68,64.32,120.27,131.72,134.020000,104.700000,368.09,188.99
2024-10-24,130.69,230.5700,189.65,116.54,482.870,229.680,56.50,289.430,285.770,89.01,...,330.65,32.130000,97.90,65.11,119.59,130.65,133.160000,104.000000,362.05,181.50
2024-10-25,130.19,231.4100,187.85,114.22,483.720,230.170,56.56,287.960,286.040,87.80,...,307.92,31.800000,97.99,64.46,119.49,130.42,133.040000,102.350000,360.09,180.01


In [27]:
# Count the NaNs per ticker once, then report the grand total and the ranking
missing_per_ticker = close_df.isna().sum()
print(f'Total number of missing values: {missing_per_ticker.sum()}')
missing_per_ticker.sort_values(ascending=False)

Total number of missing values: 2000


LW      463
DELL    423
FTV     366
MTCH    223
HPE     200
       ... 
FFIV      0
FE        0
FDX       0
FDS       0
ZTS       0
Length: 399, dtype: int64

### Call the Alpha, Beta and Sigma of each stock ###

In [28]:
# Column labels of the price frame as a plain Python list
tickers = list(close_df.columns)

In [31]:
# Load the CAPM betas and index them by date.
# The path is built with os.path.join so it also resolves on non-Windows
# systems; the index is set by chaining instead of mutating in place, so the
# cell can be re-run safely.
betas_path = os.path.join('..', 'additional_data', 'capm_hbetas.csv')
betas_df = pd.read_csv(betas_path).set_index('date')
betas_df.index = pd.to_datetime(betas_df.index)

betas_df

Unnamed: 0_level_0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-06,1.179868,1.135259,1.070516,1.161266,1.011452,1.296923,0.997473,1.029830,0.987923,0.619564,...,0.871581,0.861979,1.519218,0.580525,1.069838,1.017140,1.026261,0.830557,1.385564,0.988030
2016-01-07,1.195912,1.144425,1.049533,1.173685,1.018805,1.295928,0.985207,1.049064,1.025386,0.605797,...,0.874567,0.889280,1.582273,0.563144,1.054998,0.986555,1.034553,0.872434,1.405745,1.003773
2016-01-08,1.193099,1.140376,1.057993,1.176928,1.016292,1.289396,0.978202,1.048054,1.023915,0.604088,...,0.871438,0.891461,1.589723,0.568940,1.060223,0.987076,1.034519,0.869871,1.402137,1.005145
2016-01-11,1.192734,1.140599,1.043017,1.179582,1.019186,1.292529,0.975607,1.049846,1.030787,0.603718,...,0.868551,0.894156,1.582554,0.568351,1.060469,0.988450,1.024360,0.861886,1.397225,0.999894
2016-01-12,1.185442,1.129000,1.052119,1.182830,1.013343,1.296459,0.997551,1.048078,1.020875,0.606528,...,0.868596,0.888345,1.582640,0.564631,1.066002,0.995321,1.029970,0.873198,1.401682,1.002240
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-21,0.895617,1.028813,0.262124,0.216932,1.237327,1.684392,0.444662,0.424774,1.367280,0.160670,...,0.824613,0.745482,0.965940,0.116127,0.213860,1.007146,0.472845,0.577908,1.359374,0.780150
2024-10-22,0.908632,1.033298,0.245565,0.217830,1.243571,1.684884,0.442839,0.421141,1.364362,0.155055,...,0.820549,0.744016,0.981412,0.113129,0.214785,1.026426,0.473728,0.574032,1.356829,0.778839
2024-10-23,0.913300,1.040177,0.254397,0.226441,1.237341,1.674929,0.428225,0.409895,1.361680,0.128671,...,0.810975,0.741096,0.992221,0.091816,0.202523,1.026379,0.468113,0.572032,1.364582,0.779676
2024-10-24,0.912775,1.040517,0.252740,0.224066,1.237615,1.673004,0.427845,0.409367,1.359008,0.125171,...,0.824026,0.739076,0.997838,0.092252,0.199285,1.021399,0.465901,0.573090,1.364477,0.777017


In [32]:
# Load the CAPM alphas and index them by date.
# The path is built with os.path.join so it also resolves on non-Windows
# systems; the index is set by chaining instead of mutating in place, so the
# cell can be re-run safely.
alpha_path = os.path.join('..', 'additional_data', 'capm_halpha.csv')
alpha_df = pd.read_csv(alpha_path).set_index('date')
alpha_df.index = pd.to_datetime(alpha_df.index)

alpha_df

Unnamed: 0_level_0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-06,0.000229,-0.000079,-0.000249,-0.000004,0.001109,0.000049,-0.001333,0.000067,0.000245,-0.000085,...,0.000534,-0.000687,-0.002378,0.000130,-0.000519,-0.000091,0.000260,-0.000156,-0.000342,0.000667
2016-01-07,0.000163,-0.000109,-0.000166,-0.000072,0.001068,0.000048,-0.001270,-0.000026,0.000093,-0.000032,...,0.000521,-0.000810,-0.002640,0.000200,-0.000454,0.000070,0.000229,-0.000371,-0.000433,0.000594
2016-01-08,0.000192,-0.000080,-0.000245,-0.000101,0.001099,0.000115,-0.001201,-0.000018,0.000114,-0.000012,...,0.000556,-0.000835,-0.002704,0.000141,-0.000504,0.000063,0.000232,-0.000348,-0.000405,0.000583
2016-01-11,0.000123,-0.000022,-0.000490,-0.000076,0.001180,0.000223,-0.001246,0.000012,0.000180,-0.000016,...,0.000511,-0.000850,-0.002819,0.000163,-0.000552,0.000041,0.000135,-0.000479,-0.000473,0.000430
2016-01-12,0.000076,-0.000075,-0.000417,-0.000039,0.001138,0.000243,-0.001130,0.000011,0.000088,-0.000018,...,0.000529,-0.000943,-0.002885,0.000123,-0.000493,0.000075,0.000199,-0.000362,-0.000434,0.000444
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-21,-0.000060,0.000004,0.000548,0.000506,-0.001727,-0.000724,-0.001495,0.000100,-0.000223,0.000245,...,-0.001853,-0.000502,-0.000527,0.000114,-0.000037,0.000355,-0.000192,-0.000718,0.000871,-0.000422
2024-10-22,-0.000232,-0.000036,0.000706,0.000474,-0.001795,-0.000738,-0.001502,0.000132,-0.000213,0.000268,...,-0.001826,-0.000553,-0.000603,0.000148,-0.000027,0.000187,-0.000213,-0.000715,0.000884,-0.000409
2024-10-23,-0.000253,-0.000085,0.000654,0.000435,-0.001764,-0.000658,-0.001410,0.000192,-0.000193,0.000429,...,-0.001764,-0.000524,-0.000669,0.000276,0.000039,0.000198,-0.000173,-0.000695,0.000842,-0.000409
2024-10-24,-0.000340,-0.000108,0.000731,0.000460,-0.001797,-0.000568,-0.001362,0.000187,-0.000160,0.000488,...,-0.001255,-0.000442,-0.000784,0.000326,0.000076,0.000250,-0.000164,-0.000754,0.000742,-0.000561


In [33]:
# Load the CAPM sigmas and index them by date.
# The path is built with os.path.join so it also resolves on non-Windows
# systems; the index is set by chaining instead of mutating in place, so the
# cell can be re-run safely.
sigma_path = os.path.join('..', 'additional_data', 'capm_hsigma.csv')
sigma_df = pd.read_csv(sigma_path).set_index('date')
sigma_df.index = pd.to_datetime(sigma_df.index)

sigma_df

Unnamed: 0_level_0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-06,0.008325,0.012732,0.017248,0.008385,0.011005,0.015732,0.011382,0.006976,0.015244,0.010728,...,0.011069,0.009958,0.031827,0.010397,0.009815,0.008519,0.017766,0.010081,0.023385,0.015924
2016-01-07,0.008374,0.012759,0.017306,0.008276,0.010978,0.015732,0.011341,0.006890,0.015411,0.010771,...,0.011069,0.009950,0.032002,0.010453,0.009822,0.008056,0.017777,0.009509,0.023385,0.015915
2016-01-08,0.008369,0.012792,0.017332,0.008292,0.010954,0.015718,0.011336,0.006893,0.015400,0.010764,...,0.011052,0.009938,0.032028,0.010438,0.009832,0.008055,0.017776,0.009515,0.023397,0.015917
2016-01-11,0.008445,0.012828,0.017358,0.008285,0.010993,0.015779,0.011339,0.006894,0.015375,0.010764,...,0.011052,0.009951,0.032030,0.010448,0.009870,0.008073,0.017728,0.009531,0.023393,0.016018
2016-01-12,0.008430,0.012782,0.017363,0.008303,0.010989,0.015775,0.011170,0.006894,0.015403,0.010766,...,0.011062,0.010097,0.032075,0.010461,0.009902,0.008048,0.017752,0.009651,0.023396,0.016017
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-21,0.015822,0.011865,0.011919,0.011019,0.019373,0.014787,0.020806,0.010765,0.013478,0.012272,...,0.020997,0.013333,0.017280,0.013663,0.012240,0.010302,0.009338,0.012235,0.018513,0.015029
2024-10-22,0.015859,0.011858,0.011846,0.011031,0.019371,0.014788,0.020808,0.010760,0.013476,0.012262,...,0.020994,0.013382,0.017217,0.013662,0.012244,0.010158,0.009342,0.012237,0.018512,0.015029
2024-10-23,0.015852,0.011882,0.011839,0.010998,0.019363,0.014869,0.020819,0.010730,0.013480,0.012270,...,0.021001,0.013402,0.017219,0.013646,0.012209,0.010170,0.009361,0.012249,0.018502,0.015029
2024-10-24,0.015898,0.011882,0.011831,0.010983,0.019365,0.014861,0.020823,0.010731,0.013461,0.012226,...,0.023102,0.013389,0.017129,0.013665,0.012181,0.010097,0.009355,0.012248,0.018543,0.015248


### Beta Factor ###

In [37]:
# Shrink each raw beta towards 1: two thirds of the raw beta plus one third
smooth_beta = betas_df.mul(2 / 3).add(1 / 3)

smooth_beta

Unnamed: 0_level_0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-06,1.119912,1.090172,1.047011,1.107511,1.007635,1.197949,0.998315,1.019887,0.991949,0.746376,...,0.914388,0.907986,1.346145,0.720350,1.046559,1.011426,1.017507,0.887038,1.257043,0.992020
2016-01-07,1.130608,1.096283,1.033022,1.115790,1.012537,1.197286,0.990138,1.032710,1.016924,0.737198,...,0.916378,0.926186,1.388182,0.708762,1.036666,0.991036,1.023036,0.914956,1.270496,1.002515
2016-01-08,1.128733,1.093584,1.038662,1.117952,1.010861,1.192931,0.985468,1.032036,1.015943,0.736059,...,0.914292,0.927640,1.393149,0.712627,1.040149,0.991384,1.023013,0.913248,1.268092,1.003430
2016-01-11,1.128489,1.093733,1.028678,1.119721,1.012791,1.195019,0.983738,1.033230,1.020524,0.735812,...,0.912367,0.929437,1.388369,0.712234,1.040313,0.992300,1.016240,0.907924,1.264817,0.999929
2016-01-12,1.123628,1.086000,1.034746,1.121887,1.008896,1.197639,0.998367,1.032052,1.013917,0.737685,...,0.912398,0.925563,1.388427,0.709754,1.044001,0.996880,1.019980,0.915465,1.267788,1.001493
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-21,0.930411,1.019209,0.508083,0.477955,1.158218,1.456261,0.629775,0.616516,1.244854,0.440447,...,0.883075,0.830321,0.977293,0.410752,0.475907,1.004764,0.648563,0.718605,1.239583,0.853433
2024-10-22,0.939088,1.022199,0.497044,0.478553,1.162380,1.456589,0.628559,0.614094,1.242908,0.436704,...,0.880366,0.829344,0.987608,0.408753,0.476523,1.017618,0.649152,0.716021,1.237886,0.852559
2024-10-23,0.942200,1.026785,0.502931,0.484294,1.158227,1.449953,0.618816,0.606596,1.241120,0.419114,...,0.873983,0.827397,0.994814,0.394544,0.468349,1.017586,0.645409,0.714688,1.243055,0.853118
2024-10-24,0.941850,1.027012,0.501827,0.482711,1.158410,1.448669,0.618564,0.606245,1.239339,0.416781,...,0.882684,0.826051,0.998559,0.394835,0.466190,1.014266,0.643934,0.715393,1.242985,0.851345


In [None]:
# Standardize the smoothed betas with the project helper.
# (presumably weighted by mktcap_df and restricted to mask - confirm in factors_toolkit)
zscore_beta_factor = iterative_standardize_winsorize(smooth_beta, mktcap_df, mask)

# Largest and smallest score over all dates and tickers
beta_extremes = (
    zscore_beta_factor.max().max(),
    zscore_beta_factor.min().min(),
)
print(*beta_extremes, sep='\n')

### Momentum Factor: Alpha ###

In [40]:
# Standardize the CAPM alphas with the project helper
zscore_alpha = iterative_standardize_winsorize(alpha_df, mktcap_df, mask)

# Largest and smallest score over all dates and tickers
alpha_extremes = (
    zscore_alpha.max().max(),
    zscore_alpha.min().min(),
)
print(*alpha_extremes, sep='\n')

5.015133837585544
-5.01463411539299


### Momentum Factor: Relative Strength ###

In [42]:
# Simple period-over-period returns; missing prices are not filled beforehand
returns_df = close_df.pct_change(fill_method=None)

# Show everything except the first row, which has no previous price
returns_df.tail(-1)

Unnamed: 0_level_0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015-01-05,-0.018738,-0.028172,-0.018819,0.000223,-0.004976,-0.018275,-0.034602,-0.000720,-0.014614,-0.010002,...,-0.013804,0.000000,-0.017020,-0.011351,-0.027362,-0.062237,-0.020318,0.037303,-0.014077,-0.006003
2015-01-06,-0.015578,0.000094,-0.004950,-0.011356,-0.020144,-0.023384,-0.019713,-0.008350,-0.019775,-0.011641,...,-0.017107,-0.001108,-0.021331,0.005321,-0.005316,-0.005881,-0.012274,-0.008477,-0.007205,-0.009756
2015-01-07,0.013272,0.014022,0.040417,0.008108,0.008223,0.010516,0.015031,0.008784,-0.002087,0.007778,...,0.016218,0.003050,0.029368,0.008635,0.010133,0.007887,0.033138,0.024870,0.025465,0.020643
2015-01-08,0.029975,0.038422,0.010459,0.020554,0.025383,0.017655,-0.014609,0.022940,0.024747,0.003528,...,0.019657,0.010779,0.012031,0.011599,0.016645,0.007546,0.017420,0.010617,0.021359,0.015399
2015-01-09,-0.007337,0.001072,-0.027355,-0.010508,-0.014743,-0.000730,-0.008530,-0.007162,-0.009014,-0.011206,...,-0.011453,-0.001367,0.012499,-0.012285,-0.001410,-0.018863,-0.001767,-0.014424,0.014613,0.001584
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-22,-0.024130,-0.002622,0.011901,-0.007437,-0.009242,-0.004653,-0.007839,0.001478,-0.004486,-0.005327,...,-0.002959,-0.019820,0.010471,0.003941,0.005163,-0.008678,-0.004776,-0.008935,-0.001584,0.000317
2024-10-23,-0.003222,-0.021623,-0.004662,0.007664,-0.016386,0.009928,0.007003,-0.004462,-0.007574,0.016408,...,0.000140,0.006319,-0.017304,0.010050,-0.003563,0.002664,0.004949,0.004220,-0.010511,-0.002744
2024-10-24,-0.017590,-0.000823,0.009421,-0.004017,-0.004453,0.012475,0.007489,-0.002103,-0.001834,-0.002130,...,0.154343,0.008791,0.002252,0.012282,-0.005654,-0.008123,-0.006417,-0.006686,-0.016409,-0.039632
2024-10-25,-0.003826,0.003643,-0.009491,-0.019907,0.001760,0.002133,0.001062,-0.005079,0.000945,-0.013594,...,-0.068743,-0.010271,0.000919,-0.009983,-0.000836,-0.001760,-0.000901,-0.015865,-0.005414,-0.008209


In [43]:
# Relative strength: long-horizon measure minus short-horizon measure.
# Both come from the project helper rolling_calc_rstr; its output is
# transposed before use.
long_horizon = dict(window_size=252, half_life=126)
short_horizon = dict(window_size=28, half_life=14, min_obs=13)

relative_strenght_long = rolling_calc_rstr(returns_df, **long_horizon).T
relative_strenght_short = rolling_calc_rstr(returns_df, **short_horizon).T

# Difference of the two measures, keeping only rows without any missing value
rs_df = relative_strenght_long.sub(relative_strenght_short).dropna()

rs_df

  return reduction(axis=axis, out=out, **passkwargs)
  return reduction(axis=axis, out=out, **passkwargs)


Unnamed: 0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
2015-12-31,0.045158,-0.043443,-0.121188,-0.015477,0.267018,-0.014089,-0.451867,0.041362,0.174479,0.021547,...,0.179492,-0.060680,-0.774719,0.029092,-0.115295,0.052651,-0.104994,-0.097829,-0.260988,0.069369
2016-01-04,0.055415,-0.047044,-0.116394,-0.015036,0.266064,-0.020984,-0.456358,0.043489,0.158018,0.025280,...,0.173835,-0.059800,-0.782277,0.030988,-0.118682,0.048675,-0.098073,-0.098589,-0.268700,0.073932
2016-01-05,0.072157,-0.046106,-0.108165,-0.024851,0.267210,-0.037625,-0.426217,0.040338,0.178058,0.021602,...,0.176123,-0.065922,-0.759351,0.032791,-0.103362,0.076675,-0.089730,-0.118096,-0.265295,0.073651
2016-01-06,0.085636,-0.049397,-0.106990,-0.023431,0.274461,-0.004502,-0.423820,0.036871,0.186147,0.026465,...,0.184055,-0.065821,-0.721535,0.033513,-0.092396,0.075742,-0.086998,-0.115793,-0.256437,0.078996
2016-01-07,0.084104,-0.068889,-0.132487,-0.032606,0.265818,-0.020059,-0.425242,0.025885,0.179157,0.023252,...,0.168170,-0.071372,-0.739224,0.028193,-0.102277,0.070090,-0.103271,-0.128120,-0.276148,0.069765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-22,0.154651,0.284189,0.264291,0.161394,-0.034556,0.235620,-0.157653,0.224268,0.292551,0.210827,...,-0.244816,0.095918,0.029950,0.168733,0.080603,0.226198,0.057637,-0.103290,0.483922,0.146960
2024-10-23,0.152176,0.283787,0.266072,0.164143,-0.079433,0.249270,-0.152084,0.224651,0.309516,0.222067,...,-0.249892,0.114498,0.037327,0.168162,0.088562,0.234048,0.062345,-0.101133,0.482271,0.147714
2024-10-24,0.165521,0.269418,0.261429,0.173367,-0.095849,0.240894,-0.135486,0.228312,0.305232,0.223420,...,-0.250281,0.111092,0.036799,0.165381,0.100314,0.239154,0.060161,-0.095068,0.487467,0.149903
2024-10-25,0.168915,0.276842,0.256255,0.179893,-0.088753,0.259329,-0.147662,0.274161,0.315802,0.218266,...,-0.229500,0.123891,0.059899,0.162017,0.105629,0.245763,0.049973,-0.096167,0.514202,0.157286


In [44]:
# Standardize the relative-strength measure with the project helper
zscore_relative_strenght = iterative_standardize_winsorize(rs_df, mktcap_df, mask)

# Largest and smallest score over all dates and tickers
for extreme in (
    zscore_relative_strenght.max().max(),
    zscore_relative_strenght.min().min(),
):
    print(extreme)

5.002664211945189
-5.00164344987379


### Calculate Momentum Factor ###

In [46]:
# Momentum = average of the two standardized descriptors, date by date.
# The frames are stacked vertically and rows sharing an index label are
# averaged; a date present in only one input is taken from that input alone.
momentum_inputs = [zscore_alpha, zscore_relative_strenght]
stacked_momentum = pd.concat(momentum_inputs)

# Rows that still contain a missing value are discarded
momentum_factor = stacked_momentum.groupby(level=0).mean().dropna()

momentum_factor

Unnamed: 0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
2015-12-31,0.115886,-0.251012,-0.572958,-0.135205,1.034616,-0.129458,-1.942310,0.100166,0.651412,0.018112,...,0.672168,-0.322392,-3.279250,0.049359,-0.548554,0.146916,-0.505897,-0.476228,-1.151873,0.216146
2016-01-04,0.153901,-0.268194,-0.553890,-0.136331,1.021696,-0.160834,-1.954415,0.104772,0.576589,0.029756,...,0.641748,-0.320740,-3.297079,0.053272,-0.563314,0.126136,-0.478415,-0.480537,-1.181332,0.230186
2016-01-05,0.201020,-0.291807,-0.550416,-0.203234,1.013847,-0.256462,-1.875804,0.068423,0.642331,-0.009655,...,0.634267,-0.374385,-3.264039,0.036975,-0.530405,0.219849,-0.473596,-0.591801,-1.205211,0.207247
2016-01-06,0.174423,-0.269485,-0.478760,-0.175740,1.027802,-0.108844,-1.707626,-0.012396,0.394048,-0.113195,...,0.539537,-0.619091,-2.875841,0.013346,-0.588367,-0.012503,-0.172736,-0.449318,-0.841770,0.387383
2016-01-07,0.157544,-0.302298,-0.465399,-0.207558,0.999226,-0.120187,-1.641771,-0.060958,0.322050,-0.069452,...,0.515890,-0.663407,-2.997633,0.058612,-0.547691,0.080895,-0.203326,-0.559925,-0.903131,0.345991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-22,-0.380116,-0.006739,0.318088,-0.016280,-1.557299,-0.457860,-1.674801,-0.051505,-0.076656,-0.012831,...,-2.021067,-0.664238,-0.829488,-0.161698,-0.436193,-0.020193,-0.577454,-1.169398,0.874450,-0.484026
2024-10-23,-0.413195,-0.049364,0.271998,-0.053287,-1.644856,-0.401618,-1.628885,-0.041336,-0.046589,0.068106,...,-2.011169,-0.625888,-0.862373,-0.122285,-0.408170,-0.018243,-0.567452,-1.171936,0.827576,-0.498723
2024-10-24,-0.431370,-0.093070,0.303512,-0.019827,-1.713659,-0.381977,-1.584306,-0.036099,-0.041817,0.101876,...,-1.778881,-0.599222,-0.927884,-0.103163,-0.366872,0.018259,-0.571407,-1.197170,0.796109,-0.574206
2024-10-25,-0.367275,-0.063392,0.263830,-0.023343,-1.705074,-0.336294,-1.555632,0.063245,-0.049658,0.037486,...,-1.886180,-0.598803,-0.908905,-0.160703,-0.340496,0.047627,-0.613180,-1.235792,0.866149,-0.582230


In [47]:
# Largest and smallest raw momentum value over all dates and tickers
momentum_extremes = (
    momentum_factor.max().max(),
    momentum_factor.min().min(),
)
print(*momentum_extremes, sep='\n')

5.008738942774109
-5.001535801031822


In [48]:
# Re-standardize the combined momentum factor with the project helper
zscore_mometum_factor = iterative_standardize_winsorize(momentum_factor, mktcap_df, mask)

# Largest and smallest score over all dates and tickers
for extreme in (
    zscore_mometum_factor.max().max(),
    zscore_mometum_factor.min().min(),
):
    print(extreme)

5.000526523852518
-5.000114730840975


### Residual Volatility Factor: Sigma ###

In [49]:
# Standardize the CAPM sigmas with the project helper
zscore_sigma = iterative_standardize_winsorize(sigma_df, mktcap_df, mask)

# Largest and smallest score over all dates and tickers
sigma_extremes = (
    zscore_sigma.max().max(),
    zscore_sigma.min().min(),
)
print(*sigma_extremes, sep='\n')

5.0119329936459955
-1.6689050068360654


### Residual Volatility Factor: Returns Volatility ###

In [50]:
# Rolling standard deviation of returns over 252 rows, kept from 2016 onwards
ret_vol_df = returns_df.rolling(window=252).std().loc['2016':]

ret_vol_df

Unnamed: 0_level_0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-04,0.014271,0.016830,0.020189,0.014067,0.014830,0.020082,0.014915,0.012312,0.018042,0.012371,...,0.013988,0.013109,0.034898,0.011915,0.014323,0.013125,0.020441,0.012883,0.026830,0.018594
2016-01-05,0.014222,0.016810,0.020157,0.014067,0.014826,0.020054,0.014786,0.012312,0.018018,0.012379,...,0.013983,0.013110,0.034911,0.011910,0.014234,0.012523,0.020401,0.012733,0.026862,0.018614
2016-01-06,0.014190,0.016856,0.020155,0.014059,0.014797,0.020180,0.014812,0.012326,0.017996,0.012364,...,0.013944,0.013180,0.035030,0.011924,0.014239,0.012543,0.020391,0.012726,0.026974,0.018603
2016-01-07,0.014419,0.017039,0.019991,0.014129,0.014856,0.020233,0.014844,0.012462,0.018389,0.012354,...,0.013982,0.013373,0.035445,0.011914,0.014257,0.012639,0.020399,0.012705,0.027072,0.018662
2016-01-08,0.014309,0.016866,0.020050,0.014128,0.014806,0.020209,0.014821,0.012387,0.018352,0.012361,...,0.013943,0.013363,0.035512,0.011912,0.014268,0.012644,0.020388,0.012688,0.027037,0.018662
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-22,0.017355,0.014232,0.012044,0.011164,0.021599,0.019861,0.021075,0.011195,0.017094,0.012247,...,0.021924,0.014626,0.018752,0.013629,0.012343,0.012913,0.010054,0.013031,0.021302,0.016201
2024-10-23,0.017357,0.014305,0.012028,0.011150,0.021623,0.019838,0.021078,0.011198,0.017073,0.012239,...,0.021888,0.014602,0.018720,0.013642,0.012308,0.012816,0.010037,0.013029,0.021318,0.016200
2024-10-24,0.017258,0.014306,0.012023,0.011132,0.021624,0.019847,0.020935,0.011197,0.017000,0.012197,...,0.023970,0.014610,0.018689,0.013619,0.012298,0.012828,0.010029,0.013035,0.021351,0.016396
2024-10-25,0.017203,0.014276,0.012030,0.011175,0.021518,0.019739,0.020883,0.009544,0.016927,0.012225,...,0.024276,0.014607,0.018607,0.013629,0.012298,0.012765,0.010028,0.013053,0.021232,0.016351


In [51]:
# Standardize the rolling return volatility with the project helper
zscore_ret_vol = iterative_standardize_winsorize(ret_vol_df, mktcap_df, mask)

# Largest and smallest score over all dates and tickers
for extreme in (
    zscore_ret_vol.max().max(),
    zscore_ret_vol.min().min(),
):
    print(extreme)

5.00952562897227
-1.7945264293033893


### Residual Volatility Factor: Average True Range ###

In [53]:
# Average True Range (ATR) of every stock in the universe
def _rolling_atr(prices, window=14):
    """Return the rolling mean of the true range over `window` rows.

    `prices` must provide 'high', 'low' and 'close' columns. The true range of
    a row is the largest of: high - low, |high - previous close| and
    |low - previous close|.
    """
    previous_close = prices['close'].shift(1)
    candidates = pd.concat(
        [
            prices['high'] - prices['low'],
            (prices['high'] - previous_close).abs(),
            (prices['low'] - previous_close).abs(),
        ],
        axis=1,
    )
    true_range = candidates.max(axis=1)
    return true_range.rolling(window=window).mean()


# NOTE(review): the ATR is expressed in price units, so it scales with the
# share price - confirm this is intended before comparing it across stocks.
atr_series = [
    _rolling_atr(frame).rename(ticker)
    for ticker, frame in universe.items()
]

# One column per ticker, kept from 2016 onwards
atr_df = pd.concat(atr_series, axis=1).loc['2016':]

atr_df

Unnamed: 0_level_0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-01-04,0.741429,0.596964,1.311429,0.904286,1.934286,1.163214,0.819286,1.482857,1.210357,0.712857,...,1.241429,0.533923,3.027857,0.590714,1.603929,0.585714,0.942731,1.502305,2.439286,0.897786
2016-01-05,0.739286,0.597857,1.255357,0.883571,1.872857,1.170357,0.832857,1.473571,1.168214,0.766429,...,1.253571,0.510190,3.177143,0.621429,1.538929,0.598571,0.977657,1.577143,2.166429,0.879929
2016-01-06,0.740000,0.604643,1.241786,0.858929,1.842143,1.205357,0.837857,1.462143,1.161786,0.787857,...,1.255000,0.535242,3.194286,0.647143,1.393214,0.667143,0.974062,1.642279,2.277857,0.881357
2016-01-07,0.815000,0.623929,1.244643,0.878214,1.890714,1.226071,0.846429,1.545000,1.339643,0.751429,...,1.285000,0.554360,3.427857,0.623571,1.426071,0.680714,1.035181,1.713653,2.446429,0.907071
2016-01-08,0.841786,0.614643,1.298929,0.906071,1.898571,1.212500,0.817500,1.505000,1.375357,0.719286,...,1.260000,0.550075,3.465000,0.632857,1.488214,0.652143,1.011812,1.712960,2.222857,0.967786
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-22,2.785714,3.870000,2.646429,1.887500,11.178571,5.168929,1.132143,3.090000,4.339643,1.071071,...,5.947857,0.581071,3.130000,0.981071,1.983929,1.968571,1.910000,1.772857,5.540357,3.101786
2024-10-23,2.806429,4.200714,2.607857,1.841786,11.122143,5.268929,1.124286,3.035714,4.441786,1.108929,...,5.802857,0.548214,2.965000,0.991071,1.919643,1.924286,1.910714,1.712857,5.485357,3.084643
2024-10-24,2.835714,4.097500,2.611429,1.844643,11.369286,5.071071,1.112500,2.956429,4.292857,1.062500,...,10.144286,0.527500,2.878571,0.933214,1.816071,1.887857,1.837857,1.738571,5.687857,3.518929
2024-10-25,2.848571,3.966071,2.635000,1.955000,10.291429,5.166071,1.115357,2.906429,4.192143,1.129643,...,11.456429,0.556786,2.809286,0.958214,1.784643,1.896429,1.794286,1.680000,5.707857,3.437500


In [56]:
# Standardize the ATR with the project helper
zscore_atr = iterative_standardize_winsorize(atr_df, mktcap_df, mask)

# Largest and smallest score over all dates and tickers
atr_extremes = (
    zscore_atr.max().max(),
    zscore_atr.min().min(),
)
print(*atr_extremes, sep='\n')

5.387461439917129
-1.560144510175958


### Calculate Residual Volatility Factor ###

In [59]:
# Residual volatility = average of the three standardized descriptors, date by
# date. The frames are stacked vertically and rows sharing an index label are
# averaged; a date missing from one input is averaged over the remaining ones.
residual_vol_inputs = [zscore_sigma, zscore_ret_vol, zscore_atr]
stacked_residual_vol = pd.concat(residual_vol_inputs)

# Keep the result from 2016 onwards
residual_vol_factor = stacked_residual_vol.groupby(level=0).mean().loc['2016':]

residual_vol_factor

Unnamed: 0,A,AAPL,ABBV,ABT,ADBE,ADI,ADM,ADP,ADSK,AEE,...,WST,WY,WYNN,XEL,XOM,XYL,YUM,ZBH,ZBRA,ZTS
2016-01-04,-0.421107,-0.240124,0.513210,-0.349153,0.317023,0.417605,-0.310399,-0.199694,0.234563,-0.632876,...,-0.164935,-0.659022,3.006083,-0.749484,0.076323,-0.627848,0.328782,-0.129762,1.840050,0.113056
2016-01-05,-0.421409,-0.239658,0.499163,-0.350940,0.320325,0.437596,-0.307234,-0.177928,0.226607,-0.595032,...,-0.137725,-0.673282,3.170723,-0.730207,0.059113,-0.680764,0.357794,-0.072574,1.735826,0.115188
2016-01-06,-0.477983,-0.075381,0.684457,-0.435910,0.169080,0.576975,-0.205903,-0.401757,0.380638,-0.433039,...,-0.118485,-0.526169,3.382967,-0.539632,-0.121809,-0.606881,0.626326,-0.108977,1.943507,0.352841
2016-01-07,-0.440639,-0.064602,0.659035,-0.441727,0.168784,0.570140,-0.214842,-0.381775,0.469704,-0.455452,...,-0.119768,-0.514324,3.466209,-0.554629,-0.124087,-0.632807,0.634629,-0.136749,1.982750,0.351487
2016-01-08,-0.436317,-0.075226,0.683978,-0.428720,0.162228,0.561234,-0.226369,-0.404374,0.478322,-0.465659,...,-0.134146,-0.514128,3.471075,-0.549342,-0.100843,-0.640873,0.624643,-0.141340,1.886563,0.375505
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-10-22,0.011434,-0.356742,-0.581333,-0.745782,1.138322,0.271779,0.434481,-0.669912,-0.043244,-0.662139,...,0.865361,-0.483699,0.210877,-0.493923,-0.587812,-0.693248,-0.922005,-0.563416,0.632739,-0.088148
2024-10-23,0.010073,-0.328678,-0.588871,-0.755516,1.132436,0.280711,0.432834,-0.679217,-0.039346,-0.662165,...,0.850573,-0.489136,0.194067,-0.496355,-0.600145,-0.704885,-0.925206,-0.570406,0.626531,-0.092221
2024-10-24,-0.001296,-0.345908,-0.594824,-0.760440,1.120798,0.250001,0.411675,-0.690307,-0.068251,-0.673981,...,1.414058,-0.494826,0.167455,-0.504931,-0.614815,-0.715356,-0.933240,-0.572667,0.627157,-0.044004
2024-10-25,-0.017184,-0.357207,-0.592050,-0.746297,1.033295,0.250646,0.396437,-0.791498,-0.082743,-0.667742,...,1.553967,-0.492013,0.156097,-0.504220,-0.618413,-0.719200,-0.936606,-0.573866,0.620947,-0.052479


In [62]:
# Re-standardize the combined residual volatility factor with the project helper
zscore_resid_vol_factor = iterative_standardize_winsorize(residual_vol_factor, mktcap_df, mask)

# Largest and smallest score over all dates and tickers
for extreme in (
    zscore_resid_vol_factor.max().max(),
    zscore_resid_vol_factor.min().min(),
):
    print(extreme)

5.000582571189602
-1.8980721207974993


In [61]:
# Inspect the keys currently held by the dictionary
factors_dict.keys()

dict_keys(['mask', 'mktcap', 'size_factor', 'value_factor'])

In [63]:
# Register the three standardized factors in the shared dictionary
# (keyword order determines insertion order for keys that are new)
factors_dict.update(
    momentum_factor=zscore_mometum_factor,
    residual_vol_factor=zscore_resid_vol_factor,
    beta_factor=zscore_beta_factor,
)

factors_dict.keys()

dict_keys(['mask', 'mktcap', 'size_factor', 'value_factor', 'momentum_factor', 'residual_vol_factor', 'beta_factor'])

In [64]:
# Persist the updated dictionary back to the file it was loaded from.
# The pickle is written to a temporary sibling file first and then moved over
# the original with os.replace, so an error while dumping cannot leave a
# truncated factors.pkl behind. The path is built with os.path.join so the
# cell also runs on non-Windows systems.
factors_path = os.path.join('..', 'additional_data', 'factors.pkl')
tmp_factors_path = factors_path + '.tmp'

with open(tmp_factors_path, 'wb') as f:
    pickle.dump(factors_dict, f)

os.replace(tmp_factors_path, factors_path)