# Cross-sectional regression

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy import stats

hml = pd.read_csv('../data/hml.csv', index_col=0, parse_dates=True).round(6).dropna()

# sample data
hml.iloc[:, :5]

Unnamed: 0,AAPL,alpha001,alpha002,alpha003,alpha005
2003-12-31,0.0042,-0.000781,-0.001524,-0.001524,-0.001961
2004-01-02,-0.0042,0.002618,-0.003609,-0.003609,0.001349
2004-01-05,0.0418,0.004879,-0.000859,-0.000859,-0.000268
2004-01-06,-0.0036,-0.002162,-0.004223,-0.004223,0.002374
2004-01-07,0.0226,-0.001999,-0.003419,-0.003419,-0.003572
...,...,...,...,...,...
2024-12-24,0.0115,0.000414,0.000276,0.001463,-0.001755
2024-12-26,0.0032,-0.000035,0.000425,-0.002217,-0.000648
2024-12-27,-0.0132,0.000362,0.000746,-0.000888,0.000948
2024-12-30,-0.0133,0.001971,0.000171,0.002560,0.001402


## alphas exploration

### T test for alphas

Check if the alphas are significantly different from zero using a t-test.

In [2]:
alphas_t_test_results = pd.DataFrame(index=hml.columns, columns=['t-stat', 'p-value', '<=0.05'])
for col in hml.columns:
    # skip AAPL
    if col == 'AAPL':
        continue
    if col == 'eqw':
        continue
    # t test
    t_stat, p_value = stats.ttest_1samp(hml[col], 0)
    alphas_t_test_results.loc[col, 't-stat'] = t_stat  # type: ignore
    alphas_t_test_results.loc[col, 'p-value'] = p_value  # type: ignore

# t-test results
alphas_t_test_results = alphas_t_test_results.astype(float)
alphas_t_test_results = alphas_t_test_results.round(3)
alphas_t_test_results['<=0.05'] = alphas_t_test_results['p-value'] <= 0.05

alphas_t_test_results[alphas_t_test_results['<=0.05']].sort_values('p-value', ascending=True)

Unnamed: 0,t-stat,p-value,<=0.05
alpha003,-3.989,0.0,True
alpha051,-9.596,0.0,True
alpha049,-10.508,0.0,True
alpha047,-3.564,0.0,True
alpha046,-8.055,0.0,True
alpha042,-5.334,0.0,True
alpha038,-10.072,0.0,True
alpha035,4.996,0.0,True
alpha034,-12.283,0.0,True
alpha033,-14.349,0.0,True


In [3]:
# selecting the significant alphas
significant_alphas = alphas_t_test_results[alphas_t_test_results['<=0.05']].index
significant_alphas = hml[significant_alphas]

### Correlation matrix

Check the correlation between alphas and other variables.

In [4]:
significant_alphas.corr().style.background_gradient(cmap='coolwarm', axis=None)

Unnamed: 0,alpha003,alpha005,alpha006,alpha008,alpha009,alpha010,alpha012,alpha013,alpha014,alpha016,alpha017,alpha018,alpha020,alpha025,alpha028,alpha033,alpha034,alpha035,alpha038,alpha040,alpha042,alpha045,alpha046,alpha047,alpha049,alpha051,alpha053,alpha060,alpha101
alpha003,1.0,-0.031244,0.007468,0.005741,0.026521,0.00923,-0.026717,0.023484,0.013844,0.033043,0.015798,0.022389,-0.013383,-0.019854,0.009538,-0.003244,0.002664,-0.075099,0.020651,0.01392,0.021086,0.013672,-0.078259,0.004291,0.013114,0.005941,0.013626,0.029481,0.014082
alpha005,-0.031244,1.0,0.047547,0.070904,0.073079,0.059928,-0.000619,0.005006,0.034946,0.026795,0.027283,0.044178,0.100208,0.063958,0.073792,0.021825,0.044736,0.034972,-0.013203,0.005022,-0.026379,-0.091961,0.01921,0.006711,0.07675,0.056208,-0.043234,-0.048337,-0.013804
alpha006,0.007468,0.047547,1.0,0.172446,0.188345,0.166684,0.003793,0.036216,0.25952,0.02771,0.016294,-0.001868,-0.125774,0.168088,-0.023111,0.02358,0.001529,0.04075,-0.16199,0.049895,0.006123,-0.021874,-0.009965,0.01897,0.01691,-0.001843,0.008775,-0.005086,0.009767
alpha008,0.005741,0.070904,0.172446,1.0,-0.177537,-0.1654,0.40617,0.018164,0.168429,-0.397908,-0.402292,-0.332381,-0.352549,0.627952,0.421974,0.484847,-0.018088,0.031352,0.273284,0.048251,-0.025627,-0.420262,0.045143,0.372946,0.079431,0.467459,-0.039555,0.37056,0.008271
alpha009,0.026521,0.073079,0.188345,-0.177537,1.0,0.709888,-0.415724,0.011417,0.164983,0.431915,0.485578,0.515128,0.359042,-0.194031,-0.355079,-0.486166,-0.008319,-0.00218,-0.584967,0.047048,0.002353,0.340361,-0.017926,-0.432731,0.118117,-0.359017,-0.006505,-0.440361,0.019681
alpha010,0.00923,0.059928,0.166684,-0.1654,0.709888,1.0,-0.347278,-0.008432,0.160751,0.429787,0.444765,0.48842,0.326798,-0.191354,-0.329518,-0.475051,-0.004418,0.010025,-0.570166,0.033494,0.000239,0.332483,-0.005245,-0.419497,0.103445,-0.340704,-0.009391,-0.434086,-0.012004
alpha012,-0.026717,-0.000619,0.003793,0.40617,-0.415724,-0.347278,1.0,0.015309,0.025471,-0.424393,-0.453996,-0.476303,-0.383466,0.464786,0.419324,0.48953,0.011211,0.004332,0.452077,0.017438,0.0114,-0.40564,0.002766,0.452003,0.042381,0.480397,-0.003976,0.449097,0.02767
alpha013,0.023484,0.005006,0.036216,0.018164,0.011417,-0.008432,0.015309,1.0,-0.00675,0.046303,0.028411,0.016438,0.028437,0.010593,-0.033618,0.004174,0.023612,0.008859,-0.003684,0.033219,0.041287,-0.006479,-0.026623,-0.022519,0.009718,4.7e-05,0.048437,-0.007323,0.008804
alpha014,0.013844,0.034946,0.25952,0.168429,0.164983,0.160751,0.025471,-0.00675,1.0,0.028718,0.040201,0.014609,-0.160942,0.182442,0.040128,-0.018356,0.016079,0.027012,-0.164899,0.012859,0.008431,0.019308,-0.033065,0.007613,0.006185,0.009511,0.050388,0.035094,-0.037724
alpha016,0.033043,0.026795,0.02771,-0.397908,0.431915,0.429787,-0.424393,0.046303,0.028718,1.0,0.469472,0.415429,0.3624,-0.45014,-0.420934,-0.509747,-0.021297,-0.024279,-0.446379,0.018201,0.005112,0.425316,-0.077722,-0.424208,-0.013898,-0.416214,0.038528,-0.429895,-0.047865


### Regression AAPL to significant alphas and ewp

A simple linear regression on the AAPL stock to see if the alphas and equally weighted portfolio (ewp) is significant.

In [5]:
aapl = hml['AAPL'].copy()
ewp = hml['eqw'].copy()


X = pd.concat([significant_alphas, ewp], axis=1, join='inner').sort_index()  # type: ignore
X = sm.add_constant(X)
model = sm.OLS(aapl, X).fit()
model.summary()


0,1,2,3
Dep. Variable:,AAPL,R-squared:,0.229
Model:,OLS,Adj. R-squared:,0.225
Method:,Least Squares,F-statistic:,52.04
Date:,"Fri, 18 Apr 2025",Prob (F-statistic):,1.6e-269
Time:,17:47:33,Log-Likelihood:,12339.0
No. Observations:,5284,AIC:,-24620.0
Df Residuals:,5253,BIC:,-24410.0
Df Model:,30,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.0015,0.000,4.066,0.000,0.001,0.002
alpha003,0.1467,0.111,1.325,0.185,-0.070,0.364
alpha005,-0.1856,0.109,-1.708,0.088,-0.399,0.027
alpha006,-0.3076,0.107,-2.875,0.004,-0.517,-0.098
alpha008,0.1739,0.107,1.632,0.103,-0.035,0.383
alpha009,0.4400,0.139,3.172,0.002,0.168,0.712
alpha010,-0.2009,0.129,-1.553,0.121,-0.455,0.053
alpha012,0.1247,0.120,1.036,0.300,-0.111,0.361
alpha013,0.3468,0.112,3.102,0.002,0.128,0.566

0,1,2,3
Omnibus:,9967.102,Durbin-Watson:,1.968
Prob(Omnibus):,0.0,Jarque-Bera (JB):,42028476.426
Skew:,-13.901,Prob(JB):,0.0
Kurtosis:,439.029,Cond. No.,511.0


## Rolling t-test for alphas

Each month end, we will check if the alphas are significantly different from zero using a t-test.

In [6]:
month_ends = pd.date_range(start=hml.index[0], end=hml.index[-1], freq='BME').to_list(  )
alphas_returns = hml.drop(columns=['eqw', 'AAPL']).copy()

rolling_result_p = pd.DataFrame(index=month_ends, columns=alphas_returns.columns)
rolling_result_bool = pd.DataFrame(index=month_ends, columns=alphas_returns.columns)
for i, month_end in enumerate(month_ends):
    if i == 0:
        continue

    previous_month_end = month_ends[i - 1]
    # get the data for the month
    month_data = alphas_returns.loc[previous_month_end:month_end]
    # t-test 
    for col in month_data.columns:
        # t test
        t_stat, p_value = stats.ttest_1samp(month_data[col], 0)
        p_value = float(round(p_value, 4))  # type: ignore
        rolling_result_p.loc[month_end, col] = p_value
        rolling_result_bool.loc[month_end, col] = p_value <= 0.05

In [11]:
rolling_result_p.to_csv('../data/rolling_result_p.csv')

In [10]:
rolling_result_bool.head(10)

Unnamed: 0,alpha001,alpha002,alpha003,alpha005,alpha006,alpha008,alpha009,alpha010,alpha011,alpha012,...,alpha051,alpha053,alpha054,alpha055,alpha060,alpha083,alpha084,alpha085,alpha086,alpha101
2003-12-31,,,,,,,,,,,...,,,,,,,,,,
2004-01-30,False,False,False,False,False,False,False,False,False,True,...,True,False,False,False,False,False,False,False,False,True
2004-02-27,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2004-03-31,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
2004-04-30,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,False
2004-05-31,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2004-06-30,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2004-07-30,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2004-08-31,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
2004-09-30,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
