# Aggregation Regression Analysis

In [1]:
import numpy as np
import pandas as pd
import datetime
import time

import matplotlib
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.tsa as tsa
from statsmodels.regression.rolling import RollingOLS

# to access Macro data
def FRED(code):
    """
    Download one FRED series and return it as a DataFrame.

    Parameters
    ----------
    code : str
        Series identifier appended to the ``fredgraph.csv`` download URL.

    Returns
    -------
    pandas.DataFrame
        The downloaded CSV. When a date column is found it is parsed to
        datetimes and becomes the index, named ``'DATE'``; otherwise the
        frame is returned exactly as read.

    Notes
    -----
    The function sleeps for two seconds after every download to avoid
    being blocked for excessive requests.
    """
    fred_url = 'https://fred.stlouisfed.org/graph/fredgraph.csv?id='
    df = pd.read_csv(fred_url + code)
    # Accept either header for the date column.
    # NOTE(review): 'observation_date' is believed to be the header used by
    # newer exports -- confirm against a live download.
    date_col = next((c for c in ('DATE', 'observation_date') if c in df.columns), None)
    if date_col is not None:
        # One vectorised parse instead of a row-wise apply followed by a re-parse.
        df[date_col] = pd.to_datetime(df[date_col])
        # Keep the index name 'DATE' regardless of which header was present.
        df = df.set_index(date_col).rename_axis('DATE')
    time.sleep(2)  # So that I don't get blocked through excessive requests
    return df

# to calculate percent differences correctly
def pct_diff(series: pd.Series, periods: int = 1) -> pd.Series:
    """
    Calculate the percent difference of a pandas series.

    The change is measured against the value ``periods`` rows earlier.
    Whenever that earlier value is zero, negative or missing, the result
    is NaN: a percent change from a non-positive base has no meaningful
    sign or magnitude, and a zero base would otherwise produce ``inf``.

    Parameters
    ----------
    series : pandas.Series
        Input series to calculate percent difference from.
    periods : int, default=1
        Number of periods to shift the series by.

    Returns
    -------
    pandas.Series
        ``series / series.shift(periods) - 1`` where the shifted value is
        strictly positive, NaN elsewhere.

    """
    base = series.shift(periods)
    # Strictly positive bases only: a zero base would divide to +/-inf.
    valid_base = base.where(base > 0)
    return series / valid_base - 1

# to quickly make a correlation matrix heatmap
def correlmap(data):
    """
    Plot an annotated correlation heatmap and show the column sums.

    Computes ``data.corr()``, draws it with seaborn on a fixed -1..1
    colour scale, shows the figure, then displays a one-column table
    holding the sum of each column of the correlation matrix.
    """
    corr_matrix = data.corr()
    heatmap_options = dict(vmin=-1, vmax=1, cmap='coolwarm', annot=True)
    sns.heatmap(corr_matrix, **heatmap_options)
    plt.show()
    column_totals = corr_matrix.sum()
    display(pd.DataFrame(column_totals, columns=['Sum of Correlations']))


def cqtr_to_date(cqtr, end=False):
    """
    Convert a calendar-quarter string into a ``numpy.datetime64`` date.

    The first four characters of ``cqtr`` are used as the year and its
    last character as the quarter number.

    Parameters
    ----------
    cqtr : str
        Quarter label; only its first four and its final character are read.
    end : bool, default=False
        If falsy, return the first day of the quarter's first month.
        If truthy, return the last day of the quarter's last month.

    Returns
    -------
    numpy.datetime64
        The requested boundary date of the quarter.
    """
    year_part = cqtr[:4]
    qtr = int(cqtr[-1:])
    if not end:
        month = qtr * 3 - 2
        day_part = '01'
    else:
        month = qtr * 3
        # Quarters 2 and 3 end in 30-day months; every other quarter uses 31.
        day_part = '30' if qtr in (2, 3) else '31'
    return np.datetime64(f'{year_part}-{month:02d}-{day_part}')



from scipy.stats import pearsonr

def calculate_pvalues(df):
    """
    Build the matrix of pairwise Pearson-correlation p-values.

    For every pair of columns, rows where either column is null are
    dropped before the test is run, and the p-value is rounded to four
    decimal places.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are tested against one another.

    Returns
    -------
    pandas.DataFrame
        Square float frame indexed and labelled by ``df.columns``; the
        entry in row ``c``, column ``r`` is the p-value for columns
        ``r`` and ``c``.
    """
    cols = df.columns
    pvalues = pd.DataFrame(index=cols, columns=cols, dtype=float)
    for r in cols:
        for c in cols:
            # Keep only the rows where both columns are observed.
            both_present = df[r].notnull() & df[c].notnull()
            p_value = pearsonr(df.loc[both_present, r], df.loc[both_present, c])[1]
            # Write through .loc: chained `pvalues[r][c] = ...` assigns into a
            # temporary and is not guaranteed to reach `pvalues`.
            pvalues.loc[c, r] = round(p_value, 4)
    return pvalues


In [2]:
# Notebook-wide matplotlib defaults: no top/right spines, no bottom/left
# tick marks, and a fixed font family and size.
matplotlib.rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
    'font.family': 'NewComputerModern10Book',
    'font.size': 16,
    'xtick.bottom': False,
    'ytick.left': False,
})

def comma(x, pos):
    """
    Format ``x`` with thousands separators and no decimal places.

    ``pos`` is accepted but not used.
    NOTE(review): the two-argument signature presumably targets
    ``FuncFormatter`` -- confirm at the call site.
    """
    return '{:,.0f}'.format(x)

#plt.tick_params(top=False, bottom=False, left=False, right=False, labelleft=False, labelbottom=False)

In [3]:
# Load the merged panel; the first CSV column becomes the index and is
# then converted to datetimes.
qmdf = pd.read_csv('refined_inandout_fred_merged.csv', index_col=0)
qmdf = qmdf.set_index(pd.to_datetime(qmdf.index))
# Show the frame itself, then its column labels.
display(qmdf, qmdf.columns)

Unnamed: 0_level_0,Revenue_WRDS,EBITDA_WRDS,EBIT_WRDS,Pretax_Income_WRDS,NI_WRDS,Excess_Profit_WRDS,Assets_WRDS,Debt_WRDS,Enterprise_Value_WRDS,NGDP_MACRO,...,CapEx_MACRO,GVA_MACRO,NVA_MACRO,Revenue_MACRO,EBIT_MACRO,Retained_Earnings_MACRO,Income_Taxes_MACRO,Enterprise_Value_MACRO,Profits_aftertax_NOIVACC_MACRO,Profits_aftertax_IVACC_MACRO
datacqtr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-01-01,,,,,,,,,,,...,,,,,,,,,,
2012-04-01,0.004714,-0.010353,-0.02167,-0.15234,-0.14708,-0.082283,0.014197,0.016682,-0.024114,0.008607,...,-0.093439,0.012064,0.011964,0.012064,-0.003315,-0.022747,0.044679,-0.027651,-0.011403,0.018753
2012-07-01,0.01887,0.026226,0.012283,0.03883,-0.03185,-0.015312,0.015729,0.005799,0.037279,0.006937,...,0.121484,-0.003768,-0.006467,-0.003768,-0.023878,-0.094382,0.022272,0.069403,-0.003024,-0.011707
2012-10-01,0.024391,-0.104017,-0.163886,-0.325176,-0.109911,-0.445733,0.004658,-0.004444,-0.000736,0.006181,...,-0.15592,0.020629,0.023022,0.020629,0.034179,-0.168422,0.00298,-0.010575,0.01145,0.017016
2013-01-01,-0.004097,0.142478,0.232287,0.754513,0.785777,0.619979,0.015789,0.011908,0.06882,0.013871,...,0.166362,0.012313,0.013323,0.012313,0.030156,0.376508,0.050945,0.122142,0.041609,0.027162
2013-04-01,0.023031,-0.010278,-0.02268,-0.059297,-0.262372,-0.08541,-0.005783,-0.001934,0.005713,0.004835,...,-0.101932,0.006015,0.005128,0.006014,-0.015847,-0.017959,-0.007847,0.019896,-0.015161,0.000319
2013-07-01,0.0221,0.055682,0.064303,0.092747,0.250695,0.594963,0.014228,0.008828,0.031767,0.013459,...,0.175182,0.002281,0.000782,0.002281,0.036156,-0.053575,-0.002286,0.054578,0.002385,-0.002275
2013-10-01,0.015261,-0.025799,-0.046538,-0.140712,-0.203165,0.30265,0.002952,0.002702,0.047671,0.014049,...,-0.106329,0.011505,0.010962,0.011505,-0.022985,0.091104,0.019338,0.077376,0.012315,0.006376
2014-01-01,-0.013751,0.017792,0.029568,0.125905,0.063324,-0.382498,0.004983,0.005891,0.015733,0.000333,...,0.148486,0.00606,0.004407,0.00606,-0.028462,-0.250628,0.04957,0.027111,-0.023106,-0.015157
2014-04-01,0.021119,0.034852,0.040455,0.044235,0.089141,0.148624,0.009503,0.00658,0.033669,0.018652,...,-0.055906,0.023967,0.026049,0.023967,0.051936,0.149621,0.054696,0.04779,0.060978,0.052147


Index(['Revenue_WRDS', 'EBITDA_WRDS', 'EBIT_WRDS', 'Pretax_Income_WRDS',
       'NI_WRDS', 'Excess_Profit_WRDS', 'Assets_WRDS', 'Debt_WRDS',
       'Enterprise_Value_WRDS', 'NGDP_MACRO', 'Debt_MACRO',
       'Profitspretax_NOIVACC_MACRO', 'Profits_pretax_IVACC_MACRO',
       'Assets_MACRO', 'Liabilities_MACRO', 'Interest_Exp_MACRO',
       'CapEx_MACRO', 'GVA_MACRO', 'NVA_MACRO', 'Revenue_MACRO', 'EBIT_MACRO',
       'Retained_Earnings_MACRO', 'Income_Taxes_MACRO',
       'Enterprise_Value_MACRO', 'Profits_aftertax_NOIVACC_MACRO',
       'Profits_aftertax_IVACC_MACRO'],
      dtype='object')

In [5]:
def basicreg(endog, exog, df):
    """
    Fit an OLS model from a two-sided formula and return the fit.

    Parameters
    ----------
    endog : str
        Left-hand side of the formula.
    exog : str
        Right-hand side of the formula.
    df : pandas.DataFrame
        Data passed to ``smf.ols`` as ``data``.

    Returns
    -------
    The object returned by calling ``.fit()`` on the ``smf.ols`` model.
    """
    formula = ' ~ '.join((endog, exog))
    return smf.ols(formula, data=df).fit()


## Revenue

In [18]:
# One single-regressor OLS fit of Revenue_WRDS per macro series, keyed by regressor.
revenue_regressions = {
    regressor: basicreg('Revenue_WRDS', regressor, qmdf)
    for regressor in ('NGDP_MACRO', 'GVA_MACRO', 'Revenue_MACRO')
}


print('REVENUE REGRESSIONS')
print(revenue_regressions.keys())


# For each fit: coefficients, standard errors, their ratio, and R-squared.
for regressor, fit in revenue_regressions.items():
    print('\n', regressor)
    print('Parameters')
    print(fit.params)
    print('Std. Errors')
    print(fit.bse)
    print('Z-Scores')
    print(fit.params / fit.bse)
    print('R2:', fit.rsquared)

REVENUE REGRESSIONS
dict_keys(['NGDP_MACRO', 'GVA_MACRO', 'Revenue_MACRO'])

 NGDP_MACRO
Parameters
Intercept    -0.008241
NGDP_MACRO    1.610543
dtype: float64
Std. Errors
Intercept     0.006550
NGDP_MACRO    0.282742
dtype: float64
Z-Scores
Intercept    -1.258290
NGDP_MACRO    5.696151
dtype: float64
R2: 0.44176773884003917

 GVA_MACRO
Parameters
Intercept   -0.003932
GVA_MACRO    1.164449
dtype: float64
Std. Errors
Intercept    0.006481
GVA_MACRO    0.222559
dtype: float64
Z-Scores
Intercept   -0.606726
GVA_MACRO    5.232078
dtype: float64
R2: 0.40036250358615055

 Revenue_MACRO
Parameters
Intercept       -0.003932
Revenue_MACRO    1.164450
dtype: float64
Std. Errors
Intercept        0.006481
Revenue_MACRO    0.222559
dtype: float64
Z-Scores
Intercept       -0.606729
Revenue_MACRO    5.232089
dtype: float64
R2: 0.40036350135817


## EBITDA

In [32]:
# One single-regressor OLS fit of EBITDA_WRDS per macro series, keyed by regressor.
EBITDA_regressions = {
    regressor: basicreg('EBITDA_WRDS', regressor, qmdf)
    for regressor in ('Profitspretax_NOIVACC_MACRO', 'NVA_MACRO', 'NGDP_MACRO')
}


print('EBITDA REGRESSIONS')
print(EBITDA_regressions.keys())


# For each fit: coefficients, standard errors, their ratio, and R-squared.
for regressor, fit in EBITDA_regressions.items():
    print('\n', regressor)
    print('Parameters')
    print(fit.params)
    print('Std. Errors')
    print(fit.bse)
    print('Z-Scores')
    print(fit.params / fit.bse)
    print('R2:', fit.rsquared)

EBITDA REGRESSIONS
dict_keys(['Profitspretax_NOIVACC_MACRO', 'NVA_MACRO', 'NGDP_MACRO'])

 Profitspretax_NOIVACC_MACRO
Parameters
Intercept                      0.007693
Profitspretax_NOIVACC_MACRO    0.379904
dtype: float64
Std. Errors
Intercept                      0.012724
Profitspretax_NOIVACC_MACRO    0.115880
dtype: float64
Z-Scores
Intercept                      0.604587
Profitspretax_NOIVACC_MACRO    3.278420
dtype: float64
R2: 0.20769946194125943

 NVA_MACRO
Parameters
Intercept    0.009578
NVA_MACRO    0.400647
dtype: float64
Std. Errors
Intercept    0.015075
NVA_MACRO    0.446473
dtype: float64
Z-Scores
Intercept    0.635347
NVA_MACRO    0.897360
dtype: float64
R2: 0.019262065637895454

 NGDP_MACRO
Parameters
Intercept     0.001958
NGDP_MACRO    1.077222
dtype: float64
Std. Errors
Intercept     0.015916
NGDP_MACRO    0.687099
dtype: float64
Z-Scores
Intercept     0.123029
NGDP_MACRO    1.567784
dtype: float64
R2: 0.05655917515275988


## EBIT

In [33]:
# One single-regressor OLS fit of EBIT_WRDS per macro series, keyed by regressor.
EBIT_regressions = {
    regressor: basicreg('EBIT_WRDS', regressor, qmdf)
    for regressor in ('Profits_pretax_IVACC_MACRO', 'EBIT_MACRO', 'NGDP_MACRO')
}


print('EBIT REGRESSIONS')
print(EBIT_regressions.keys())


# For each fit: coefficients, standard errors, their ratio, and R-squared.
for regressor, fit in EBIT_regressions.items():
    print('\n', regressor)
    print('Parameters')
    print(fit.params)
    print('Std. Errors')
    print(fit.bse)
    print('Z-Scores')
    print(fit.params / fit.bse)
    print('R2:', fit.rsquared)

EBIT REGRESSIONS
dict_keys(['Profits_pretax_IVACC_MACRO', 'EBIT_MACRO', 'NGDP_MACRO'])

 Profits_pretax_IVACC_MACRO
Parameters
Intercept                    -0.008366
Profits_pretax_IVACC_MACRO    2.293127
dtype: float64
Std. Errors
Intercept                     0.020267
Profits_pretax_IVACC_MACRO    0.508409
dtype: float64
Z-Scores
Intercept                    -0.412782
Profits_pretax_IVACC_MACRO    4.510398
dtype: float64
R2: 0.33163459468854584

 EBIT_MACRO
Parameters
Intercept     0.010411
EBIT_MACRO    0.677634
dtype: float64
Std. Errors
Intercept     0.020546
EBIT_MACRO    0.185067
dtype: float64
Z-Scores
Intercept     0.506717
EBIT_MACRO    3.661560
dtype: float64
R2: 0.24642083772587442

 NGDP_MACRO
Parameters
Intercept    -0.007057
NGDP_MACRO    2.451104
dtype: float64
Std. Errors
Intercept     0.025711
NGDP_MACRO    1.109927
dtype: float64
Z-Scores
Intercept    -0.274462
NGDP_MACRO    2.208346
dtype: float64
R2: 0.1063019363424591


## EBT

In [34]:
# One single-regressor OLS fit of Pretax_Income_WRDS per macro series, keyed by regressor.
EBT_regressions = {
    regressor: basicreg('Pretax_Income_WRDS', regressor, qmdf)
    for regressor in ('Profits_pretax_IVACC_MACRO', 'EBIT_MACRO', 'NGDP_MACRO')
}


print('EBT REGRESSIONS')
print(EBT_regressions.keys())


# For each fit: coefficients, standard errors, their ratio, and R-squared.
for regressor, fit in EBT_regressions.items():
    print('\n', regressor)
    print('Parameters')
    print(fit.params)
    print('Std. Errors')
    print(fit.bse)
    print('Z-Scores')
    print(fit.params / fit.bse)
    print('R2:', fit.rsquared)

EBT REGRESSIONS
dict_keys(['Profits_pretax_IVACC_MACRO', 'EBIT_MACRO', 'NGDP_MACRO'])

 Profits_pretax_IVACC_MACRO
Parameters
Intercept                      2.955080
Profits_pretax_IVACC_MACRO   -58.918296
dtype: float64
Std. Errors
Intercept                      2.247933
Profits_pretax_IVACC_MACRO    56.391923
dtype: float64
Z-Scores
Intercept                     1.314576
Profits_pretax_IVACC_MACRO   -1.044800
dtype: float64
R2: 0.025934091332134313

 EBIT_MACRO
Parameters
Intercept     2.222963
EBIT_MACRO   -2.705146
dtype: float64
Std. Errors
Intercept      2.174126
EBIT_MACRO    19.583030
dtype: float64
Z-Scores
Intercept     1.022463
EBIT_MACRO   -0.138137
dtype: float64
R2: 0.0004651957107533189

 NGDP_MACRO
Parameters
Intercept       8.042323
NGDP_MACRO   -496.201937
dtype: float64
Std. Errors
Intercept      1.738351
NGDP_MACRO    75.043070
dtype: float64
Z-Scores
Intercept     4.626408
NGDP_MACRO   -6.612229
dtype: float64
R2: 0.5160618347919588


## Net Income

In [35]:
# One single-regressor OLS fit of NI_WRDS per macro series, keyed by regressor.
NI_regressions = {
    regressor: basicreg('NI_WRDS', regressor, qmdf)
    for regressor in (
        'Profits_aftertax_IVACC_MACRO',
        'Profits_aftertax_NOIVACC_MACRO',
        'NGDP_MACRO',
    )
}


print('NI REGRESSIONS')
print(NI_regressions.keys())


# For each fit: coefficients, standard errors, their ratio, and R-squared.
for regressor, fit in NI_regressions.items():
    print('\n', regressor)
    print('Parameters')
    print(fit.params)
    print('Std. Errors')
    print(fit.bse)
    print('Z-Scores')
    print(fit.params / fit.bse)
    print('R2:', fit.rsquared)

NI REGRESSIONS
dict_keys(['Profits_aftertax_IVACC_MACRO', 'Profits_aftertax_NOIVACC_MACRO', 'NGDP_MACRO'])

 Profits_aftertax_IVACC_MACRO
Parameters
Intercept                       -0.288932
Profits_aftertax_IVACC_MACRO    14.730791
dtype: float64
Std. Errors
Intercept                       0.184630
Profits_aftertax_IVACC_MACRO    4.631662
dtype: float64
Z-Scores
Intercept                      -1.564919
Profits_aftertax_IVACC_MACRO    3.180454
dtype: float64
R2: 0.19789167377011108

 Profits_aftertax_NOIVACC_MACRO
Parameters
Intercept                        -0.180283
Profits_aftertax_NOIVACC_MACRO    4.661908
dtype: float64
Std. Errors
Intercept                         0.180385
Profits_aftertax_NOIVACC_MACRO    1.642757
dtype: float64
Z-Scores
Intercept                        -0.999433
Profits_aftertax_NOIVACC_MACRO    2.837857
dtype: float64
R2: 0.1641767097678286

 NGDP_MACRO
Parameters
Intercept     -0.747929
NGDP_MACRO    55.288061
dtype: float64
Std. Errors
Intercept     0.105581


## Economic Profits

In [None]:
# One single-regressor OLS fit of the WRDS excess-profit series per macro
# series, keyed by regressor.
# The loaded frame has no 'Economic_Profits_WRDS' column, so the formula could
# not be evaluated; the column present in qmdf is 'Excess_Profit_WRDS'.
# NOTE(review): assumes Excess_Profit_WRDS is the intended economic-profit
# measure -- confirm against the data dictionary.
EP_regressions = ['Profits_aftertax_IVACC_MACRO', 'Profits_aftertax_NOIVACC_MACRO', 'NGDP_MACRO']
EP_regressions = {l: basicreg('Excess_Profit_WRDS', l, qmdf) for l in EP_regressions}


print('EP REGRESSIONS')
print(EP_regressions.keys())


# For each fit: coefficients, standard errors, their ratio, and R-squared.
for l in EP_regressions.keys():
    print('\n',l)
    print('Parameters')
    print(EP_regressions[l].params)
    print('Std. Errors')
    print(EP_regressions[l].bse)
    print('Z-Scores')
    print(EP_regressions[l].params / EP_regressions[l].bse)
    print('R2:', EP_regressions[l].rsquared)

## Enterprise Value

In [39]:
# One single-regressor OLS fit of Enterprise_Value_WRDS per macro series, keyed by regressor.
EV_regressions = {
    regressor: basicreg('Enterprise_Value_WRDS', regressor, qmdf)
    for regressor in ('Enterprise_Value_MACRO', 'NGDP_MACRO')
}


print('EV REGRESSIONS')
print(EV_regressions.keys())


# For each fit: coefficients, standard errors, their ratio, and R-squared.
for regressor, fit in EV_regressions.items():
    print('\n', regressor)
    print('Parameters')
    print(fit.params)
    print('Std. Errors')
    print(fit.bse)
    print('Z-Scores')
    print(fit.params / fit.bse)
    print('R2:', fit.rsquared)

EV REGRESSIONS
dict_keys(['Enterprise_Value_MACRO', 'NGDP_MACRO'])

 Enterprise_Value_MACRO
Parameters
Intercept                -0.001927
Enterprise_Value_MACRO    0.669477
dtype: float64
Std. Errors
Intercept                 0.002161
Enterprise_Value_MACRO    0.026801
dtype: float64
Z-Scores
Intercept                 -0.891509
Enterprise_Value_MACRO    24.979098
dtype: float64
R2: 0.9383417252287944

 NGDP_MACRO
Parameters
Intercept     0.017129
NGDP_MACRO   -0.178201
dtype: float64
Std. Errors
Intercept     0.009588
NGDP_MACRO    0.413899
dtype: float64
Z-Scores
Intercept     1.786583
NGDP_MACRO   -0.430543
dtype: float64
R2: 0.004500804688216675


## Assets

In [40]:
# One single-regressor OLS fit of Assets_WRDS per macro series, keyed by regressor.
assets_regressions = {
    regressor: basicreg('Assets_WRDS', regressor, qmdf)
    for regressor in ('Assets_MACRO', 'NGDP_MACRO')
}


print('ASSETS REGRESSIONS')
print(assets_regressions.keys())


# For each fit: coefficients, standard errors, their ratio, and R-squared.
for regressor, fit in assets_regressions.items():
    print('\n', regressor)
    print('Parameters')
    print(fit.params)
    print('Std. Errors')
    print(fit.bse)
    print('Z-Scores')
    print(fit.params / fit.bse)
    print('R2:', fit.rsquared)

ASSETS REGRESSIONS
dict_keys(['Assets_MACRO', 'NGDP_MACRO'])

 Assets_MACRO
Parameters
Intercept       0.003376
Assets_MACRO    0.230559
dtype: float64
Std. Errors
Intercept       0.002797
Assets_MACRO    0.125457
dtype: float64
Z-Scores
Intercept       1.207393
Assets_MACRO    1.837754
dtype: float64
R2: 0.07610504710351262

 NGDP_MACRO
Parameters
Intercept     0.007232
NGDP_MACRO   -0.019173
dtype: float64
Std. Errors
Intercept     0.002395
NGDP_MACRO    0.103377
dtype: float64
Z-Scores
Intercept     3.019865
NGDP_MACRO   -0.185463
dtype: float64
R2: 0.0008382380164695391


## Debt

In [41]:
# One single-regressor OLS fit of Debt_WRDS per macro series, keyed by regressor.
debt_regressions = {
    regressor: basicreg('Debt_WRDS', regressor, qmdf)
    for regressor in ('Debt_MACRO', 'NGDP_MACRO')
}


print('DEBT REGRESSIONS')
print(debt_regressions.keys())


# For each fit: coefficients, standard errors, their ratio, and R-squared.
for regressor, fit in debt_regressions.items():
    print('\n', regressor)
    print('Parameters')
    print(fit.params)
    print('Std. Errors')
    print(fit.bse)
    print('Z-Scores')
    print(fit.params / fit.bse)
    print('R2:', fit.rsquared)

DEBT REGRESSIONS
dict_keys(['Debt_MACRO', 'NGDP_MACRO'])

 Debt_MACRO
Parameters
Intercept    -0.003869
Debt_MACRO    0.622657
dtype: float64
Std. Errors
Intercept     0.005793
Debt_MACRO    0.277110
dtype: float64
Z-Scores
Intercept    -0.667803
Debt_MACRO    2.246970
dtype: float64
R2: 0.1096416590988949

 NGDP_MACRO
Parameters
Intercept     0.008823
NGDP_MACRO   -0.223376
dtype: float64
Std. Errors
Intercept     0.004464
NGDP_MACRO    0.192689
dtype: float64
Z-Scores
Intercept     1.976631
NGDP_MACRO   -1.159260
dtype: float64
R2: 0.031737362080429876


End of Notebook.