In [2]:
import pickle
import pandas as pd
import matplotlib
import os
import re

import scipy

import collections
import datetime
import time

import geopandas as gpd

import numpy as np
 
from difflib import get_close_matches

from fuzzywuzzy import process
from fuzzywuzzy import fuzz
from sklearn import linear_model
import statsmodels.api as sm

from linearmodels import PanelOLS, FamaMacBeth
from scipy import stats

import itertools


# Direct Effects
## Grab Data

In [7]:
stockData = pd.read_csv("../../data/companyData/stocksWithControlsWeather.csv").\
    drop(columns = {'Unnamed: 0'})

stockData.head()

Unnamed: 0,date,gvkey,RET,qtr,year,famafrench,ageTercile,sizeTercile,profitTercile,zipcode,temp_zipQuarterLast5,precip_zipQuarterLast5,temp_annualLast5,precip_annualLast5
0,2009-01-05,1690,0.042204,1,2009,36.0,0,2.0,2.0,95014,0.0,0.0,0.0,1.0
1,2009-01-05,15855,-0.033784,1,2009,42.0,0,2.0,1.0,95014,0.0,0.0,0.0,1.0
2,2009-01-05,140044,0.015152,1,2009,13.0,1,1.0,0.0,95014,0.0,0.0,0.0,1.0
3,2009-01-06,1690,-0.016494,1,2009,36.0,0,2.0,2.0,95014,0.0,0.0,0.0,1.0
4,2009-01-06,15855,0.025175,1,2009,42.0,0,2.0,1.0,95014,0.0,0.0,0.0,1.0


In [8]:
stockData['indQtr']  = stockData.famafrench.astype('str') + stockData.qtr.astype('str')
stockData['yearQtr'] = stockData.year.astype('str')       + stockData.qtr.astype('str')

In [11]:
sum(stockData.famafrench == 1)

4474

In [15]:
stockData.head()

Unnamed: 0,heatwave_zipQuarter,date,gvkey,RET,qtr,year,famafrench,ageTercile,sizeTercile,profitTercile,zipcode,temp_zipQuarterLast5,precip_zipQuarterLast5,temp_annualLast5,precip_annualLast5,indQtr,yearQtr,heatwave_annual,extremePrecip_annual,extremePrecip_zipQuarter
0,0,2009-01-05,1690,0.042204,1,2009,36.0,0,2.0,2.0,95014,0.0,0.0,0.0,1.0,36.01,20091,0,1,0
1,0,2009-01-05,15855,-0.033784,1,2009,42.0,0,2.0,1.0,95014,0.0,0.0,0.0,1.0,42.01,20091,0,1,0
2,0,2009-01-05,140044,0.015152,1,2009,13.0,1,1.0,0.0,95014,0.0,0.0,0.0,1.0,13.01,20091,0,1,0
3,0,2009-01-06,1690,-0.016494,1,2009,36.0,0,2.0,2.0,95014,0.0,0.0,0.0,1.0,36.01,20091,0,1,0
4,0,2009-01-06,15855,0.025175,1,2009,42.0,0,2.0,1.0,95014,0.0,0.0,0.0,1.0,42.01,20091,0,1,0


In [16]:
stockData.columns[0:50]

Index(['heatwave_zipQuarter', 'date', 'gvkey', 'RET', 'qtr', 'year',
       'famafrench', 'ageTercile', 'sizeTercile', 'profitTercile', 'zipcode',
       'temp_zipQuarterLast5', 'precip_zipQuarterLast5', 'temp_annualLast5',
       'precip_annualLast5', 'indQtr', 'yearQtr', 'heatwave_annual',
       'extremePrecip_annual', 'extremePrecip_zipQuarter'],
      dtype='object')

# Direct Effects
Look at the effects on the suppliers when they're affected directly.

The below gives us the full, clustered standard errors.

Get the weather data to start.

In [20]:
stockData['heatwave_annual']          = (stockData.temp_annualLast5 == 5) * 1
stockData['extremePrecip_annual']     = (stockData.precip_annualLast5 > 0) * 1

stockData['heatwave_zipQuarter']      = (stockData.temp_zipQuarterLast5 == 5) * 1
stockData['extremePrecip_zipQuarter'] = (stockData.precip_zipQuarterLast5 > 0) * 1

industries = range(1,44)
indVars     = ['heatwave_zipQuarter', 'extremePrecip_zipQuarter'] 
outcomeVar = 'RET'

results = pd.DataFrame()

i = 0

#############################
for ind in industries:
    indData = stockData[stockData.famafrench == ind]
    indData = pd.get_dummies(indData, columns = ['year', 'yearQtr', 'indQtr', 'famafrench']) # , 'gvkey'
    
    firms = indData['gvkey']
    
    print('#################################################',ind,indData.shape)
    
    if indData.shape[0] > 0:

        for indVar in indVars:
            print(outcomeVar, "~", indVar)        

            col = indData.pop(indVar)
            indData.insert(0, col.name, col)

            # find: concurrent ; or lagged supplier data
            X = indData.loc[:,((indData.columns.str.contains(indVar)) | \
                                (indData.columns.str.contains('yearQtr_')))] # | 
                                # (stockData.columns.str.contains('famafrench_')))] # | 
                                # (stockData.columns.str.contains('famafrench_'))] #  | \

            print(X.columns)

            y = indData[outcomeVar]

            ######################################
            # fit the model on this subset
            '''reg = linear_model.LinearRegression()
            reg.fit(X,y)

            reg.coef_[-1]'''


            #######################################
            # try the more descriptive one
            start = time.time()

            model = sm.OLS(y, X).fit(cov_type='cluster',cov_kwds={'groups': firms},use_t=True)

            pvals = model.pvalues[0:len(stockData.columns[stockData.columns.str.contains(indVar)])]
            coeff =  model.params[0:len(stockData.columns[stockData.columns.str.contains(indVar)])]
            print(model.summary())

            results.loc[i,'ind']        = ind
            results.loc[i,'outcomeVar'] = outcomeVar
            results.loc[i,'weatherVar'] = indVar
            # results.loc[i,'cutoffVar']  = cutoffVar
            # results.loc[i,'firmVar']    = firmVar


            results.loc[i,'effect']     = coeff[0]

            results.loc[i,'pval']       = pvals[0]

            print(time.time() - start)

################################################# 1 (4474, 64)
RET ~ heatwave_zipQuarter
Index(['heatwave_zipQuarter', 'yearQtr_20091', 'yearQtr_20092',
       'yearQtr_20093', 'yearQtr_20094', 'yearQtr_20101', 'yearQtr_20102',
       'yearQtr_20103', 'yearQtr_20104', 'yearQtr_20111', 'yearQtr_20112',
       'yearQtr_20113', 'yearQtr_20114', 'yearQtr_20121', 'yearQtr_20122',
       'yearQtr_20123', 'yearQtr_20124', 'yearQtr_20131', 'yearQtr_20132',
       'yearQtr_20133', 'yearQtr_20134', 'yearQtr_20141', 'yearQtr_20142',
       'yearQtr_20143', 'yearQtr_20144', 'yearQtr_20151', 'yearQtr_20152',
       'yearQtr_20153', 'yearQtr_20154', 'yearQtr_20161', 'yearQtr_20162',
       'yearQtr_20163', 'yearQtr_20164', 'yearQtr_20171', 'yearQtr_20172'],
      dtype='object')
                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:           

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:32:43   Log-Likelihood:             1.3959e+05
No. Observations:               59483   AIC:                        -2.791e+05
Df Residuals:                   59438   BIC:                        -2.787e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------
heatwave_zipQuarter     0.0001    

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                 -0.000
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:32:45   Log-Likelihood:                 16016.
No. Observations:                7915   AIC:                        -3.194e+04
Df Residuals:                    7870   BIC:                        -3.163e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:32:45   Log-Likelihood:                 30771.
No. Observations:               14308   AIC:                        -6.145e+04
Df Residuals:                   14263   BIC:                        -6.111e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.012
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:32:46   Log-Likelihood:                 14784.
No. Observations:                4875   AIC:                        -2.948e+04
Df Residuals:                    4830   BIC:                        -2.919e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.006
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:32:47   Log-Likelihood:                 52194.
No. Observations:               25892   AIC:                        -1.043e+05
Df Residuals:                   25847   BIC:                        -1.039e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:32:48   Log-Likelihood:                 86306.
No. Observations:               38870   AIC:                        -1.725e+05
Df Residuals:                   38825   BIC:                        -1.721e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:32:50   Log-Likelihood:                 83770.
No. Observations:               41454   AIC:                        -1.674e+05
Df Residuals:                   41409   BIC:                        -1.671e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:06   Log-Likelihood:             8.0261e+05
No. Observations:              508129   AIC:                        -1.605e+06
Df Residuals:                  508084   BIC:                        -1.605e+06
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:09   Log-Likelihood:             1.7326e+05
No. Observations:               86892   AIC:                        -3.464e+05
Df Residuals:                   86847   BIC:                        -3.460e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:09   Log-Likelihood:                 54535.
No. Observations:               22349   AIC:                        -1.090e+05
Df Residuals:                   22304   BIC:                        -1.086e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.012
Model:                            OLS   Adj. R-squared:                  0.008
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:10   Log-Likelihood:                 24327.
No. Observations:               10991   AIC:                        -4.856e+04
Df Residuals:                   10946   BIC:                        -4.823e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:12   Log-Likelihood:             1.6063e+05
No. Observations:               74084   AIC:                        -3.212e+05
Df Residuals:                   74039   BIC:                        -3.207e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:15   Log-Likelihood:             1.5068e+05
No. Observations:               69774   AIC:                        -3.013e+05
Df Residuals:                   69729   BIC:                        -3.009e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.006
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:16   Log-Likelihood:                 98766.
No. Observations:               48600   AIC:                        -1.974e+05
Df Residuals:                   48555   BIC:                        -1.970e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.013
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:16   Log-Likelihood:                 10348.
No. Observations:                5003   AIC:                        -2.061e+04
Df Residuals:                    4958   BIC:                        -2.031e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.006
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:21   Log-Likelihood:             2.9114e+05
No. Observations:              139240   AIC:                        -5.822e+05
Df Residuals:                  139195   BIC:                        -5.818e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                  0.002
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:29   Log-Likelihood:             5.0203e+05
No. Observations:              272084   AIC:                        -1.004e+06
Df Residuals:                  272039   BIC:                        -1.003e+06
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.008
Model:                            OLS   Adj. R-squared:                  0.007
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:31   Log-Likelihood:             1.3852e+05
No. Observations:               72320   AIC:                        -2.770e+05
Df Residuals:                   72275   BIC:                        -2.765e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.008
Model:                            OLS   Adj. R-squared:                  0.006
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:32   Log-Likelihood:                 70825.
No. Observations:               28803   AIC:                        -1.416e+05
Df Residuals:                   28758   BIC:                        -1.412e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.012
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:33   Log-Likelihood:                 12588.
No. Observations:                5978   AIC:                        -2.509e+04
Df Residuals:                    5933   BIC:                        -2.478e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:33   Log-Likelihood:                 26503.
No. Observations:               11339   AIC:                        -5.292e+04
Df Residuals:                   11294   BIC:                        -5.259e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.010
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:34   Log-Likelihood:                 16299.
No. Observations:                8245   AIC:                        -3.251e+04
Df Residuals:                    8200   BIC:                        -3.219e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.007
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:35   Log-Likelihood:                 47510.
No. Observations:               21655   AIC:                        -9.493e+04
Df Residuals:                   21610   BIC:                        -9.457e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.013
Model:                            OLS   Adj. R-squared:                  0.008
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:35   Log-Likelihood:                 16820.
No. Observations:               10213   AIC:                        -3.355e+04
Df Residuals:                   10168   BIC:                        -3.323e+04
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.006
Model:                            OLS   Adj. R-squared:                  0.005
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:42   Log-Likelihood:             4.0332e+05
No. Observations:              224169   AIC:                        -8.066e+05
Df Residuals:                  224124   BIC:                        -8.061e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:48   Log-Likelihood:             4.7213e+05
No. Observations:              179737   AIC:                        -9.442e+05
Df Residuals:                  179692   BIC:                        -9.437e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

                            OLS Regression Results                            
Dep. Variable:                    RET   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                       nan
Date:                Thu, 21 Jul 2022   Prob (F-statistic):                nan
Time:                        13:33:51   Log-Likelihood:             2.1178e+05
No. Observations:              111953   AIC:                        -4.235e+05
Df Residuals:                  111908   BIC:                        -4.230e+05
Df Model:                          44                                         
Covariance Type:              cluster                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
extremePrecip_zipQuarter

ZeroDivisionError: float division by zero

### Faster and More Heuristic

The below gives us unclustered standard errors, output to a csv file.

In [None]:
def findSE(X,reg,y):
    N = len(X)
    p = len(X.columns) + 1  # plus one because LinearRegression adds an intercept term

    X_with_intercept = np.empty(shape=(N, p), dtype=np.float)
    X_with_intercept[:, 0] = 1
    X_with_intercept[:, 1:p] = X.values

    y_hat = reg.predict(X)
    residuals = y.values - y_hat
    residual_sum_of_squares = residuals.T @ residuals
    sigma_squared_hat = residual_sum_of_squares / (N - p)
    var_beta_hat = np.linalg.inv(X_with_intercept.T @ X_with_intercept) * sigma_squared_hat

    se0 = var_beta_hat[1, 1] ** 0.5
    se1 = var_beta_hat[2, 2] ** 0.5
    se2 = var_beta_hat[3, 3] ** 0.5
    se3 = var_beta_hat[4, 4] ** 0.5
    se4 = var_beta_hat[5, 5] ** 0.5
    se5 = var_beta_hat[6, 6] ** 0.5
    '''se6 = var_beta_hat[7, 7] ** 0.5
    se7 = var_beta_hat[8, 8] ** 0.5
    se8 = var_beta_hat[9, 9] ** 0.5'''
    return([abs(reg.coef_[0]/se0),abs(reg.coef_[1]/se1),abs(reg.coef_[2]/se2),
            abs(reg.coef_[3]/se3),abs(reg.coef_[4]/se4),abs(reg.coef_[5]/se5)]
          )


In [None]:
# try this by industry
weatherVars  = ['precip_', 'temp_'] 
statVarsYr   = ['zipQuarterLast5', 'annualLast5'] 
outcomeVars  = ['RET']


start = time.time()


results = pd.DataFrame()
i = 0
for outcomeVar in outcomeVars:
    for weatherVar in weatherVars:
        for statVar in statVarsYr:                     

                    i = i + 1
                    indVar = weatherVar + statVar

                    print(outcomeVar, "~", indVar)


                    # find: concurrent ; or lagged supplier data
                    X = stockData.loc[:,((stockData.columns.str.contains(indVar)) |
                                      (stockData.columns.str.contains('indQtr_')))] #  |


                    X = X[X.columns[(X.sum(axis = 0) >= 4)]]
                    

                    y = stockData[outcomeVar]


                    ######################################
                    # fit the model on this subset
                    reg = linear_model.LinearRegression()
                    reg.fit(X,y)
                    


                    results.loc[i,'outcomeVar'] = outcomeVar
                    results.loc[i,'weatherVar'] = weatherVar
                    results.loc[i,'statVar']    = statVar



                    results.loc[i,'lag0']       = reg.coef_[0]
                    results.loc[i,'lag1']       = reg.coef_[1]
                    results.loc[i,'lag2']       = reg.coef_[2]
                    results.loc[i,'lag3']       = reg.coef_[3]
                    results.loc[i,'lag4']       = reg.coef_[4]



                    '''seratios = findSE(X,reg,y)

                    results.loc[i,'ratio0']       = seratios[0]
                    results.loc[i,'ratio1']       = seratios[1]
                    results.loc[i,'ratio2']       = seratios[2]
                    results.loc[i,'ratio3']       = seratios[3]
                    results.loc[i,'ratio4']       = seratios[4]'''

                    # print(results)

                    print(time.time() - start)

                    print('*******************************************************************')

results.to_csv("../../data/stockResults_notNormd.csv")

In [None]:
results

# Indirect Effects
This is almost exactly the same but with supplier information in place of the direct company information.