In [None]:
import pickle
import pandas as pd
import matplotlib
import os
import re

import scipy

import collections
import datetime
import time

import geopandas as gpd

import numpy as np
 
from difflib import get_close_matches

from fuzzywuzzy import process
from fuzzywuzzy import fuzz
from sklearn import linear_model
import statsmodels.api as sm

from linearmodels import PanelOLS, FamaMacBeth
from scipy import stats

import itertools


# Direct Effects
## Grab Data

In [None]:
# Read the stock/controls/weather panel, discard the 'Unnamed: 0' column
# (presumably an index written out by an earlier to_csv -- confirm), and add
# two string keys built by concatenating existing columns:
#   indQtr  = famafrench + qtr
#   yearQtr = year + qtr
stockData = (
    pd.read_csv("../../data/companyData/stocksWithControlsWeather.csv")
    .drop(columns=['Unnamed: 0'])
    .assign(
        indQtr=lambda frame: frame['famafrench'].astype('str') + frame['qtr'].astype('str'),
        yearQtr=lambda frame: frame['year'].astype('str') + frame['qtr'].astype('str'),
    )
)

stockData.head()

In [None]:
# Peek at the first 50 column names of the loaded panel.
stockData.columns[:50]

# Direct Effects
Look at the effects on the suppliers when they're affected directly.

The regressions below report standard errors clustered by firm (`gvkey`).

Get the weather data to start.

In [None]:
# Direct effects: for each industry code, regress the outcome on one weather
# indicator at a time plus year-quarter dummies, with standard errors
# clustered by firm (gvkey). One row per (industry, weather variable) is
# collected in `results` and written to CSV at the end.

# Annual variants, currently disabled:
# stockData['heatwave_annual']      = (stockData.temp_annualLast5 == 5) * 1
# stockData['extremePrecip_annual'] = (stockData.precip_annualLast5 > 0) * 1

# 0/1 weather indicators. Missing values in the source columns compare as
# False and therefore become 0.
# NOTE(review): presumably the *Last5 columns count extreme periods over the
# previous five -- confirm against the data dictionary.
stockData['heatwave_zipQuarter']      = (stockData.temp_zipQuarterLast5 == 5) * 1
stockData['extremePrecip_zipQuarter'] = (stockData.precip_zipQuarterLast5 > 0) * 1

industries = range(1,44)
indVars     = ['heatwave_zipQuarter', 'extremePrecip_zipQuarter']
outcomeVar = 'RET'

results = pd.DataFrame()

# Row label in `results`; incremented before use, so labels start at 1.
i = 0

#############################
for ind in industries:
    indData = stockData[stockData.famafrench == ind]
    # dtype=float: pandas >= 2.0 returns boolean dummies by default. Mixed with
    # the integer weather indicator, the design matrix would be cast to object
    # dtype, which statsmodels refuses to fit.
    indData = pd.get_dummies(indData,
                             columns = ['year', 'yearQtr', 'indQtr', 'famafrench'],
                             dtype = float) # , 'gvkey'

    # Cluster labels for the covariance estimator (row-aligned with indData).
    firms = indData['gvkey']

    print('#################################################',ind,indData.shape)

    # Nothing to estimate for an industry with no observations.
    if indData.shape[0] == 0:
        continue

    for indVar in indVars:
        i = i + 1

        print(outcomeVar, "~", indVar)

        # Move the weather variable to the front so it is listed first in the
        # design matrix and in the printed summary.
        col = indData.pop(indVar)
        indData.insert(0, col.name, col)

        # Regressors: every column whose name contains the weather variable,
        # plus the year-quarter dummies (which span the intercept, so no
        # constant is added).
        isRegressor = (indData.columns.str.contains(indVar) |
                       indData.columns.str.contains('yearQtr_'))
        X = indData.loc[:, isRegressor]

        print(X.columns)

        y = indData[outcomeVar]

        #######################################
        # OLS with firm-clustered standard errors
        start = time.time()

        model = sm.OLS(y, X).fit(cov_type='cluster',cov_kwds={'groups': firms},use_t=True)

        print(model.summary())

        results.loc[i,'ind']        = ind
        results.loc[i,'outcomeVar'] = outcomeVar
        results.loc[i,'weatherVar'] = indVar
        # results.loc[i,'cutoffVar']  = cutoffVar
        # results.loc[i,'firmVar']    = firmVar

        # Look the estimate up by label: positional [0] on a string-labelled
        # Series is deprecated in recent pandas.
        results.loc[i,'effect']     = model.params[indVar]
        results.loc[i,'pval']       = model.pvalues[indVar]

        print(time.time() - start)

results.to_csv("../../data/stockResults_byInd.csv")

# Indirect Effects
This is almost exactly the same regression as in the direct-effects section, but using supplier weather in place of the direct company information.

In [None]:
# Read the supplier stock panel (this rebinds `stockData`, replacing the frame
# loaded earlier), discard the 'Unnamed: 0' column, and add the same two
# concatenated string keys:
#   indQtr  = famafrench + qtr
#   yearQtr = year + qtr
stockData = (
    pd.read_csv("../../data/companyData/supplierStocks.csv")
    .drop(columns=['Unnamed: 0'])
    .assign(
        indQtr=lambda frame: frame['famafrench'].astype('str') + frame['qtr'].astype('str'),
        yearQtr=lambda frame: frame['year'].astype('str') + frame['qtr'].astype('str'),
    )
)

stockData.head()

In [None]:
# Indirect effects: for each supplier industry code, regress the outcome on
# one supplier-weather indicator at a time plus year-quarter dummies, with
# standard errors clustered by gvkey. One row per (industry, weather variable)
# is collected in `results` and written to CSV at the end.

# 0/1 weather indicators. Missing values in the source columns compare as
# False and therefore become 0.
# NOTE(review): presumably the weather columns in this file describe the
# supplier's location -- confirm against how supplierStocks.csv is built.
stockData['supplier_heatwave_zipQuarter']      = (stockData.temp_zipQuarterLast5 == 5) * 1
stockData['supplier_extremePrecip_zipQuarter'] = (stockData.precip_zipQuarterLast5 > 0) * 1

industries = range(1,44)
indVars     = ['supplier_heatwave_zipQuarter', 'supplier_extremePrecip_zipQuarter']
outcomeVar = 'RET'

results = pd.DataFrame()

# Row label in `results`; incremented before use, so labels start at 1.
i = 0

#############################
for ind in industries:
    indData = stockData[stockData.supplier_famafrench == ind]
    # dtype=float: pandas >= 2.0 returns boolean dummies by default. Mixed with
    # the integer weather indicator, the design matrix would be cast to object
    # dtype, which statsmodels refuses to fit.
    indData = pd.get_dummies(indData,
                             columns = ['year', 'yearQtr', 'indQtr', 'supplier_famafrench'],
                             dtype = float) # , 'gvkey'

    # Cluster labels for the covariance estimator (row-aligned with indData).
    firms = indData['gvkey']

    print('#################################################',ind,indData.shape)

    # Only estimate with more than 50 observations and at least two distinct
    # cluster labels; a clustered covariance needs more than one cluster.
    enoughRows  = indData.shape[0] > 50
    enoughFirms = len(indData.gvkey.unique()) > 1
    if not (enoughRows and enoughFirms):
        continue

    for indVar in indVars:
        i = i + 1
        print(outcomeVar, "~", indVar)

        # Move the weather variable to the front so it is listed first in the
        # design matrix and in the printed summary.
        col = indData.pop(indVar)
        indData.insert(0, col.name, col)

        # Regressors: every column whose name contains the weather variable,
        # plus the year-quarter dummies (which span the intercept, so no
        # constant is added).
        isRegressor = (indData.columns.str.contains(indVar) |
                       indData.columns.str.contains('yearQtr_'))
        X = indData.loc[:, isRegressor]

        print(X.columns)

        y = indData[outcomeVar]

        #######################################
        # OLS with clustered standard errors
        start = time.time()

        model = sm.OLS(y, X).fit(cov_type='cluster',cov_kwds={'groups': firms},use_t=True)

        print(model.summary())

        results.loc[i,'ind']        = ind
        results.loc[i,'outcomeVar'] = outcomeVar
        results.loc[i,'weatherVar'] = indVar
        # results.loc[i,'cutoffVar']  = cutoffVar
        # results.loc[i,'firmVar']    = firmVar

        # Look the estimate up by label: positional [0] on a string-labelled
        # Series is deprecated in recent pandas.
        results.loc[i,'effect']     = model.params[indVar]
        results.loc[i,'pval']       = model.pvalues[indVar]

        print(time.time() - start)

results.to_csv("../../data/stockResults_bySupplierInd.csv")

In [None]:
# Sanity check on the last industry processed: does it contain more than one
# distinct gvkey? (The comparison belongs outside len(); inside it, the
# expression only returned the number of unique values.)
len(indData.gvkey.unique()) > 1