In [7]:
import pandas as pd
import math
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [8]:
reg0 = pd.read_csv('data/clean_milk0.csv')

#variables names
lmilk = ['LSCORE']
auct_key = ['YEAR','MONTH','DAY','SYSTEM','FMOZONE']
lcts = ['LFMO','LGAS','LPOPUL','LQSCORE']#,'LMEALS']
dummies = ['COOLER','ESC', 'NUM']
fekeys = ['3','6','7','9']


bid_key = auct_key + ['VENDOR'] + ['COUNTY']
covariates = lcts + dummies + fekeys
reg0['max*past'] = reg0['LSCORE_max1']*reg0['PAST_AUCT']
reg0['min*past'] = reg0['LSCORE_min1']*reg0['PAST_AUCT']

In [9]:
maxlag = 1
limitedlag = [l+str(i) for l in ['LSCORE_min','LSCORE_max'] for i in range(1,1+maxlag)]

maxlagy = 1
limitedlagy = [l+str(i) for l in ['LSCORE_miny','LSCORE_maxy'] for i in range(1,1+maxlagy)]


hist1 = ['INC','PAST_AUCT','min*past','max*past'] + limitedlag + limitedlagy

reg1 = reg0[['LSCORE']+covariates + hist1]
reg1 = reg1.dropna()

#previous prices 1 day
model1 = sm.OLS(reg1['LSCORE'], sm.add_constant(reg1[covariates + hist1]) )
fit1 = model1.fit()
print(fit1.summary())

                            OLS Regression Results                            
Dep. Variable:                 LSCORE   R-squared:                       0.421
Model:                            OLS   Adj. R-squared:                  0.418
Method:                 Least Squares   F-statistic:                     137.5
Date:                Tue, 23 Jul 2019   Prob (F-statistic):               0.00
Time:                        17:18:58   Log-Likelihood:                 4311.9
No. Observations:                3613   AIC:                            -8584.
Df Residuals:                    3593   BIC:                            -8460.
Df Model:                          19                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const           -1.3911      0.083    -16.755   

In [10]:
maxlag = 5
limitedlag = [l+str(i) for l in ['LSCORE_min','LSCORE_max'] for i in range(1,1+maxlag)]

maxlagy = 2
limitedlagy = [l+str(i) for l in ['LSCORE_miny','LSCORE_maxy'] for i in range(1,1+maxlagy)]

hist2 = ['INC','PAST_AUCT','min*past','max*past'] + limitedlag + limitedlagy


reg2 = reg0.copy()[['LSCORE'] + covariates + hist2]
reg2 = reg2.dropna()

#previous prices 1 day
model2 = sm.OLS(reg2['LSCORE'], sm.add_constant(reg2[covariates + hist2]) )
fit2 = model2.fit()
print(fit2.summary())

                            OLS Regression Results                            
Dep. Variable:                 LSCORE   R-squared:                       0.450
Model:                            OLS   Adj. R-squared:                  0.445
Method:                 Least Squares   F-statistic:                     88.21
Date:                Tue, 23 Jul 2019   Prob (F-statistic):               0.00
Time:                        17:18:58   Log-Likelihood:                 3841.1
No. Observations:                3153   AIC:                            -7622.
Df Residuals:                    3123   BIC:                            -7440.
Df Model:                          29                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const           -0.7853      0.109     -7.223   

In [16]:
df = pd.concat([fit1.params, fit1.HC0_se, fit2.params, fit2.HC0_se], axis=1)


df.loc['$R^2$'] =  [fit1.rsquared,np.NaN,fit2.rsquared,np.NaN]
df.loc['Obs.'] =  [int(reg1.shape[0]),np.NaN, int(reg2.shape[0]),np.NaN]


nice_cov = {'const':'(Intercept)', 
            'LFMO':'Raw milk',
            'LGAS':'Gas',
            'LPOPUL':'Population', 
            'LQSCORE':'Quantity',
            'COOLER':'Cooler',
            'ESC':'Escalated',
            'NUM':'No. Bidders', #+ fekeys
            '3':'Waco','6':'St. Angelo', '7':'Austin', '9':'San Antonio',
            'INC':'Incumbency',
            'PAST_AUCT':'Attendance',
            'min*past': 'Min * Attendance',
            'max*past':'Max * Attendance',
            'LSCORE_min1':'Min at auction t-1',
           'LSCORE_min2':'Min at auction t-2',
           'LSCORE_min3':'Min at auction t-3',
           'LSCORE_min4':'Min at auction t-4',
           'LSCORE_min5':'Min at auction t-5',
           'LSCORE_max1':'Max at auction t-1',
           'LSCORE_max2':'Max at auction t-2',
           'LSCORE_max3':'Max at auction t-3',
           'LSCORE_max4':'Max at auction t-4',
           'LSCORE_max5':'Max at auction t-5',
           'LSCORE_miny1':'Min in year y-1',
           'LSCORE_miny2':'Min in year y-1',
           'LSCORE_maxy1':'Max in year y-2',
           'LSCORE_maxy2':'Max in year y-2'}


#fix column names

df = df.reindex(index = ['Obs.','$R^2$', 'const'] + covariates + hist2)
df = df.rename(columns = {0:'fit1',1:'se1',2:'fit2',3:'se2'})
df = df.rename(index = nice_cov)
print(df)

df.to_csv('lag_res.csv',  float_format='%.4f', na_rep = '')

                           fit1       se1         fit2       se2
Obs.                3613.000000       NaN  3153.000000       NaN
$R^2$                  0.420985       NaN     0.450288       NaN
(Intercept)           -1.391150  0.082387    -0.785259  0.108855
Raw milk               0.332459  0.026341     0.303353  0.028301
Gas                   -0.011480  0.003892    -0.017008  0.004327
Population             0.005950  0.003011     0.007109  0.003442
Quantity              -0.010643  0.003162    -0.013009  0.003633
Cooler                 0.004329  0.002744    -0.000040  0.002881
Escalated             -0.020881  0.002531    -0.019985  0.002619
No. Bidders            0.004671  0.001280     0.003331  0.001322
Waco                  -0.031208  0.003895    -0.031380  0.004141
St. Angelo            -0.030572  0.010350    -0.030001  0.011396
Austin                -0.011543  0.021949    -0.012534  0.024008
San Antonio           -0.018338  0.004734    -0.012676  0.005016
Incumbency            -0.

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [12]:
#nice_lags = [l+str(i) for l in ['Min at auction t-', 'Max at auction t-'] for i in range(1,1+maxlag)]
#nice_lags = ['Incumbency','Past auction', 'Min * Past Auction'] + nice_lags
#nice_lagsy = [l+str(i) for l in ['Min in year t-', 'Max in year t-'] for i in range(1,1+maxlagy)]