## Estimating Demand function

In [1]:
import pandas as pd
data = pd.read_csv('Data-GP1.csv')
data

Unnamed: 0,Mon,Tue,Wed,Thu,Date,Stormy,Mixed,p,q,Rainy,Cold,Wind
0,1,0,0,0,911202,1,0,-0.430783,8.994421,1,0,2.995732
1,0,1,0,0,911203,1,0,0.000000,7.707063,0,0,2.995732
2,0,0,1,0,911204,0,1,0.072321,8.350194,1,1,2.813411
3,0,0,0,1,911205,1,0,0.247139,8.656955,0,1,3.036554
4,0,0,0,0,911206,1,0,0.664327,7.844241,0,1,3.036554
...,...,...,...,...,...,...,...,...,...,...,...,...
106,1,0,0,0,920504,0,0,-0.798508,8.610683,0,0,2.862201
107,0,1,0,0,920505,0,1,-0.087011,7.162397,0,0,2.908721
108,0,0,1,0,920506,0,1,0.184922,7.362010,0,0,2.862201
109,0,0,0,1,920507,0,1,0.223143,8.764053,0,0,2.813411


In [2]:
#Defining variables
y = data[['q']] # Quantity estimate for demand function
y1 = data[['p']] # Price

### Equation that we are estimating is Qd = beta0 + beta1 x price + sumprod(Days x gamma) + u1

In [3]:
z1 = data[['Mon', 'Tue', 'Wed', 'Thu']] #Exogenous variables
print(z1)

     Mon  Tue  Wed  Thu
0      1    0    0    0
1      0    1    0    0
2      0    0    1    0
3      0    0    0    1
4      0    0    0    0
..   ...  ...  ...  ...
106    1    0    0    0
107    0    1    0    0
108    0    0    1    0
109    0    0    0    1
110    0    0    0    0

[111 rows x 4 columns]


In [27]:
zsm =  data[['Mon','Tue','Wed','Thu','Stormy', 'Mixed']]

In [16]:
# Assuming that price is the endogenous variable
# We can assume that price is related to certain weather conditions. Here considering - Stormy, Mixed, Rainy, Cold, Wind
# Hence we consider these as the instrument variables
listz = ['Mon','Tue','Wed','Thu','Stormy','Wind','Rainy','Mixed','Cold'] #added more variables 'mixed' and 'cold'

In [28]:
import statsmodels.api as sm
# z = data[listz]
zsm = sm.add_constant(zsm)
reduced_form = sm.OLS(y1, zsm).fit()
print(reduced_form.summary())

                            OLS Regression Results                            
Dep. Variable:                      p   R-squared:                       0.245
Model:                            OLS   Adj. R-squared:                  0.201
Method:                 Least Squares   F-statistic:                     5.624
Date:                Wed, 21 Sep 2022   Prob (F-statistic):           4.35e-05
Time:                        09:17:26   Log-Likelihood:                -34.566
No. Observations:                 111   AIC:                             83.13
Df Residuals:                     104   BIC:                             102.1
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.3596      0.079     -4.543      0.0

  x = pd.concat(x[::order], 1)


In [None]:
# Here, p values of Wind is less than 0.05. So price is heavily dependant on this instrument variable
# Their F- statistic is significant or they are highly correlated to the price.

In [29]:
hat_y1_arr = reduced_form.fittedvalues
hat_y1 = pd.DataFrame(hat_y1_arr, columns = ['hat_y1'])
hat_y1

Unnamed: 0,hat_y1
0,-0.021478
1,0.020678
2,-0.171967
3,0.126117
4,0.086770
...,...
106,-0.467817
107,-0.188785
108,-0.171967
109,-0.083347


In [30]:
#Updating structural model using 2nd stage linear regression

X = pd.concat([hat_y1, z1], axis=1)
X = sm.add_constant(X)
structuralModel = sm.OLS(y, X).fit()

print(structuralModel.summary())
print('-------------------------------')

                            OLS Regression Results                            
Dep. Variable:                      q   R-squared:                       0.191
Model:                            OLS   Adj. R-squared:                  0.153
Method:                 Least Squares   F-statistic:                     4.965
Date:                Wed, 21 Sep 2022   Prob (F-statistic):           0.000403
Time:                        09:18:16   Log-Likelihood:                -112.05
No. Observations:                 111   AIC:                             236.1
Df Residuals:                     105   BIC:                             252.4
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.5402      0.156     54.741      0.0

  x = pd.concat(x[::order], 1)


In [31]:
# Hausman test
hat_v = reduced_form.resid
hat_v = pd.DataFrame(hat_v, columns = ['hat_v'])
print(hat_v)
struc_var = pd.concat([y1, z1, hat_v], axis=1)
struc_var = sm.add_constant(struc_var)
print(struc_var)

        hat_v
0   -0.409305
1   -0.020678
2    0.244287
3    0.121022
4    0.577557
..        ...
106 -0.330691
107  0.101774
108  0.356889
109  0.306490
110  0.683812

[111 rows x 1 columns]
     const         p  Mon  Tue  Wed  Thu     hat_v
0      1.0 -0.430783    1    0    0    0 -0.409305
1      1.0  0.000000    0    1    0    0 -0.020678
2      1.0  0.072321    0    0    1    0  0.244287
3      1.0  0.247139    0    0    0    1  0.121022
4      1.0  0.664327    0    0    0    0  0.577557
..     ...       ...  ...  ...  ...  ...       ...
106    1.0 -0.798508    1    0    0    0 -0.330691
107    1.0 -0.087011    0    1    0    0  0.101774
108    1.0  0.184922    0    0    1    0  0.356889
109    1.0  0.223143    0    0    0    1  0.306490
110    1.0  0.561118    0    0    0    0  0.683812

[111 rows x 7 columns]


  x = pd.concat(x[::order], 1)


In [32]:
# Hausman test
structural_equation = sm.OLS(y, struc_var).fit()
print(structural_equation.summary())

                            OLS Regression Results                            
Dep. Variable:                      q   R-squared:                       0.231
Model:                            OLS   Adj. R-squared:                  0.187
Method:                 Least Squares   F-statistic:                     5.221
Date:                Wed, 21 Sep 2022   Prob (F-statistic):           9.83e-05
Time:                        09:18:52   Log-Likelihood:                -109.21
No. Observations:                 111   AIC:                             232.4
Df Residuals:                     104   BIC:                             251.4
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          8.5402      0.153     55.889      0.0

In [None]:
# Here p value of the coef of residual terms is more than 0.05
# So we can not reject the null hypothesis that the price is indeed endogenous
#Here p-value is coming as >0.185 so y could be exogenous and we could estimate using OLS.

In [33]:
# Sargan Test
hat_u = structuralModel.resid
SarganModel = sm.OLS(hat_u, zsm).fit()
Rsquared = SarganModel.rsquared
print(Rsquared)

0.0070097144318779225


In [34]:
from scipy import stats
N = 111 # n is number of sample points
pval = 1 - stats.chi2.cdf(N*Rsquared,1)
print(pval)

0.3777295084257589


In [None]:
# p value is big, i.e. more than 0.05. So we can consider Stormy, Wind and Rainy as instrument variables

In [35]:
structuralModel.params

const     8.540234
hat_y1   -0.930141
Mon      -0.011902
Tue      -0.525832
Wed      -0.562620
Thu       0.099871
dtype: float64

### The Demand function can be considered as Qd = 8.5 - 1.12price - 0.02Mon - 0.53Tue - 0.57Wed + 0.11Thu