# Anton Melnychuk ECON 3385 - Problem Set 3

February 1st, 2026

In [60]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.api as sm

data = pd.read_csv('sim_market.csv')
data.head()

Unnamed: 0,m,z_a,z_b,eps_d_1,eps_d_2,eps_s_1,eps_s_2
0,1,0.764311,-0.187538,-0.379044,0.985495,1.197447,0.146624
1,2,-0.604995,-1.887242,-1.128497,1.84903,-0.148301,-1.050508
2,3,-1.034992,-1.250214,-1.106938,0.835775,-0.167924,-0.271543
3,4,0.20141,-0.495516,0.932348,-0.601762,1.435051,0.029398
4,5,0.667977,1.908742,1.38137,0.450992,-0.475587,-0.16326


### Question 1

![Solution for Question 1](image1.png)

$Z_m^A$ and $Z_m^B$ appear only in the supply equations ($Q_m^{S1}$ and $Q_m^{S2}$) with coefficients $\gamma_{12}$, $\gamma_{13}$, $\gamma_{22}$, and $\gamma_{23}$. They shift the supply curves but do not affect demand directly. Therefore, they serve as valid instruments.


### Question 2

Using 2SLS with $Z_m^A$ and $Z_m^B$ as instruments:

In [61]:
# Q^D1
beta_10 = 10
beta_11 = -1
beta_12 = 0.5

# Q^D2
beta_20 = 15
beta_21 = -1
beta_22 = 0.5

# Q^S1
gamma_10 = 0
gamma_11 = 1.5
gamma_12 = -0.5
gamma_13 = -1

# Q^S2
gamma_20 = 0
gamma_21 = 1.5
gamma_22 = -1
gamma_23 = -0.5

In [62]:
# P^2 = A - (beta_22 * P^1) / (beta_21 - gamma_21), where A = (gamma_20 - beta_20) + (gamma_22*Z^A + gamma_23*Z^B) + (eps^S2 - eps^D2)
# P^1 = ((C - beta_12 * A) * (beta_21 - gamma_21)) / theta, where C = (gamma_10 - beta_10) + (gamma_12*Z^A + gamma_13*Z^B) + (eps^S1 - eps^D1)

# Denominator
denominator = (beta_11 - gamma_11) * (beta_21 - gamma_21) - beta_12 * beta_22

# Component A
A = ((gamma_20 - beta_20) + 
     gamma_22 * data['z_a'] + gamma_23 * data['z_b'] +
     data['eps_s_2'] - data['eps_d_2']) / (beta_21 - gamma_21)

# Component C
C = ((gamma_10 - beta_10) +
     (data['eps_s_1'] - data['eps_d_1']) +
     gamma_12 * data['z_a'] + gamma_13 * data['z_b'])

# Numerator
numerator = (C - beta_12 * A) * (beta_21 - gamma_21)

In [63]:
p_m_1 = numerator / denominator
p_m_2 = A - (beta_22 * p_m_1) / (beta_21 - gamma_21)

data['p_m_1'] = p_m_1
data['p_m_2'] = p_m_2

print("Equilibrium prices:")
print(f"Mean P^1: {p_m_1.mean():.4f}")
print(f"Mean P^2: {p_m_2.mean():.4f}")

Equilibrium prices:
Mean P^1: 5.3832
Mean P^2: 7.1208


In [73]:
q_m_1 = beta_10 + beta_11 * p_m_1 + beta_12 * p_m_2 + data['eps_d_1']
q_m_2 = beta_20 + beta_21 * p_m_2 + beta_22 * p_m_1 + data['eps_d_2']

data['q_m_1'] = q_m_1
data['q_m_2'] = q_m_2

print("Equilibrium quantities:")
print(f"Mean Q^1: {q_m_1.mean():.4f}")
print(f"Mean Q^2: {q_m_2.mean():.4f}")

data.head()

Equilibrium quantities:
Mean Q^1: 8.0666
Mean Q^2: 10.6078


Unnamed: 0,m,z_a,z_b,eps_d_1,eps_d_2,eps_s_1,eps_s_2,p_m_1,p_m_2,q_m_1,q_m_2,p_m_1_hat,p_m_2_hat
0,1,0.764311,-0.187538,-0.379044,0.985495,1.197447,0.146624,4.966671,7.597099,8.452835,10.871731,5.493797,7.374619
1,2,-0.604995,-1.887242,-1.128497,1.84903,-0.148301,-1.050508,4.208437,7.382056,8.354094,11.571192,4.32121,6.252955
2,3,-1.034992,-1.250214,-1.106938,0.835775,-0.167924,-0.271543,4.2428,6.627448,7.963986,11.329727,4.487597,6.265904
3,4,0.20141,-0.495516,0.932348,-0.601762,1.435051,0.029398,4.986244,6.726246,9.309227,10.165115,5.190189,7.035398
4,5,0.667977,1.908742,1.38137,0.450992,-0.475587,-0.16326,7.311253,8.356891,8.248562,10.749728,6.423122,7.986858


### Question 3

In [65]:
# 2SLS

# First stage regressions for p_m_1 and p_m_2
fs_p1 = smf.ols('p_m_1 ~ z_a + z_b', data=data).fit()
fs_p2 = smf.ols('p_m_2 ~ z_a + z_b', data=data).fit()

In [66]:
print("First stage regressions:")
print(fs_p1.summary())

First stage regressions:
                            OLS Regression Results                            
Dep. Variable:                  p_m_1   R-squared:                       0.459
Model:                            OLS   Adj. R-squared:                  0.456
Method:                 Least Squares   F-statistic:                     168.6
Date:                Sun, 01 Feb 2026   Prob (F-statistic):           1.01e-53
Time:                        17:20:29   Log-Likelihood:                -375.76
No. Observations:                 400   AIC:                             757.5
Df Residuals:                     397   BIC:                             769.5
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      5.3581      

In [67]:
print("First stage regression:")
print(fs_p2.summary())

First stage regression:
                            OLS Regression Results                            
Dep. Variable:                  p_m_2   R-squared:                       0.425
Model:                            OLS   Adj. R-squared:                  0.422
Method:                 Least Squares   F-statistic:                     146.9
Date:                Sun, 01 Feb 2026   Prob (F-statistic):           1.78e-48
Time:                        17:20:29   Log-Likelihood:                -392.92
No. Observations:                 400   AIC:                             791.8
Df Residuals:                     397   BIC:                             803.8
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      7.1030      0

| Dependent Variable | Intercept | z_a | z_b | F-statistic |
|-------------------|-----------|-----|-----|-------------|
| **P_m^1** | 5.3581 (0.031) | 0.2895 (0.033) | 0.4566 (0.031) | 168.6 |
| **P_m^2** | 7.1030 (0.032) | 0.4320 (0.034) | 0.3119 (0.033) | 146.9 |

*Note: Standard errors in parentheses.*

Yes, the estimates make sense:

All coefficients are statistically significant with very low standard errors relative to the magnitude of the coefficients. Both F-statistics are well above 10 (168.6 for P_m^1 and 146.9 for P_m^2). This confirm that z_a and z_b are relevant instruments that strongly predict the endogenous prices.

For P_m^1: Both z_a (0.2895) and z_b (0.4566) have positive coefficients, meaning increases in these supply shifters raise the equilibrium price of good 1.

Also note, for P_m^2: Both z_a (0.4320) and z_b (0.3119) also have positive coefficients, but with different magnitudes than for P_m^1. This indicates the instruments affect the two goods differently, which is expected since they represent supply shifters that may impact production costs differently across goods.

The intercepts represent the baseline price when both instruments are zero (z_a = z_b = 0). In our results, the intercept of P_m^1 is 5.3581 (positive) and that of P_m^2 is 7.1030 (positive). Positive price makes sense.

The standard errors are small relative to the coefficients, indicating precise estimates. The different magnitudes across goods (e.g., z_b has larger effect on P_m^1 than z_a, while z_a has larger effect on P_m^2) suggest the instruments have heterogeneous impacts, which is plausible.

In [68]:
p_m_1_hat = fs_p1.fittedvalues
p_m_2_hat = fs_p2.fittedvalues

data['p_m_1_hat'] = p_m_1_hat
data['p_m_2_hat'] = p_m_2_hat

In [69]:
# Regress q_m_1
model_q1 = smf.ols('q_m_1 ~ p_m_1_hat + p_m_2_hat', data=data).fit()

print("Demand for Good 1 (q_m_1) - 2SLS:")
print(model_q1.summary())

Demand for Good 1 (q_m_1) - 2SLS:
                            OLS Regression Results                            
Dep. Variable:                  q_m_1   R-squared:                       0.144
Model:                            OLS   Adj. R-squared:                  0.140
Method:                 Least Squares   F-statistic:                     33.37
Date:                Sun, 01 Feb 2026   Prob (F-statistic):           4.03e-14
Time:                        17:20:29   Log-Likelihood:                -479.23
No. Observations:                 400   AIC:                             964.5
Df Residuals:                     397   BIC:                             976.4
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      9.4

In [70]:
# Regress q_m_2
model_q2 = smf.ols('q_m_2 ~ p_m_1_hat + p_m_2_hat', data=data).fit()

print("Demand for Good 2 (q_m_2) - 2SLS:")
print(model_q2.summary())

Demand for Good 2 (q_m_2) - 2SLS:
                            OLS Regression Results                            
Dep. Variable:                  q_m_2   R-squared:                       0.181
Model:                            OLS   Adj. R-squared:                  0.177
Method:                 Least Squares   F-statistic:                     43.76
Date:                Sun, 01 Feb 2026   Prob (F-statistic):           6.69e-18
Time:                        17:20:29   Log-Likelihood:                -447.67
No. Observations:                 400   AIC:                             901.3
Df Residuals:                     397   BIC:                             913.3
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     15.8

| Demand Equation | Intercept | P_m^1 | P_m^2 |
|----------------|-----------|-------|-------|
| **Q_m^D1** | 9.4341 (0.620) | -1.1816 (0.222) | 0.7012 (0.228) |
| **Q_m^D2** | 15.8618 (0.573) | 0.7376 (0.205) | -1.2955 (0.210) |

**True Parameters:**
- Q_m^D1: β_{10} = 10, β_{11} = -1, β_{12} = 0.5
- Q_m^D2: β_{20} = 15, β_{21} = -1, β_{22} = 0.5

Yes, we do recover all true parameters!

In [87]:
ci1 = model_q1.conf_int()
ci2 = model_q2.conf_int()

# Check confidence interval coverage

beta_10_lower = ci1.loc['Intercept', 0]
beta_10_upper = ci1.loc['Intercept', 1]
beta_10_in_range = beta_10_lower <= beta_10 <= beta_10_upper
beta_11_lower = ci1.loc['p_m_1_hat', 0]
beta_11_upper = ci1.loc['p_m_1_hat', 1]
beta_11_in_range = beta_11_lower <= beta_11 <= beta_11_upper
beta_12_lower = ci1.loc['p_m_2_hat', 0]
beta_12_upper = ci1.loc['p_m_2_hat', 1]
beta_12_in_range = beta_12_lower <= beta_12 <= beta_12_upper

print(f"Q_m^D1: {beta_10_in_range and beta_11_in_range and beta_12_in_range}")

beta_20_lower = ci2.loc['Intercept', 0]
beta_20_upper = ci2.loc['Intercept', 1]
beta_20_in_range = beta_20_lower <= beta_20 <= beta_20_upper
beta_22_lower = ci2.loc['p_m_1_hat', 0]
beta_22_upper = ci2.loc['p_m_1_hat', 1]
beta_22_in_range = beta_22_lower <= beta_22 <= beta_22_upper
beta_21_lower = ci2.loc['p_m_2_hat', 0]
beta_21_upper = ci2.loc['p_m_2_hat', 1]
beta_21_in_range = beta_21_lower <= beta_21 <= beta_21_upper

print(f"Q_m^D2: {beta_20_in_range and beta_22_in_range and beta_21_in_range}")

Q_m^D1: True
Q_m^D2: True


In [88]:
from statsmodels.sandbox.regression.gmm import IV2SLS

exog_1 = sm.add_constant(data[["p_m_1", "p_m_2"]])
instruments = sm.add_constant(data[['z_a', 'z_b']])

In [89]:
# 2SLS
iv_model_q1 = IV2SLS(endog=data['q_m_1'], exog=exog_1, instrument=instruments).fit()
print(iv_model_q1.summary())

                          IV2SLS Regression Results                           
Dep. Variable:                  q_m_1   R-squared:                      -0.794
Model:                         IV2SLS   Adj. R-squared:                 -0.803
Method:                     Two Stage   F-statistic:                     15.92
                        Least Squares   Prob (F-statistic):           2.24e-07
Date:                Sun, 01 Feb 2026                                         
Time:                        17:26:24                                         
No. Observations:                 400                                         
Df Residuals:                     397                                         
Df Model:                           2                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          9.4341      0.898     10.503      0.0

In [90]:
# 2SLS
iv_model_q2 = IV2SLS(endog=data['q_m_2'], exog=exog_1, instrument=instruments).fit()
print(iv_model_q1.summary())

                          IV2SLS Regression Results                           
Dep. Variable:                  q_m_1   R-squared:                      -0.794
Model:                         IV2SLS   Adj. R-squared:                 -0.803
Method:                     Two Stage   F-statistic:                     15.92
                        Least Squares   Prob (F-statistic):           2.24e-07
Date:                Sun, 01 Feb 2026                                         
Time:                        17:26:25                                         
No. Observations:                 400                                         
Df Residuals:                     397                                         
Df Model:                           2                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          9.4341      0.898     10.503      0.0

For $Q_m^{D1}$, the intercept is 9.4341 with SE = 0.898, the coefficient for $P_m^1$ = -0.8636 with SE = 0.265 and the coefficient for $P_m^2$ = 0.7012 with SE = 0.330. 

For $Q_m^{D2}$, the intercept is 15.8618 with SE = 0.899, the coefficient for $P_m^1$ = 0.3359 with SE = 0.265, and the coefficient for $P_m^2$ = -1.2955 with SE = 0.330.

Indeed, all the true parameters fall within the 95% confidence intervals of the estimates.

### Question 4

![Solution for Question 1](image2.png)

### Question 5

No, consistent estimation of all demand coefficients is not possible. With only two instruments ($Z_m^A$ and $Z_m^B$) available to identify three endogenous prices ($P_m^1$, $P_m^2$, $P_m^3$), we face an identification problem:

The third price $P_m^3$ becomes a deterministic function of the other two prices, creating perfect multicollinearity that prevents us from separately identifying all the demand parameters.