# Monte Carlo experiments in Berry (1994)

We need the following libraries

In [13]:
# Import libraries
import pandas as pd
import numpy as np
from scipy.optimize import fsolve
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.iolib.summary2 import summary_col
from linearmodels.iv import IV2SLS

## Model

Utility: $u_{ij} = \beta_0 + \beta_x x_j + \sigma_d \xi_j - \alpha p_j + \epsilon_{ij} \equiv \delta_j + \epsilon_{ij}$

Marginal cost: $c_j = e^{\gamma_0 + \gamma_x x_j + \sigma_c \xi_j + \gamma_w w_j + \sigma_\omega \omega_j}$

where:
- $i$ indexes individuals
- $j\in\{1, 2\}$: indexes products
- $u_{i0} = \epsilon_{i0}$: utility of outside good
- $\epsilon_{ij}$: logit error
- $x_j$, $\xi_j$, $w_j$, $\omega_j$: exogenous data drawn independently from a standard normal distribution.
- $\sigma_d$, $\sigma_c$, $\sigma_\omega$: parameters describing the effect of the unobservables $\xi$ and $\omega$
- $\beta_0$, $\beta_x$, $\alpha$: parameters to be estimated
- $\gamma_0$, $\gamma_x$, $\gamma_w$: parameters of marginal cost function

With a duopoly (+ outside option) market shares $s_j$ are given by

$s_j = \frac{e^{\delta_j}}{1+e^{\delta_{-j}}+e^{\delta_j}}$

Partial derivative is given by

$\frac{\partial s_j}{\partial p_j} = - \alpha s_j (1 - s_j)$

Profits

$$\pi_j= p_j M s_j -  c_j M s_j$$

First order condition

$$p_j = c_j + \frac{s_j}{\mid\frac{\partial s_j}{\partial p_j}\mid}$$
$$p_j = c_j + \frac{1}{\alpha (1-s_j)}$$

where

- $c_j$ is (constant) marginal cost

System of non-linear equations:

$$p_1 - c_1 - \frac{1}{\alpha (1-s_1)} = 0$$

$$p_2 - c_2 - \frac{1}{\alpha (1-s_2)} = 0$$

$$s_1 - \frac{e^{\delta_1}}{1+e^{\delta_{1}}+e^{\delta_2}} = 0$$

$$s_2 - \frac{e^{\delta_2}}{1+e^{\delta_{1}}+e^{\delta_2}} = 0$$

where

$$\delta_j \equiv \beta_0 + \beta_x x_j + \sigma_d \xi_j - \alpha p_j$$


#### Implement model by defining a class

In [14]:
class Duopoly():
    r"""
    Implements duopoly market for Berry (1994) Monte Carlo simulation.

    Two single-product firms (``pid`` 1 and 2) compete in prices against an
    outside good under logit demand.  Mean utility of product j is
    ``β_0 + β_x * x_j + σ_d * ξ_j - α * p_j`` and marginal cost is
    ``exp(γ_0 + γ_x * x_j + σ_c * ξ_j + γ_w * w_j + σ_ω * ω_j)``.

    The class can solve for the price equilibrium of one market
    (``solve_eq``), generate a sample of markets (``simulate_mkt``) and run
    the OLS / IV Monte Carlo comparison (``simulate_mc``).
    """
    def __init__(self, β_0 = 5, 
                       β_x = 2, 
                       α   = 1,
                       γ_0 = 1, 
                       γ_x = .5,
                       γ_w = .25, 
                       σ_ω = .25, 
                       σ_c = .25, 
                       σ_d = 1,
                       x1  = 0,
                       x2  = 0,
                       ξ1  = 0,
                       ξ2  = 0,
                       w1  = 0,
                       w2  = 0, 
                       ω1  = 0,
                       ω2  = 0):
        """Store demand parameters, cost parameters and exogenous characteristics.

        β_0, β_x, α, σ_d      : demand-side parameters (see class docstring).
        γ_0, γ_x, γ_w, σ_ω, σ_c : marginal-cost parameters.
        x1..ω2                : characteristics of firms 1 and 2; they are
                                overwritten by ``update_exogenous``.
        """
        # Unpack demand parameters
        self.β_0, self.β_x, self.α, self.σ_d = β_0, β_x, α, σ_d
        
        # Unpack cost parameters
        self.γ_0, self.γ_x, self.γ_w, self.σ_ω, self.σ_c =  γ_0, γ_x, γ_w, σ_ω, σ_c
        
        # Unpack exogenous characteristics
        self.x1, self.x2 = x1, x2
        self.ξ1, self.ξ2 = ξ1, ξ2
        self.w1, self.w2 = w1, w2
        self.ω1, self.ω2 = ω1, ω2
        
    def update_exogenous(self, seed):
        """Redraw all eight exogenous characteristics from a standard normal.

        A fresh generator is seeded with ``seed`` on every call, so the same
        seed always reproduces the same market.
        """
        draw = np.random.default_rng(seed).normal(size=8)
        self.x1, self.x2, self.ξ1, self.ξ2, self.w1, self.w2, self.ω1, self.ω2 = draw

    def _exog(self, pid):
        """Return ``(x, ξ, w, ω)`` for product ``pid``; raise ValueError otherwise."""
        if pid == 1:
            return self.x1, self.ξ1, self.w1, self.ω1
        if pid == 2:
            return self.x2, self.ξ2, self.w2, self.ω2
        raise ValueError("pid must be 1 or 2")

    def δ(self, p, pid=1):
        """Mean utility of product ``pid`` when its own price is ``p``.

        Raises ValueError if ``pid`` is not 1 or 2.
        """
        x, ξ, _, _ = self._exog(pid)
        return self.β_0 + self.β_x * x + self.σ_d * ξ - self.α * p

    def c(self, pid=1):
        """Marginal cost of product ``pid`` (constant in quantity).

        Raises ValueError if ``pid`` is not 1 or 2.
        """
        x, ξ, w, ω = self._exog(pid)
        return np.exp(self.γ_0 + self.γ_x * x + self.σ_c * ξ + self.γ_w * w + self.σ_ω * ω)
        
    def s(self, p1, p2, pid=1):
        """Logit market share of product ``pid`` at prices ``p1`` and ``p2``.

        The outside good has mean utility zero, hence the ``1`` in the
        denominator.  Raises ValueError if ``pid`` is not 1 or 2.
        """
        if pid not in (1, 2):
            raise ValueError("pid must be 1 or 2")
        e1 = np.exp(self.δ(p1, 1))
        e2 = np.exp(self.δ(p2, 2))
        own = e1 if pid == 1 else e2
        return own / (1 + e1 + e2)
    
    def π(self, p1, p2, pid=1):
        """Per-capita profit of product ``pid`` at prices ``p1`` and ``p2``.

        Market size is not applied here: the value is (price - cost) * share.
        Raises ValueError if ``pid`` is not 1 or 2.
        """
        if pid == 1:
            price = p1
        elif pid == 2:
            price = p2
        else:
            raise ValueError("pid must be 1 or 2")
        share = self.s(p1, p2, pid)
        return price * share - self.c(pid) * share

    def solve_eq(self):
        """Solve for equilibrium prices and shares using Scipy's fsolve.

        The two first-order conditions ``p_j - c_j - 1 / (α (1 - s_j)) = 0``
        are solved as a system of two equations in the two prices; shares
        are then evaluated at the solution.

        Returns a length-4 array ``[p1, p2, s1, s2]``.  If the solver does
        not report convergence, or the residuals at the returned point are
        not finite, the array is filled with NaN so that callers can drop
        the market.
        """
        # Unpack parameters
        α = self.α
        # Marginal costs do not depend on prices: evaluate them once
        c1, c2 = self.c(1), self.c(2)

        # Define function for solver
        def f(eq):
            p1, p2 = eq
            f0 = p1 - c1 - 1 / (α * (1 - self.s(p1, p2, 1)))
            f1 = p2 - c2 - 1 / (α * (1 - self.s(p1, p2, 2)))
            return np.array([f0, f1])

        # Set initial conditions and solve
        eq0 = np.array([.5, .5])
        # fsolve does not raise on non-convergence (it only warns), so the
        # status flag has to be inspected explicitly: ier == 1 means success.
        sol, info, ier, _ = fsolve(f, eq0, full_output=True)
        if ier != 1 or not np.all(np.isfinite(info['fvec'])):
            return np.array([np.nan, np.nan, np.nan, np.nan])

        s1 = self.s(sol[0], sol[1], 1)
        s2 = self.s(sol[0], sol[1], 2)
        return np.append(sol, [s1, s2])

    def simulate_mkt(self, T=500, seed=42):
        """Generate equilibrium solutions for T markets and save in pandas dataframe.

        Each market ``t`` redraws the exogenous characteristics with seed
        ``100000*seed + t`` and is solved with ``solve_eq``.  The result has
        one row per market (index named ``mkt_id``) and two-level columns
        ``(vars, firm)`` with vars in p, s, x, ξ, w, ω, π and firm in '1', '2'.
        Markets whose equilibrium could not be found contain NaN in the
        p, s and π columns.
        """
        # Create index for dataframe
        tuples = [(x, y) for x in ['p', 's', 'x', 'ξ', 'w', 'ω', 'π' ] for y in ['1', '2']]
        index = pd.MultiIndex.from_tuples(tuples, names=['vars', 'firm'])
        # Initialize dataframe
        df_mkt = pd.DataFrame(columns=index)
        df_mkt = df_mkt.rename_axis('mkt_id')
        # Loop over markets
        for t in range(T):
            # Scale the sample seed so that (seed, t) pairs map to distinct
            # generator seeds (seed + t would collide across samples)
            market_seed = 100000*seed + t
            self.update_exogenous(market_seed)
            # Solve for equilibrium
            eq = self.solve_eq()
            # Calculate profits and save equilibrium
            π1 = self.π(eq[0], eq[1], 1)
            π2 = self.π(eq[0], eq[1], 2)
            df_mkt.loc[t] = np.hstack([eq, self.x1, self.x2, self.ξ1, self.ξ2, self.w1, self.w2, self.ω1, self.ω2, π1, π2])
        return df_mkt

    def simulate_mc(self, T, S):
        """Simulate and run estimation for S samples, with T markets in each simulation.

        For every sample ``i`` (generated with seed ``i``) the regression
        ``log(s_j) - log(s_0)`` on a constant, ``x`` and minus price is
        estimated by OLS and by 2SLS, instrumenting price with the cost
        shifter ``w`` and the rival's ``x``.  Rows containing NaN or
        infinite values are dropped before estimation.

        Returns a dataframe indexed by ``(param, stats)`` holding the mean
        and standard deviation of the estimates across samples in columns
        ``ols`` and ``iv``, plus a ``true`` column with the true parameter
        value on the ``mean`` rows.
        """
        # Initialize dataframe
        tuples = [(x, y) for x in ['ols', 'iv'] for y in ['β_0', 'β_x', 'α']]
        index = pd.MultiIndex.from_tuples(tuples, names=['est', 'param'])
        df_mc = pd.DataFrame(columns=index)
        print("Starting simulation for {} samples with {} markets each...".format(S, T))
        for i in range(S):
            print("Sample {} of {}".format(i+1, S))
            df_s = self.simulate_mkt(T, i)
            # Reshape data to long format where each row is a firm + market
            df_s = df_s.stack('firm')

            # Outside-good share: one minus the sum of inside shares per market
            df_s['s0'] = 1 - df_s.groupby('mkt_id')['s'].transform('sum')

            # Create variables for regression
            df_s['δ'] = np.log(df_s['s']) - np.log(df_s['s0'])
            df_s['const'] = 1
            # Regress on minus price so the coefficient estimates α directly
            df_s['pn'] = -1 * df_s['p']
            # Create variable with the value of x for the other firm
            df_s['x_oth'] = df_s.groupby('mkt_id')['x'].transform('sum') - df_s['x']

            # Drop if inf
            df_s = df_s.replace([np.inf, -np.inf], np.nan)

            # Drop if Nan
            df_s = df_s.dropna()

            # Linear Regression
            ols = sm.OLS(endog=df_s['δ'], exog=df_s[['const', 'x', 'pn']]).fit()

            # IV Regression
            iv = IV2SLS(dependent=df_s['δ'],
                        exog=df_s[['const', 'x']],
                        endog=df_s['pn'] ,
                        instruments=df_s[['w', 'x_oth']]).fit(cov_type='unadjusted')
            # Export results
            df_mc.loc[i] = np.hstack([ols.params, iv.params])

        # Obtain empirical mean and standard deviation for each parameter and organize in a table
        df_mc = df_mc.agg(['mean', 'std']).rename_axis('stats')
        df_mc = df_mc.stack('param').reorder_levels(['param', 'stats']).sort_index(level=0).sort_index(axis=1, ascending=False)

        # Add True value
        df_mc.loc[('α', 'mean'), 'true'] = self.α
        df_mc.loc[('β_0', 'mean'), 'true'] = self.β_0
        df_mc.loc[('β_x', 'mean'), 'true'] = self.β_x

        return df_mc

#### Run Monte Carlo experiments for $\sigma_d=1,3$

In [15]:
%%capture
# The cell magic above captures this cell's output, which hides the
# per-sample progress lines printed by simulate_mc.
# Create market object with σ_d = 1 and run 100 samples of 500 markets each
m1 = Duopoly(σ_d=1)
tab1 = m1.simulate_mc(T=500, S=100)

# Create market object with σ_d = 3 and repeat with the same T and S
m3 = Duopoly(σ_d=3)
tab3 = m3.simulate_mc(T=500, S=100)

In [16]:
# Combine data frames and create output table
# Work on copies so tab1 and tab3 keep their original column labels
t1, t3 = tab1.copy(), tab3.copy()
# Add a top column level identifying the experiment each column came from
t1.columns = pd.MultiIndex.from_product([['σ_d = 1'], tab1.columns])
t3.columns = pd.MultiIndex.from_product([['σ_d = 3'], tab3.columns])
# Place the two experiments side by side
tab = pd.concat([t1, t3], axis=1)
# Positional reordering via a temporary helper column: the first two rows are
# moved to the end. NOTE(review): this presumes the incoming row order is
# α, β_0, β_x (mean and std for each) — confirm if simulate_mc changes.
tab['order'] = [5, 6, 1, 2, 3, 4]
tab = tab.sort_values(by='order', axis=0)
del tab['order']
# Remove the 'true' column of each experiment from the displayed table
tab = tab.drop('true', axis=1, level=1)

**Results in the paper**


<img src="./resources/t1s8berry94.png" alt="tab" width="70%" height="70%">

**Replication results**

In [17]:
# Print the combined replication table (OLS and IV for both values of σ_d)
print(tab)

              σ_d = 1             σ_d = 3          
est               ols        iv       ols        iv
param stats                                        
β_0   mean   3.137898  5.037987 -0.851951  5.114327
      std    0.209233  0.278901  0.396680  0.930597
β_x   mean   1.322600  2.017797  0.014046  2.050721
      std    0.073916  0.112454  0.128180  0.347609
α     mean   0.629683  1.008993 -0.121830  1.025936
      std    0.042591  0.055916  0.078055  0.179668


Observations:
- Results for columns (1), (2), and (4) are very similar to those in the paper.
- Results for column (3) have the opposite sign for $\beta_0$ and $\alpha$, but this is consistent with the analysis in the text.
- When S=1000, the estimated coefficients get much closer to the true values for column (4).