# 0. Setup

In [1]:
!pip install -q pyBLP

In [2]:
# Libraries
import pandas as pd
import numpy as np
import pyblp
from IPython.display import display, HTML

pyblp.options.verbose = False

In [3]:
# Data
df = pd.read_csv('pset2_data.csv')\
                 .drop(columns = 'Unnamed: 0')
df['firm_ids'] = df['firm_ids'] + 1

true_diversion_ratios = pd.read_csv('true_diversionratio.csv')\
                                    .drop(columns = 'Unnamed: 0')

true_elasticities = pd.read_csv('true_ownprice_elasticities.csv')\
                                .drop(columns = 'Unnamed: 0')
true_elasticities = true_elasticities.iloc[0].to_list()
true_elasticities = [round(value, 8) for value in true_elasticities]

# 1. Estimating some mis-specified models

In [None]:
# 1. Plain logit (without IVs)
df['demand_instruments0'] = df['prices']
X1_formulation = pyblp.Formulation('0 + x + satellite + wired + prices')
formulations = (X1_formulation)
problem = pyblp.Problem(formulations, df)
pl_results = problem.solve(method = '1s')

In [None]:
# 2. Plain logit (with IVs)
df['demand_instruments0'] = df['w']
df['demand_instruments1'] = df['x'] ** 2
df['demand_instruments2'] = df['x'] ** 3
df['demand_instruments3'] = df['w'] ** 2
df['demand_instruments4'] = df['w'] ** 3
df['demand_instruments5'] = df['x'] * df['w']
df['demand_instruments6'] = np.exp(df['x'] + df['w'])
problem = pyblp.Problem(formulations, df)
pliv_results = problem.solve(method = '1s')

The results from the IV logit model are very close to the true estimates suggested in the pdf. In contrast, the plain logit model only estimates $\beta^{(1)}$ well whereas all the other coefficients are much smaller in magnitude (although the signs are still correct).

In [None]:
# 3. Nested logit (with IVs)
df['nesting_ids'] = df['satellite']
problem = pyblp.Problem(formulations, df)
bfgs = pyblp.Optimization('bfgs', {'gtol': 1e-6})
nliv_results = problem.solve(rho = [-0.5, -0.5],
                             optimization = bfgs,
                             method = '1s')

In this logit model with seperate nests for wired and satellite televisions, the assumption is that once consumers choose into one of these nests, they don't care about the other (the IIA assumption). However, that might not hold necessarily. For example, if satellite televisions start being bundled with internet access, then they could become more attractive for wired television consumers, which would clearly violate the IIA assumption.

In [7]:
# 4. Own-price elasticities and diversion ratios
nliv_elasticities = nliv_results.compute_elasticities()
nliv_diversion_ratios = nliv_results.compute_diversion_ratios()

estimated_elasticities = np.array([np.mean(np.array(nliv_elasticities)[j::4],
                                           axis = 0)[j] for j in range(4)])

estimated_diversion_ratios = np.mean([nliv_diversion_ratios[i:i+4] for i in
                                      range(0, len(nliv_diversion_ratios), 4)],
                                      axis = 0)

In [8]:
# Results
mis_spec_results = pd.DataFrame(index = ['x', 'satellite',
                                         'wired', 'prices'],
                           data = {
                                    ('Estimates', 'Logit'): pl_results.beta.reshape(-1),
                                    ('Estimates', '+IV'): pliv_results.beta.reshape(-1),
                                    ('Estimates', '+nesting'): nliv_results.beta.reshape(-1),
                                    ('SEs', 'Logit'): pl_results.beta_se.reshape(-1),
                                    ('SEs', '+IV'): pliv_results.beta_se.reshape(-1),
                                    ('SEs', '+nesting'): nliv_results.beta_se.reshape(-1),
                                    }
                           )

nesting_results = pd.DataFrame(index = ['satellite', 'wired'],
                               data = {
                                    ('Nesting parameter'): nliv_results.rho.reshape(-1),
                                    ('SEs'): nliv_results.rho_se.reshape(-1)
                                      }
                              )

nesting_elasticity_results = pd.DataFrame(index = ['1', '2', '3', '4'],
                               data = {
                                    ('True elasticities'): true_elasticities,
                                    ('Estimated elasticities'): list(estimated_elasticities)
                                      }
                              )

nesting_diversion_results = pd.DataFrame(estimated_diversion_ratios,
                                         columns = ['J1', 'J2', 'J3', 'J4'])

In [9]:
mis_spec_results

Unnamed: 0_level_0,Estimates,Estimates,Estimates,SEs,SEs,SEs
Unnamed: 0_level_1,Logit,+IV,+nesting,Logit,+IV,+nesting
x,0.941886,1.041872,0.586131,0.125056,0.138905,0.222216
satellite,1.198787,4.000048,2.866038,0.171969,0.939494,0.899872
wired,1.210808,3.986083,3.126458,0.173985,0.931615,1.001046
prices,-1.021837,-2.082596,-1.2983,0.063558,0.355365,0.411238


In [10]:
nesting_results

Unnamed: 0,Nesting parameter,SEs
satellite,0.812108,0.497555
wired,0.594339,0.527749


In [11]:
nesting_elasticity_results

Unnamed: 0,True elasticities,Estimated elasticities
1,-4.249973,-5.368461
2,-4.371438,-5.61525
3,-4.271128,-10.500021
4,-4.25763,-10.552587


In [12]:
html = f"""
<div style="display: flex;">
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-bottom: 4px;">Diversion ratios via nested logit</h4>
        {nesting_diversion_results.to_html(index=True)}
    </div>
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-centre: 4px;">True diversion ratios</h4>
        {true_diversion_ratios.to_html(index=True)}
    </div>
</div>
"""

display(HTML(html))

Unnamed: 0,J1,J2,J3,J4
0,0.211558,0.501493,0.139318,0.147631
1,0.510961,0.214677,0.137197,0.137165
2,0.095531,0.095721,0.135904,0.672844
3,0.095725,0.085744,0.676927,0.141605

Unnamed: 0,J1,J2,J3,J4
0,0.333785,0.216986,0.220439,0.22879
1,0.219427,0.336985,0.221614,0.221974
2,0.219347,0.218454,0.334973,0.227226
3,0.224641,0.215012,0.223199,0.337148


# 2. Estimating the correct model

In [None]:
# 5.1. Mixed logit (with demand IVs)
df = df.drop(columns = 'nesting_ids')
X1_formulation = pyblp.Formulation('0 + x + satellite + wired + prices')
X2_formulation = pyblp.Formulation('0 + satellite + wired')
formulations = (X1_formulation, X2_formulation)
integration = pyblp.Integration('product', size = 5)
problem = pyblp.Problem(formulations, df, integration = integration)
bfgs = pyblp.Optimization('bfgs', {'gtol': 1e-6})
blpd_results = problem.solve(sigma = np.eye(2),
                             optimization = bfgs,
                             method = '1s')

In [None]:
# 5.2. Mixed logit (with demand + supply IVs)
df['supply_instruments0'] = df['x']
df['supply_instruments1'] = df['x'] ** 2
df['supply_instruments2'] = df['x'] ** 3
df['supply_instruments3'] = np.exp(df['x'])

X3_formulation = pyblp.Formulation('1 + w')
formulations = (X1_formulation, X2_formulation, X3_formulation)
integration = pyblp.Integration('product', size = 5)
problem = pyblp.Problem(formulations, df,
                        integration = integration)
optimization = pyblp.Optimization('trust-constr',
                                  {'gtol': 1e-3, 'xtol': 1e-3})
blpds_results = problem.solve(beta = blpd_results.beta,
                              sigma = blpd_results.sigma,
                              optimization = optimization,
                              method = '1s',
                              initial_update = True,
                              beta_bounds = [-np.ones(4)*10, np.ones(4)*10],
                              gamma_bounds = [-np.ones(2)*10, np.ones(2)*10])

In [None]:
# 5.3. Using optimal instruments
instrument_results = blpd_results.compute_optimal_instruments(method = 'approximate')

updated_problem = instrument_results.to_problem()

blp_opt_results = updated_problem.solve(beta = -np.ones(4),
                                        sigma = blpd_results.sigma,
                                        optimization = optimization,
                                        method = '1s')

For calculating the diversion ratios and own-price elasticities, I use the optimal IV estimates because by definition, they give the most efficient estimates. This can also be seen by inspecting the standard errors for each of these models. Notice that the standard errors for the optimal IV estimates are the smallest. The caveat is that my 2SLS matrix is highly multicollinear (hence unstable), so estimates might be a bit off from what's expected.

In [16]:
# 6. Diversion ratios using optimal IVs
new_elasticities = blp_opt_results.compute_elasticities()
new_diversion_ratios = blp_opt_results.compute_diversion_ratios()

new_estimated_elasticities = np.array([np.mean(np.array(new_elasticities)[j::4],
                                       axis = 0)[j] for j in range(4)])

new_estimated_diversion_ratios = np.mean([new_diversion_ratios[i:i+4] for i in
                                          range(0, len(new_diversion_ratios), 4)],
                                          axis = 0)

In [17]:
# 7. Bootstrapped diversion ratios using optimal IVs
bootstrap_results = blp_opt_results.bootstrap(draws = 1000, seed = 4309)

bootstrapped_diversion_ratios = bootstrap_results.compute_diversion_ratios()

diversion_ratios_lb = np.squeeze(np.percentile(bootstrapped_diversion_ratios, 2.5, axis = 0))
diversion_ratios_ub = np.squeeze(np.percentile(bootstrapped_diversion_ratios, 97.5, axis = 0))

diversion_ratios_lb = np.mean([diversion_ratios_lb[i:i+4] for i in
                               range(0, len(diversion_ratios_lb), 4)],
                               axis = 0)

diversion_ratios_ub = np.mean([diversion_ratios_ub[i:i+4] for i in
                               range(0, len(diversion_ratios_ub), 4)],
                               axis = 0)

In [26]:
# Results
blp_results = pd.DataFrame(index = ['x', 'satellite',
                                    'wired', 'prices'],
                           data = {
                                    ('Estimates', 'demand'): blpd_results.beta.reshape(-1),
                                    ('Estimates', '+supply'): blpds_results.beta.reshape(-1),
                                    ('Estimates', '+optimal IV'): blp_opt_results.beta.reshape(-1),
                                    ('SEs', 'demand'): blpd_results.beta_se.reshape(-1),
                                    ('SEs', '+supply'): blpds_results.beta_se.reshape(-1),
                                    ('SEs', '+optimal IV'): blp_opt_results.beta_se.reshape(-1),
                                    }
                           )


blp_elasticity_results = pd.DataFrame(index = ['1', '2', '3', '4'],
                                      data = {
                                            ('True elasticities'): true_elasticities,
                                            ('Estimated elasticities'): list(new_estimated_elasticities)
                                              }
                                      )

blp_diversion_results = pd.DataFrame(new_estimated_diversion_ratios,
                                     columns = ['J1', 'J2', 'J3', 'J4'])

blp_diversion_results_lb = pd.DataFrame(diversion_ratios_lb,
                                     columns = ['J1', 'J2', 'J3', 'J4'])

blp_diversion_results_ub = pd.DataFrame(diversion_ratios_ub,
                                     columns = ['J1', 'J2', 'J3', 'J4'])

In [27]:
blp_results

Unnamed: 0_level_0,Estimates,Estimates,Estimates,SEs,SEs,SEs
Unnamed: 0_level_1,demand,+supply,+optimal IV,demand,+supply,+optimal IV
x,1.304478,1.364281,1.190945,0.542017,0.401118,0.215234
satellite,4.366622,4.56371,4.625502,1.392575,1.98765,1.287823
wired,5.413915,5.733518,4.337704,2.498048,2.130091,1.24012
prices,-2.667987,-2.79971,-2.378347,1.023475,0.883364,0.514104


In [28]:
blp_elasticity_results

Unnamed: 0,True elasticities,Estimated elasticities
1,-4.249973,-4.687798
2,-4.371438,-4.857513
3,-4.271128,-4.480596
4,-4.25763,-4.485849


In [29]:
html = f"""
<div style="display: flex;">
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-bottom: 4px;">Diversion ratios via BLP with optimal instruments</h4>
        {blp_diversion_results.to_html(index=True)}
    </div>
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-centre: 4px;">True diversion ratios</h4>
        {true_diversion_ratios.to_html(index=True)}
    </div>
</div>
"""

display(HTML(html))

Unnamed: 0,J1,J2,J3,J4
0,0.387125,0.3225,0.141689,0.148686
1,0.32839,0.389741,0.140827,0.141042
2,0.15796,0.157414,0.292899,0.391728
3,0.160292,0.150885,0.391911,0.296912

Unnamed: 0,J1,J2,J3,J4
0,0.333785,0.216986,0.220439,0.22879
1,0.219427,0.336985,0.221614,0.221974
2,0.219347,0.218454,0.334973,0.227226
3,0.224641,0.215012,0.223199,0.337148


In [30]:
html = f"""
<div style="display: flex;">
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-bottom: 4px;">2.5th percentile of diversion ratios</h4>
        {blp_diversion_results_lb.to_html(index = True)}
    </div>
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-centre: 4px;">True diversion ratios</h4>
        {true_diversion_ratios.to_html(index = True)}
    </div>
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-centre: 4px;">97.5th percentile of diversion ratios</h4>
        {blp_diversion_results_ub.to_html(index = True)}
    </div>
</div>
"""

display(HTML(html))

Unnamed: 0,J1,J2,J3,J4
0,0.196244,0.200631,0.074902,0.07935
1,0.199536,0.201616,0.073242,0.072417
2,0.08536,0.082192,0.160838,0.233137
3,0.087221,0.076922,0.227013,0.168783

Unnamed: 0,J1,J2,J3,J4
0,0.333785,0.216986,0.220439,0.22879
1,0.219427,0.336985,0.221614,0.221974
2,0.219347,0.218454,0.334973,0.227226
3,0.224641,0.215012,0.223199,0.337148

Unnamed: 0,J1,J2,J3,J4
0,0.508826,0.516386,0.255985,0.266592
1,0.524558,0.507243,0.257488,0.261807
2,0.252087,0.256629,0.43971,0.578857
3,0.255178,0.253669,0.582819,0.439164


In [31]:
within_bounds = (
                (true_diversion_ratios >= blp_diversion_results_lb) &
                (true_diversion_ratios <= blp_diversion_results_ub)
                )

within_bounds

Unnamed: 0,J1,J2,J3,J4
0,True,True,True,True
1,True,True,True,True
2,True,True,True,False
3,True,True,False,True


The within_bounds dataframe tells us whether the true diversion ratios would be captured by the 95% confidence band from the bootstrapped diversion ratios (using optimal IVs). We can see that 14/16 of the diversion ratios would find a place within the bootstrapped confidence interval.

# 3. Merger analysis

8. In this differentiated products Bertrand setup, firms set
prices to best respond to the current market situations. The intensity of competition negatively affects an individual firm's price-setting ability. After a merger between two firms, the merging entities face reduced competition (the exact amount of which can be captured by the diversion ratios). This means that they can set higher prices. The pricing power of the non-merging firms changes naturally, but the extent of this depends on the context.

In [32]:
# 9. Firms 1 and 2 merge
df['merger_ids'] = df['firm_ids'].replace(2, 1)
changed_prices_21 = blp_opt_results.compute_prices(firm_ids = df['merger_ids'],
                                        costs = blp_opt_results.compute_costs()
                                       )

# 10. Firms 1 and 3 merge
df['merger_ids'] = df['firm_ids'].replace(3, 1)
changed_prices_31 = blp_opt_results.compute_prices(firm_ids = df['merger_ids'],
                                        costs = blp_opt_results.compute_costs()
                                       )

merger_avg_price_21 = changed_prices_21[df['firm_ids'].replace(2, 1) == 1].mean()
merger_avg_price_31 = changed_prices_31[df['firm_ids'].replace(3, 1) == 1].mean()

In [33]:
merger_results = pd.DataFrame(index = ['Merger-induced prices'],
                                      data = {
                                            ('2-1 merger'): merger_avg_price_21,
                                            ('3-1 merger'): merger_avg_price_31
                                              }
                                      )
merger_results

Unnamed: 0,2-1 merger,3-1 merger
Merger-induced prices,2.961015,2.7633


The merger between firms 3 and 1 would result in lower average prices as compared to the merger between firms 2 and 1.

11. A merged-induced reduction in marginal costs means that even if the merged firm has greater market power and tends to set higher prices, the underlying production cost savings can translate into lower prices or higher output. The efficiency gains can result in improved consumer surplus if the cost savings are passed on to consumers, potentially leading to an overall gain in social welfare.



In [34]:
# 12. Effects of merger's cost reduction on CS
cs_pre = blp_opt_results.compute_consumer_surpluses()

df['merger_ids'] = df['firm_ids'].replace(2, 1)
reduced_costs = blp_opt_results.compute_costs().copy()
reduced_costs[df.merger_ids== 1] = 0.85 * reduced_costs[df.merger_ids== 1]

changed_prices = blp_opt_results.compute_prices(firm_ids = df['merger_ids'],
                                        costs = reduced_costs
                                       )

cs_post = blp_opt_results.compute_consumer_surpluses(changed_prices)

delta_cs = sum(cs_post - cs_pre)
print(f' Change in consumer surplus due to merger: {delta_cs[0]}')

 Change in consumer surplus due to merger: -6.1834082763981755


13. A constant $M_t$ means that every market $t$ contributed equally to the aggregate consumer surplus. However, if $M_t$ varies across markets, then the true aggregate consumer surplus would be a weighted average of each market's consumer surplus, where the weights would be
\begin{gather*}
w_t = \frac{M_t}{\sum_{t' = 1}^T M_{t'}}
\end{gather*}
meaning that $w_t$ is the proportion of the total consumers served by each market. Aggregate consumer surplus is then expressed as
\begin{gather*}
CS_{overall} = \sum_{t = 1}^T w_t CS_t
\end{gather*}