# 0. Setup

In [1]:
!pip install -q pyBLP

In [2]:
# Libraries
import pandas as pd
import numpy as np
import pyblp
from IPython.display import display, HTML

# pyBLP settings
pyblp.options.verbose = False
bfgs = pyblp.Optimization('l-bfgs-b', {'gtol': 1e-8})
integration = pyblp.Integration('product', size = 5)

In [3]:
# Data
df = pd.read_csv('pset2_data.csv')\
                 .drop(columns = 'Unnamed: 0')
df['firm_ids'] = df['firm_ids'] + 1

true_diversion_ratios = pd.read_csv('true_diversionratio.csv')\
                                    .drop(columns = 'Unnamed: 0')

true_elasticities = pd.read_csv('true_ownprice_elasticities.csv')\
                                .drop(columns = 'Unnamed: 0')
true_elasticities = true_elasticities.iloc[0].to_list()
true_elasticities = [round(value, 8) for value in true_elasticities]

# Sum of characteristics of market alternatives
char_iv_t = df.groupby(by = ['market_ids'])['x']\
              .sum().to_frame('quality_sum_t').reset_index()
df = pd.merge(df, char_iv_t, on = ['market_ids'])
df['char_iv_t'] = df['quality_sum_t'] - df['x']

# Sum of characteristics in nest alternative (IV for nested logit)
char_iv_tn = df.groupby(by = ['market_ids', 'satellite'])['x']\
                .sum().to_frame('quality_sum_tn').reset_index()
df = pd.merge(df, char_iv_tn, on = ['market_ids', 'satellite'])
df['char_iv_tn'] = df['quality_sum_tn'] - df['x']

# 1. Estimating some mis-specified models

In [4]:
# 1. Plain logit (without IVs)
df['demand_instruments0'] = df['prices']

X1_formulation = pyblp.Formulation('0 + x + satellite + wired + prices')
formulations = (X1_formulation)

problem = pyblp.Problem(formulations, df)
pl_results = problem.solve(method = '1s')

In [5]:
# 2. Plain logit (with IVs)
df['demand_instruments0'] = df['w']
df['demand_instruments1'] = df['w'] ** 2
df['demand_instruments2'] = df['w'] ** 3
df['demand_instruments3'] = np.exp(df['w'])
df['demand_instruments4'] = df['char_iv_t']
df['demand_instruments5'] = df['char_iv_t'] ** 2
df['demand_instruments6'] = df['char_iv_t'] ** 3
df['demand_instruments7'] = np.exp(df['char_iv_t'])
df['demand_instruments8'] = np.exp(df['w'] * df['char_iv_t'])
df['demand_instruments9'] = np.exp(df['w'] + df['char_iv_t'])

problem = pyblp.Problem(formulations, df)
pliv_results = problem.solve(method = '1s')

In [6]:
# 3. Nested logit (with IVs)
df['nesting_ids'] = df['satellite']

df['demand_instruments10'] = df['char_iv_tn']
df['demand_instruments11'] = df['char_iv_tn'] ** 2
df['demand_instruments12'] = df['char_iv_tn'] ** 3
df['demand_instruments13'] = np.exp(df['char_iv_tn'])
df['demand_instruments14'] = np.exp(df['w'] * df['char_iv_tn'] * df['char_iv_t'])
df['demand_instruments15'] = np.exp(df['w'] + df['char_iv_tn'] + df['char_iv_t'])

problem = pyblp.Problem(formulations, df)
nliv_results = problem.solve(rho = [2, 2],
                             optimization = bfgs,
                             method = '1s',
                             rho_bounds = [-np.ones(2) * 20, np.ones(2) * 20])

In this logit model with seperate nests for wired and satellite televisions, the assumption is that once consumers choose into one of these nests, they don't care about the other (the IIA assumption). However, that might not hold necessarily. For example, if satellite televisions start being bundled with internet access, then they could become more attractive for wired television consumers, which would clearly violate the IIA assumption.

In [7]:
# 4. Own-price elasticities and diversion ratios
nliv_elasticities = nliv_results.compute_elasticities()
nliv_diversion_ratios = nliv_results.compute_diversion_ratios()

estimated_elasticities = np.array([np.mean(np.array(nliv_elasticities)[j::4],
                                           axis = 0)[j] for j in range(4)])

estimated_diversion_ratios = np.mean([nliv_diversion_ratios[i:i+4] for i in
                                      range(0, len(nliv_diversion_ratios), 4)],
                                      axis = 0)

In [12]:
# Results
mis_spec_results = pd.DataFrame(index = ['x', 'satellite',
                                         'wired', 'prices'],
                           data = {
                                    ('Estimates', 'Logit'): pl_results.beta.reshape(-1),
                                    ('Estimates', '+IVs'): pliv_results.beta.reshape(-1),
                                    ('Estimates', '+nests'): nliv_results.beta.reshape(-1),
                                    ('SEs', 'Logit'): pl_results.beta_se.reshape(-1),
                                    ('SEs', '+IVs'): pliv_results.beta_se.reshape(-1),
                                    ('SEs', '+nests'): nliv_results.beta_se.reshape(-1),
                                    }
                           )

nesting_results = pd.DataFrame(index = ['wired', 'satellite'],
                               data = {
                                    ('Nesting parameter'): nliv_results.rho.reshape(-1),
                                    ('SEs'): nliv_results.rho_se.reshape(-1)
                                      }
                              )

nesting_elasticity_results = pd.DataFrame(index = ['1', '2', '3', '4'],
                               data = {
                                    ('True elasticities'): true_elasticities,
                                    ('Estimated elasticities'): list(estimated_elasticities)
                                      }
                              )

nesting_diversion_results = pd.DataFrame(estimated_diversion_ratios,
                                         columns = ['J1', 'J2', 'J3', 'J4'])

In [13]:
mis_spec_results

Unnamed: 0_level_0,Estimates,Estimates,Estimates,SEs,SEs,SEs
Unnamed: 0_level_1,Logit,+IVs,+nests,Logit,+IVs,+nests
x,0.941886,1.059594,0.73373,0.125056,0.141909,0.144328
satellite,1.198787,4.496558,3.536754,0.171969,0.904344,0.761427
wired,1.210808,4.477988,3.427537,0.173985,0.894836,0.853696
prices,-1.021837,-2.27061,-1.587901,0.063558,0.341674,0.315704


The results from the IV logit model are very close to the true estimates suggested in the pdf. In contrast, the plain logit model only estimates $\beta^{(1)}$ well whereas all the other coefficients are much smaller in magnitude (although the signs are still correct).

In [14]:
nesting_results

Unnamed: 0,Nesting parameter,SEs
wired,0.490599,0.277975
satellite,0.56698,0.417002


In [15]:
nesting_elasticity_results

Unnamed: 0,True elasticities,Estimated elasticities
1,-4.249973,-6.235186
2,-4.371438,-6.516101
3,-4.271128,-5.562896
4,-4.25763,-5.567383


In [16]:
html = f"""
<div style="display: flex;">
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-bottom: 4px;">Diversion ratios via nested logit</h4>
        {nesting_diversion_results.to_html(index = True)}
    </div>
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-centre: 4px;">True diversion ratios</h4>
        {true_diversion_ratios.to_html(index = True)}
    </div>
</div>
"""

display(HTML(html))

Unnamed: 0,J1,J2,J3,J4
0,0.218902,0.484788,0.144009,0.152301
1,0.493909,0.222131,0.141983,0.141977
2,0.156907,0.156445,0.236397,0.450251
3,0.159143,0.149075,0.45008,0.241701

Unnamed: 0,J1,J2,J3,J4
0,0.333785,0.216986,0.220439,0.22879
1,0.219427,0.336985,0.221614,0.221974
2,0.219347,0.218454,0.334973,0.227226
3,0.224641,0.215012,0.223199,0.337148


Under the nested logit model, diversion ratios are far higher to alternatives in the same nest than to those in the other nest. Moreover, the estimated diversion to the outside option is also roughly 2/3 of what it truly is.

# 2. Estimating the correct model

In [17]:
# 5.1. Mixed logit (with demand IVs)
# dropping nesting specific columns
df = df.drop(columns = ['nesting_ids',
                        'demand_instruments10',
                        'demand_instruments11',
                        'demand_instruments12',
                        'demand_instruments13',
                        'demand_instruments14',
                        'demand_instruments15'])

X1_formulation = pyblp.Formulation('0 + x + satellite + wired + prices')
X2_formulation = pyblp.Formulation('0 + satellite + wired')
formulations = (X1_formulation, X2_formulation)

problem = pyblp.Problem(formulations, df, integration = integration)
blpd_results = problem.solve(sigma = np.eye(2),
                             optimization = bfgs,
                             method = '1s')

In [18]:
# 5.2. Mixed logit (with demand IVs + supply restrictions)
df['supply_instruments0'] = df['x']
df['supply_instruments1'] = df['x'] ** 2
df['supply_instruments2'] = df['x'] ** 3
df['supply_instruments3'] = np.exp(df['x'])

X3_formulation = pyblp.Formulation('1 + w')
formulations = (X1_formulation, X2_formulation, X3_formulation)

problem = pyblp.Problem(formulations, df,
                        integration = integration,
                        costs_type = 'log')

blpds_results = problem.solve(beta = blpd_results.beta,
                              sigma = blpd_results.sigma,
                              optimization = bfgs,
                              method = '1s',
                              initial_update = True,
                              beta_bounds = [-np.ones(4)*10, np.ones(4)*10],
                              gamma_bounds = [-np.ones(2)*10, np.ones(2)*10],
                              sigma_bounds = [-np.ones((2, 2)) * 10, np.ones((2, 2)) * 10],
                              costs_bounds = (1e-4, None))

In [19]:
# 5.3. Using optimal instruments
# Reconstructing the main df with optimal instruments, dropping those
# that are highly collinear with main features of interest
instrument_results = blpds_results.compute_optimal_instruments(method = 'approximate')

df = df.drop(columns = [col for col in df.columns if (col.startswith('demand_instruments')
                                                      or col.startswith('supply_instruments'))])

opt_demand_instruments = instrument_results.demand_instruments
opt_supply_instruments = instrument_results.supply_instruments

opt_demand_df = pd.DataFrame(opt_demand_instruments)
opt_demand_df.columns = [f'demand_instruments{i}' for i in range(opt_demand_df.shape[1])]

opt_supply_df = pd.DataFrame(opt_supply_instruments)
opt_supply_df.columns = [f'supply_instruments{i}' for i in range(opt_supply_df.shape[1])]

def drop_highly_correlated(df_ex, threshold = 0.95):
    corr_matrix = df_ex.corr().abs()
    upper = np.triu(corr_matrix, k = 1)

    to_drop = set()
    cols = df_ex.columns

    for i in range(len(cols)):
        for j in range(i + 1, len(cols)):
            if upper[i, j] > threshold:
                col_i, col_j = cols[i], cols[j]

                is_instr_i = col_i.startswith('demand_instruments') or col_i.startswith('supply_instruments')
                is_instr_j = col_j.startswith('demand_instruments') or col_j.startswith('supply_instruments')

                if is_instr_i and not is_instr_j:
                    to_drop.add(col_i)
                elif is_instr_j and not is_instr_i:
                    to_drop.add(col_j)
                elif is_instr_i and is_instr_j:
                    to_drop.add(col_j)

    return df_ex.drop(columns = list(to_drop))


df = pd.concat([df, opt_demand_df, opt_supply_df], axis = 1)
df = drop_highly_correlated(df)

problem = pyblp.Problem(formulations, df,
                        integration = integration,
                        costs_type = 'log')

blp_opt_results = problem.solve(beta = blpds_results.beta,
                                sigma = blpds_results.sigma,
                                optimization = bfgs,
                                initial_update = True,
                                beta_bounds = [-np.ones(4)*10, np.ones(4)*10],
                                gamma_bounds = [-np.ones(2)*10, np.ones(2)*10],
                                sigma_bounds = [-np.ones((2, 2)) * 10, np.ones((2, 2)) * 10],
                                method = '1s',
                                costs_bounds = (1e-4, None))

For calculating the diversion ratios and own-price elasticities, I use the optimal IV estimates because by definition, they give the most efficient estimates. In addition, the estimates that they produce are very close to the true suggested estimates as well. Note that we can't simply compare the standard errors across these three models because they have different moment conditions and instruments being used.

In [20]:
# 6. Diversion ratios using optimal IVs
new_elasticities = blp_opt_results.compute_elasticities()
new_estimated_elasticities = np.array([np.mean(np.array(new_elasticities)[j::4],
                                       axis = 0)[j] for j in range(4)])

new_diversion_ratios = blp_opt_results.compute_diversion_ratios()
new_estimated_diversion_ratios = np.mean([new_diversion_ratios[i:i+4] for i in
                                          range(0, len(new_diversion_ratios), 4)],
                                          axis = 0)

In [None]:
# 7. Bootstrapped diversion ratios using optimal IVs (takes a long time to compute)
bootstrap_results = blp_opt_results.bootstrap(draws = 1000, seed = 4309)

bootstrapped_diversion_ratios = bootstrap_results.compute_diversion_ratios()

diversion_ratios_lb = np.squeeze(np.percentile(bootstrapped_diversion_ratios, 2.5, axis = 0))
diversion_ratios_ub = np.squeeze(np.percentile(bootstrapped_diversion_ratios, 97.5, axis = 0))

diversion_ratios_lb = np.mean([diversion_ratios_lb[i:i+4] for i in
                               range(0, len(diversion_ratios_lb), 4)],
                               axis = 0)

diversion_ratios_ub = np.mean([diversion_ratios_ub[i:i+4] for i in
                               range(0, len(diversion_ratios_ub), 4)],
                               axis = 0)

blp_diversion_results_lb = pd.DataFrame(diversion_ratios_lb,
                                     columns = ['J1', 'J2', 'J3', 'J4'])

blp_diversion_results_ub = pd.DataFrame(diversion_ratios_ub,
                                     columns = ['J1', 'J2', 'J3', 'J4'])

In [21]:
# Results
blp_results = pd.DataFrame(index = ['x', 'satellite',
                                    'wired', 'prices'],
                           data = {
                                    ('Estimates', 'demand'): blpd_results.beta.reshape(-1),
                                    ('Estimates', '+supply'): blpds_results.beta.reshape(-1),
                                    ('Estimates', '+optimal IV'): blp_opt_results.beta.reshape(-1),
                                    ('SEs', 'demand'): blpd_results.beta_se.reshape(-1),
                                    ('SEs', '+supply'): blpds_results.beta_se.reshape(-1),
                                    ('SEs', '+optimal IV'): blp_opt_results.beta_se.reshape(-1),
                                    }
                           )

blp_supply_results = pd.DataFrame(index = ['constant', 'w'],
                                  data = {
                                          ('Estimates', 'BLP + supply'): blpds_results.gamma.reshape(-1),
                                          ('Estimates', '+optimal IV'): blp_opt_results.gamma.reshape(-1),
                                          ('SEs', 'BLP + supply'): blpds_results.gamma_se.reshape(-1),
                                          ('SEs', '+optimal IV'): blp_opt_results.gamma_se.reshape(-1),
                                         }
                                  )


blp_elasticity_results = pd.DataFrame(index = ['1', '2', '3', '4'],
                                      data = {
                                            ('True elasticities'): true_elasticities,
                                            ('Estimated elasticities'): list(new_estimated_elasticities)
                                              }
                                      )

blp_diversion_results = pd.DataFrame(new_estimated_diversion_ratios,
                                     columns = ['J1', 'J2', 'J3', 'J4'])

In [22]:
blp_results

Unnamed: 0_level_0,Estimates,Estimates,Estimates,SEs,SEs,SEs
Unnamed: 0_level_1,demand,+supply,+optimal IV,demand,+supply,+optimal IV
x,1.167812,1.21118,1.046526,0.366895,0.178484,0.275417
satellite,4.592266,4.711045,4.020642,1.097005,1.025403,6.117178
wired,5.078068,5.367685,4.004707,1.976881,1.145567,6.152495
prices,-2.516231,-2.631508,-2.091166,0.798452,0.44025,2.355243


In [23]:
blp_supply_results

Unnamed: 0_level_0,Estimates,Estimates,SEs,SEs
Unnamed: 0_level_1,BLP + supply,+optimal IV,BLP + supply,+optimal IV
constant,0.542678,0.559902,0.06664,0.445161
w,0.237303,0.201225,0.040112,0.128965


In [24]:
blp_elasticity_results

Unnamed: 0,True elasticities,Estimated elasticities
1,-4.249973,-4.549309
2,-4.371438,-4.687618
3,-4.271128,-4.577483
4,-4.25763,-4.560811


In [25]:
html = f"""
<div style="display: flex;">
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-bottom: 4px;">Diversion ratios via BLP with optimal instruments</h4>
        {blp_diversion_results.to_html(index = True)}
    </div>
    <div style="flex: 1; padding: 0; margin: 0;">
        <h4 style="margin-centre: 4px;">True diversion ratios</h4>
        {true_diversion_ratios.to_html(index = True)}
    </div>
</div>
"""

display(HTML(html))

Unnamed: 0,J1,J2,J3,J4
0,0.337475,0.217244,0.21912,0.226161
1,0.218858,0.341594,0.21914,0.220407
2,0.219152,0.216679,0.338358,0.225811
3,0.222656,0.213781,0.222022,0.34154

Unnamed: 0,J1,J2,J3,J4
0,0.333785,0.216986,0.220439,0.22879
1,0.219427,0.336985,0.221614,0.221974
2,0.219347,0.218454,0.334973,0.227226
3,0.224641,0.215012,0.223199,0.337148


The diversion ratios from the optimal IV estimates are pretty much the same as the true diversion ratios.

# 3. Merger analysis

8. In this differentiated products Bertrand setup, firms set
prices to best respond to the current market situations. The intensity of competition negatively affects an individual firm's price-setting ability. After a merger between two firms, the merging entities face reduced competition (the exact amount of which can be captured by the diversion ratios). This means that they can set higher prices. The pricing power of the non-merging firms changes naturally, but the extent of this depends on the context.

In [26]:
# 9. Firms 1 and 2 merge
df['merger_ids'] = df['firm_ids'].replace(2, 1)
changed_prices_21 = blp_opt_results.compute_prices(firm_ids = df['merger_ids'],
                                        costs = blp_opt_results.compute_costs()
                                       )

# 10. Firms 1 and 3 merge
df['merger_ids'] = df['firm_ids'].replace(3, 1)
changed_prices_31 = blp_opt_results.compute_prices(firm_ids = df['merger_ids'],
                                        costs = blp_opt_results.compute_costs()
                                       )

merger_avg_price_21 = changed_prices_21[df['firm_ids'].replace(2, 1) == 1].mean()
merger_avg_price_31 = changed_prices_31[df['firm_ids'].replace(3, 1) == 1].mean()

In [27]:
merger_results = pd.DataFrame(index = ['Merger-induced prices'],
                                      data = {
                                            ('2-1 merger'): merger_avg_price_21,
                                            ('3-1 merger'): merger_avg_price_31
                                              }
                                      )

merger_results

Unnamed: 0,2-1 merger,3-1 merger
Merger-induced prices,2.850533,2.814427


The merger between firms 3 and 1 would result in lower average prices as compared to the merger between firms 2 and 1, but the difference is marginal.

11. A merged-induced reduction in marginal costs means that even if the merged firm has greater market power and tends to set higher prices, the underlying production cost savings can translate into lower prices or higher output. The efficiency gains can result in improved consumer surplus if the cost savings are passed on to consumers, potentially leading to an overall gain in social welfare.



In [28]:
# 12. Effects of merger's cost reduction on CS
cs_pre = blp_opt_results.compute_consumer_surpluses()

df['merger_ids'] = df['firm_ids'].replace(2, 1)
reduced_costs = blp_opt_results.compute_costs().copy()
reduced_costs[df.merger_ids== 1] = 0.85 * reduced_costs[df.merger_ids== 1]

changed_prices = blp_opt_results.compute_prices(firm_ids = df['merger_ids'],
                                        costs = reduced_costs
                                       )

cs_post = blp_opt_results.compute_consumer_surpluses(changed_prices)

delta_cs = sum(cs_post - cs_pre)
print(f' Change in consumer surplus due to merger: {delta_cs[0]}')

 Change in consumer surplus due to merger: 15.92163648018013


13. A constant $M_t$ means that every market $t$ contributed equally to the aggregate consumer surplus. However, if $M_t$ varies across markets, then the true aggregate consumer surplus would be a weighted average of each market's consumer surplus, where the weights would be
\begin{gather*}
w_t = \frac{M_t}{\sum_{t' = 1}^T M_{t'}}
\end{gather*}
meaning that $w_t$ is the proportion of the total consumers served by each market. Aggregate consumer surplus is then expressed as
\begin{gather*}
CS_{overall} = \sum_{t = 1}^T w_t CS_t
\end{gather*}