In [3]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from linearmodels.iv import IV2SLS
import statsmodels.api as sm
import numpy.linalg as npl

In [7]:
df = pd.read_csv('../output/data/final_dataset.csv')
df.head()


Unnamed: 0,rank,app,developer,country,app_id,price,ndownloads,lowerbound,upperbound,averagescore,...,inapppurchases,estimateddownloads,date,newgroup,iap,highest_rank,lowest_rank,multiplier,new_est,nest
0,476,KUNI Cam,GinnyPix,Sweden,com.ginnypix.kuni,4.49,"50,000+",50000,100000,4.5,...,True,50000,9/21/19,OUTSIDE,1,12,498,102.88066,52263.375,OUTSIDE
1,452,Link2SD Plus (New),Bulent Akpinar,Spain,com.buak.link2sdplus,2.35,"100,000+",100000,500000,3.3,...,False,282840,9/21/19,OUTSIDE,0,2,493,814.66394,133401.22,OUTSIDE
2,291,Pushy,medienwerkstatt,Germany,de.fk.android.pushy,1.49,"10,000+",10000,50000,4.4,...,False,25040,9/21/19,OUTSIDE,0,11,494,82.815735,26811.594,OUTSIDE
3,397,EasyMSR,DEFTUN TECH,United States,com.gbtf.msrx6pro,19.99,"10,000+",10000,50000,3.5,...,False,10000,9/21/19,OUTSIDE,0,9,500,81.466393,18391.039,OUTSIDE
4,154,GTA: Liberty City Stories,Rockstar Games,Australia,com.rockstargames.gtalcs,6.99,"100,000+",100000,500000,4.3,...,False,253380,9/21/19,OUTSIDE,0,3,489,823.04529,375720.16,OUTSIDE


# Part I Demand Estimation
### 1.1 Berry Logit

Compute market share

In [46]:
# Ok by country because we need US later, but remember to write in pdf

In [47]:
"""
df['total_market'] = df['new_est'].sum()
df['s_j'] = df['new_est'] / df['total_market']

#compute market share of outside good
df['Q_out'] = df[df['nest'].str.upper() == 'OUTSIDE']['new_est'].sum()
df['s_0'] = df['Q_out'] / df['total_market']
df = df[df['nest'].str.upper() != 'OUTSIDE'].copy() #remove outside good from dataset"""

"\ndf['total_market'] = df['new_est'].sum()\ndf['s_j'] = df['new_est'] / df['total_market']\n\n#compute market share of outside good\ndf['Q_out'] = df[df['nest'].str.upper() == 'OUTSIDE']['new_est'].sum()\ndf['s_0'] = df['Q_out'] / df['total_market']\ndf = df[df['nest'].str.upper() != 'OUTSIDE'].copy() #remove outside good from dataset"

In [8]:
#total market by country
df['total_market'] = df.groupby('country')['new_est'].transform('sum')
df['s_j'] = df['new_est'] / df['total_market']

#compute market share of outside good
outside_by_country = df[df['nest'].str.upper() == 'OUTSIDE'].groupby('country')['new_est'].sum()
df['Q_out'] = df['country'].map(outside_by_country)
df['s_0'] = df['Q_out'] / df['total_market']
df = df[df['nest'].str.upper() != 'OUTSIDE'].copy() #remove outside good from dataset

Estimate Berry equation without intruments for prices

In [9]:
epsilon = 1e-10  # avoid log(0)
df['y'] = np.log(df['s_j'].clip(lower=epsilon)) - np.log(df['s_0'].clip(lower=epsilon))

formula = 'y ~ price + averagescore + iap'

df_clean = df.dropna(subset=['y', 'price', 'averagescore', 'iap', 'country']) #Added df clean cause soemthing was NA

ols = smf.ols(formula=formula, data=df_clean).fit(
    cov_type="cluster", cov_kwds={"groups": df_clean["country"]}
)
print(ols.summary())

fe = smf.ols(formula + " + C(country)", data=df_clean).fit(
    cov_type="cluster", cov_kwds={"groups": df_clean["country"]}
)
print(fe.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.061
Model:                            OLS   Adj. R-squared:                  0.060
Method:                 Least Squares   F-statistic:                     87.67
Date:                dom, 02 nov 2025   Prob (F-statistic):           5.79e-08
Time:                        18:49:58   Log-Likelihood:                -9838.1
No. Observations:                4504   AIC:                         1.968e+04
Df Residuals:                    4500   BIC:                         1.971e+04
Df Model:                           3                                         
Covariance Type:              cluster                                         
                   coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------
Intercept       -7.3341      0.308    -23.827   



Estimate Berry equation with price instruments

In [10]:
# One more instrument: number of apps in the same genre (across all countries)

df['Z_numapps'] = (
    df.groupby('appgenre')['app_id'].transform('count').astype(float)
)
df["Z_numapps"] = np.log(df["Z_numapps"].clip(lower=1))

# new: instrument = number of other apps by same developer (exclude the app itself)
df['Z_dev_count'] = df.groupby('developer')['app_id'].transform('count').astype(float)
df['Z_dev_other'] = (df['Z_dev_count'] - 1.0).clip(lower=0.0)   # number of other apps
# log-transform (clip at 1 so single-product devs map to 0)
df['Z_dev_other'] = np.log(df['Z_dev_other'].clip(lower=1.0)) # skewed counts

cols = ["y", "price", "averagescore", "iap", "country", "Z_numapps", "Z_dev_other"]
df = df.replace([np.inf, -np.inf], np.nan).dropna(subset=cols)
df["iap"] = df["iap"].astype(int)
df["country"] = df["country"].astype(str)

fe = pd.get_dummies(df["country"], prefix="cty", drop_first=True)
X = pd.concat([df[["averagescore", "iap"]], fe], axis=1)

# IV with two instruments
iv_res = IV2SLS(
    dependent=df["y"],
    exog=pd.DataFrame({"const": 1}, index=df.index).join(X),
    endog=df[["price"]],
    instruments=df[["Z_numapps", "Z_dev_other"]],
).fit(cov_type="clustered", clusters=df["country"])

print("\n=== IV (2SLS) with country FE, clustered SE by country (two instruments) ===")
print(iv_res.summary)

# First stage (report): include both instruments
fs_mod = smf.ols(
    "price ~ Z_numapps + Z_dev_other + averagescore + iap + C(country)",
    data=df
).fit(cov_type="cluster", cov_kwds={"groups": df["country"]})

print("\n=== First Stage OLS (clustered by country) ===")
print(fs_mod.summary())

# Joint test for instruments (Wald / F)
try:
    w = fs_mod.f_test("Z_numapps = 0, Z_dev_other = 0")
    fval = float(w.fvalue) if hasattr(w, "fvalue") else float(w.statistic)
    pval = float(w.pvalue) if hasattr(w, "pvalue") else np.nan
    print(f"\n[first-stage] joint F (Z_numapps & Z_dev_other) = {fval:.2f}, p = {pval:.3g}")
except Exception:
    pass

# Partial R² of instruments given controls+FE (multivariate analogue)
controls_fe = "averagescore + iap + C(country)"
res_p = smf.ols("price ~ " + controls_fe, data=df).fit().resid
# regress each instrument on controls, then compute R^2 of matrix of residuals projection
res_z1 = smf.ols("Z_numapps ~ " + controls_fe, data=df).fit().resid
res_z2 = smf.ols("Z_dev_other ~ " + controls_fe, data=df).fit().resid
Zres = np.column_stack([res_z1, res_z2])
# compute multivariate partial R2 via projection of res_p on columns of Zres
proj = Zres @ npl.pinv(Zres.T @ Zres) @ Zres.T
SSR_reduced = ((res_p - proj.dot(res_p))**2).sum()
SSR_full = (res_p**2).sum()
partial_R2_multi = 1.0 - SSR_reduced / SSR_full
print(f"[First-stage] multivariate partial R² (instruments | controls+FE): {partial_R2_multi:.4f}")


=== IV (2SLS) with country FE, clustered SE by country (two instruments) ===
                          IV-2SLS Estimation Summary                          
Dep. Variable:                      y   R-squared:                     -9.0652
Estimator:                    IV-2SLS   Adj. R-squared:                -9.0965
No. Observations:                4504   F-statistic:                -4.162e+18
Date:                dom, nov 02 2025   P-value (F-stat)                1.0000
Time:                        18:56:59   Distribution:                 chi2(14)
Cov. Estimator:             clustered                                         
                                                                              
                                 Parameter Estimates                                  
                    Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------------
const                 -16.702



In [11]:
df['Z_numapps'] = (
    df.groupby(['country','appgenre'])['app_id'].transform('count').astype(float)
)

df["Z_numapps"] = np.log(df["Z_numapps"].clip(lower=1))


cols = ["y", "price", "averagescore", "iap", "country", "Z_numapps"]
df = df.replace([np.inf, -np.inf], np.nan).dropna(subset=cols)
df["iap"] = df["iap"].astype(int)
df["country"] = df["country"].astype(str)

fe = pd.get_dummies(df["country"], prefix="cty", drop_first=True)
X = pd.concat([df[["averagescore", "iap"]], fe], axis=1)

iv_res = IV2SLS(
    dependent=df["y"],
    exog=pd.DataFrame({"const": 1}, index=df.index).join(X),
    endog=df[["price"]],
    instruments=df[["Z_numapps"]],
).fit(cov_type="clustered", clusters=df["country"])

print("\n=== IV (2SLS) with country FE, clustered SE by country ===")
print(iv_res.summary)


# First stage with country FE via C(country)
fs_mod = smf.ols(
    "price ~ Z_numapps + averagescore + iap + C(country)",
    data=df
).fit(cov_type="cluster", cov_kwds={"groups": df["country"]})

print("\n=== First Stage OLS (clustered by country) ===")
print(fs_mod.summary())

# Robust F for excluded instrument (single Z)
t_iv = fs_mod.tvalues["Z_numapps"]
F_iv = float(t_iv**2)
print(f"\n[First-stage] robust t(Z_numapps) = {t_iv:.2f}  →  robust F = {F_iv:.2f}")

# Partial R^2 of instrument given controls + FE
controls_fe = "averagescore + iap + C(country)"
res_p = smf.ols("price ~ " + controls_fe, data=df).fit().resid
res_z = smf.ols("Z_numapps ~ " + controls_fe, data=df).fit().resid
partial_R2 = np.corrcoef(res_p, res_z)[0, 1] ** 2
print(f"[First-stage] partial R² (given controls+FE): {partial_R2:.4f}")


=== IV (2SLS) with country FE, clustered SE by country ===
                          IV-2SLS Estimation Summary                          
Dep. Variable:                      y   R-squared:                     -0.4128
Estimator:                    IV-2SLS   Adj. R-squared:                -0.4172
No. Observations:                4504   F-statistic:                 1.093e+19
Date:                dom, nov 02 2025   P-value (F-stat)                0.0000
Time:                        18:58:19   Distribution:                 chi2(14)
Cov. Estimator:             clustered                                         
                                                                              
                                 Parameter Estimates                                  
                    Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------------
const                 -4.9876     1.0820    -4.



### 1.2 Nested Logit model

In [13]:
S_k = df.groupby(['country','nest'])['s_j'].transform('sum')
df['s_j_given_k'] = (df['s_j'] / S_k).clip(lower=1e-12)
df['ln_within'] = np.log(df['s_j_given_k'])


fe = pd.get_dummies(df["country"], prefix="cty", drop_first=True)
X_exog = pd.concat([df[["averagescore","iap","ln_within"]], fe], axis=1)
X_exog = sm.add_constant(X_exog)

Y = df["y"]
P = df[["price"]]        
Z = df[["Z_numapps"]]      

# OLS with formulas and country FE (C(country))
ols = smf.ols("y ~ price + averagescore + iap + ln_within + C(country)", data=df)\
         .fit(cov_type="cluster", cov_kwds={"groups": df["country"]})

# IV: use linearmodels formula interface
iv = IV2SLS.from_formula(
    "y ~ 1 + averagescore + iap + ln_within + C(country) "
    "+ [price ~ Z_numapps]",
    data=df
).fit(cov_type="clustered", clusters=df["country"])

# First stage (for reporting only)
fs = smf.ols("price ~ Z_numapps + averagescore + iap + ln_within + C(country)", data=df)\
        .fit(cov_type="cluster", cov_kwds={"groups": df["country"]})

t_z = fs.tvalues.get("Z_numapps", np.nan)
F_z = float(t_z**2) if np.isfinite(t_z) else np.nan

# Partial R² of Z given controls+FE
res_p = smf.ols("price ~ averagescore + iap + ln_within + C(country)", data=df).fit().resid
res_z = smf.ols("Z_numapps ~ averagescore + iap + ln_within + C(country)", data=df).fit().resid
partial_R2 = float(np.corrcoef(res_p, res_z)[0, 1] ** 2)

print("\n=== Nested Logit: OLS (clustered by country) ===")
print(ols.summary())

print("\n=== Nested Logit: IV-2SLS (price instrumented; clustered by country) ===")
print(iv.summary)

print("\n=== First Stage (price on Z + controls + ln_within + FE) ===")
print(fs.summary())
print(f"\n[First-stage] robust t(Z_numapps) = {t_z:.2f}  →  robust F = {F_z:.2f}")
print(f"[First-stage] partial R² (given controls+FE): {partial_R2:.4f}")

# Comparison table
def pick(res, name):
    params = getattr(res, "params")  # both have .params
    # std errors: statsmodels -> .bse ; linearmodels -> .std_errors
    ses = getattr(res, "bse", None)
    if ses is None:
        ses = getattr(res, "std_errors")  # linearmodels IVResults
    params = pd.Series(params)
    ses = pd.Series(ses)

    # normalize constant name
    const_name = "Intercept" if "Intercept" in params.index else ("const" if "const" in params.index else None)

    keep = ["ln_within", "price", "averagescore", "iap"]
    if const_name:
        keep.append(const_name)

    rows = []
    for k in keep:
        if k in params.index:
            rows.append([k, params[k], ses[k]])
    out = pd.DataFrame(rows, columns=["param", f"{name}_coef", f"{name}_se"])
    return out

tab = pick(ols, "OLS").merge(pick(iv, "IV"), on="param", how="outer")

# pretty order: lambda first, then price, X's, const (Intercept/const label preserved)
order = ["ln_within", "price", "averagescore", "iap", "Intercept", "const"]
tab["order"] = tab["param"].apply(lambda x: order.index(x) if x in order else 99)
tab = tab.sort_values("order").drop(columns="order")

print("\n=== Comparison table (Nested logit: OLS vs IV) ===")
print(tab.to_string(index=False))


=== Nested Logit: OLS (clustered by country) ===
                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.806
Model:                            OLS   Adj. R-squared:                  0.805
Method:                 Least Squares   F-statistic:                     2413.
Date:                dom, 02 nov 2025   Prob (F-statistic):           4.19e-16
Time:                        18:59:38   Log-Likelihood:                -6286.7
No. Observations:                4504   AIC:                         1.261e+04
Df Residuals:                    4488   BIC:                         1.271e+04
Df Model:                          15                                         
Covariance Type:              cluster                                         
                                   coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------



# Part II supply analysis

2.1 cost estimation

In [33]:
# Guido
df = pd.read_csv('../output/data/final_dataset.csv')
df.head()

#total market by country
df['total_market'] = df.groupby('country')['new_est'].transform('sum')
df['s_j'] = df['new_est'] / df['total_market']

#compute market share of outside good
outside_by_country = df[df['nest'].str.upper() == 'OUTSIDE'].groupby('country')['new_est'].sum()
df['Q_out'] = df['country'].map(outside_by_country)
df['s_0'] = df['Q_out'] / df['total_market']

df_us = df.loc[df['country'] == 'United States'].copy()
df_us = df_us[df_us['nest'].str.upper() != 'OUTSIDE'].copy() #remove outside good from dataset

epsilon = 1e-10  # avoid log(0)
df['y'] = np.log(df['s_j'].clip(lower=epsilon)) - np.log(df['s_0'].clip(lower=epsilon))



need = ["s_j", "price"]
df_us = df_us.replace([np.inf, -np.inf], np.nan).dropna(subset=need).copy()
df_us['y'] = np.log(df_us['s_j'].clip(lower=epsilon)) - np.log(df_us['s_0'].clip(lower=epsilon))

n = len(df_us)
p = df_us['price'].to_numpy().astype(float)

#retrieve alpha hat from previous estimation
alpha_hat = float(-0.4778) * (-1)  

#compute ownership matrix
firms = df_us['developer'].astype('category').cat.codes.to_numpy()
Omega_current = (firms[:, None] == firms[None, :]).astype(float)

#Compute Jacobian J = ds/dp
s = df_us['s_j'].to_numpy().astype(float)
outer_ss = np.outer(s, s) 
J = - alpha_hat * (np.diag(s) - outer_ss)

def calculate_markups(Omega, ownership): #define it to use it later in point 2.2
    #Markups from FOCs: 
    G = -(Omega * J)  # Hadamard product
    markup =  npl.solve(G, s)
    mc = p - markup
    pct = np.where(np.isfinite(markup / p), markup / p, np.nan)
    #Save marginal costs and markups
    df_summary = pd.DataFrame(
        {
            "ownership": ownership,
            "markup": markup,
            "mc": mc,
            "pct_markup": pct,
            "developer": df_us["developer"].to_numpy(),
            "app": df_us["app"].to_numpy(),
        },
        index=df_us.index,   # ensure 1:1 join on index
    )
    return df_summary
#attach marginal costs to df_us
df_us['mc'] = calculate_markups(Omega_current, "current")['mc'].to_numpy()
#same for markups
df_us['markup'] = calculate_markups(Omega_current, "current")['markup'].to_numpy()

#check own price elasticity
df_us['own_price_elast'] = alpha_hat * df_us['price'] * (1.0 - df_us['s_j'])



In [34]:
#make summary of df_us

summary = df_us[['markup','mc','own_price_elast']].agg(['mean','median','std', 'min','max'])
print(summary)
"""by_firm = None
if firm_col:
    by_firm = df_us.groupby(firm_col)[['markup','mc','pct_markup']].median().sort_values('pct_markup', ascending=False)

print("Overall (US) markup / cost summary:")
print(summary)

if by_firm is not None:
    print("\nMedian markup/cost by firm (US):")
    print(by_firm.head(100)) """


          markup         mc  own_price_elast
mean    2.104248   2.118757         2.013477
median  2.095473   1.294875         1.618033
std     0.026289   3.601288         1.719660
min     2.092927  -1.171780         0.458439
max     2.474347  27.896365        14.325545


'by_firm = None\nif firm_col:\n    by_firm = df_us.groupby(firm_col)[[\'markup\',\'mc\',\'pct_markup\']].median().sort_values(\'pct_markup\', ascending=False)\n\nprint("Overall (US) markup / cost summary:")\nprint(summary)\n\nif by_firm is not None:\n    print("\nMedian markup/cost by firm (US):")\n    print(by_firm.head(100)) '

2.2 markup estimation

In [35]:
#Guido

# Build Omega under three scenarios ---
n = len(df_us)

# Single-product
Omega_single = np.eye(n)

# Monopolist (everyone same owner)
Omega_monopoly = np.ones((n, n), dtype=float)

# --- 3) Compute markups ---
res_current = calculate_markups(Omega_current, 'current')
res_single  = calculate_markups(Omega_single, 'single')
res_monopoly = calculate_markups(Omega_monopoly, 'monopolist')

#append all three results together
res_all = pd.concat([res_current, res_single, res_monopoly], ignore_index=True)
#summarize results
summary_tbl = (res_all
    .groupby("ownership")
    .agg(markup_mean=('markup','mean'),
         markup_median=('markup','median'),
         mc_mean=('mc','mean'),
         mc_median=('mc','median'))
    .round(3)
)
print(summary_tbl)

#summary_tbl.to_latex("table_markups.tex", caption="Markup and marginal cost summaries", label="tab:markups")




            markup_mean  markup_median  mc_mean  mc_median
ownership                                                 
current           2.104          2.095    2.119      1.295
monopolist       17.845         17.845  -13.622    -14.455
single            2.098          2.094    2.125      1.295


In [83]:
"""# --- 1) Helpers ---
def ownership_matrix(ids):
    """Ω_{jk}=1 if same firm, else 0."""
    return (ids[:,None] == ids[None,:]).astype(float)

def markups_from_omega(dfm, alpha, Omega, ridge=1e-10):
    s = dfm['s_j'].to_numpy(float)
    p = dfm['price'].to_numpy(float)
    J = alpha * (np.diag(s) - np.outer(s, s))                  # ds/dp
    G = Omega * J                                              # Hadamard
    # Solve G m = -s
    try:
        m = -npl.solve(G, s)
    except npl.LinAlgError:
        m = -npl.solve(G + ridge*np.eye(len(s)), s)            # light ridge
    mc = p - m
    out = pd.DataFrame({'markup': m, 'mc': mc, 'price': p})
    out['pct_markup'] = np.where(np.isfinite(out['markup']/out['price']),
                                 out['markup']/out['price'], np.nan)
    return out

# --- 2) Build Ω under three scenarios ---
n = len(df_us)
# Current ownership
if firm_col is not None:
    firm_ids = df_us[firm_col].astype('category').cat.codes.to_numpy()
    Omega_current = ownership_matrix(firm_ids)
else:
    Omega_current = np.eye(n)   # if you don't have firm info, current==single-product

# Single-product
Omega_single = np.eye(n)

# Monopolist (everyone same owner)
Omega_mono = np.ones((n, n), dtype=float)

# --- 3) Compute markups ---
res_current = markups_from_omega(df_us, alpha_hat, Omega_current)
res_single  = markups_from_omega(df_us, alpha_hat, Omega_single)
res_mono    = markups_from_omega(df_us, alpha_hat, Omega_mono)


df_us = df_us.reset_index(drop=True)
res_current = res_current.reset_index(drop=True)
df_us[['mc', 'markup', 'pct_markup']] = res_current[['mc', 'markup', 'pct_markup']]


# --- 4) Summaries into a table ---
def summarize(dfm):
    return pd.Series({
        'markup_mean': dfm['markup'].mean(),
        'markup_median': dfm['markup'].median(),
        'pct_markup_mean': dfm['pct_markup'].mean(),
        'pct_markup_median': dfm['pct_markup'].median(),
        'mc_median': dfm['mc'].median()
    })

summary_tbl = pd.concat({
    'Current ownership': summarize(res_current),
    'Single-product':    summarize(res_single),
    'Monopolist':        summarize(res_mono),
}, axis=1)

print("\n=== Markups under alternative ownership (US, Berry-logit) ===")
print(summary_tbl.round(3).to_string()) """



=== Markups under alternative ownership (US, Berry-logit) ===
                   Current ownership  Single-product  Monopolist
markup_mean                    2.104           2.098      17.845
markup_median                  2.095           2.094      17.845
pct_markup_mean                0.831           0.829       7.053
pct_markup_median              0.619           0.619       5.268
mc_median                      1.295           1.295     -14.455


2.3 merger simulation

In [None]:
#merger simulation
y = df_us['y']


def shares_logit(p_vec: np.ndarray) -> np.ndarray:
    util = y 
    expu = np.exp(util)
    denom = 1.0 + np.sum(expu)
    return expu / denom #return the shares

def compute_jacobian(s: np.ndarray) -> np.ndarray:
    outer_ss = np.outer(s, s)
    J = - alpha_hat * (np.diag(s) - outer_ss)  # shape (n,n)
    return J

def solve_nevo_eq5(mc_vec, Omega, p_init=None, tol:float=1e-3, max_iter:int=1000, damping=0.6):
    """
    Solve Nevo (2000) equation (5):
        s(p) + (Ω ⊙ J(p)) (p - mc) = 0
    for equilibrium prices p.
    
    Args:
        mc_vec:   Marginal costs for each product.
        Omega:    Ownership matrix, where Ω[j,k] = 1 if products j and k are jointly owned.
        p_init:   Initial price vector. If None, it will use df_us['price'].
        tol:      Convergence tolerance for FOC residuals.
        max_iter: Maximum number of iterations.
        damping:  Relaxation factor (0.4–0.8 recommended).
    """
    # if no initial guess provided, start from your observed prices in df_us
    p = df_us["price"].to_numpy(float).copy() if p_init is None else np.array(p_init, float).copy()
    
    for i in range(max_iter):
        # Compute current market shares under current prices
        s_now = shares_logit(p)
        
        # Compute Jacobian ds/dp under simple logit
        J_now = compute_jacobian(s_now)

        # Step 3: Hadamard product
        G = -(Omega * J_now)

        # Step 4: fixed-point mapping -> solve G * step = s
        step = npl.solve(G, s_now)
        p_new = mc_vec + step

        # Step 5: apply damping (helps convergence)
        p = (1 - damping) * p + damping * p_new

        # Step 6: compute FOC residuals F(p) = s + (Ω⊙J)(p - mc)
        res = s_now + (Omega * J_now) @ (p - mc_vec)
        # Step 7: check convergence
        if np.max(np.abs(res)) < tol:
            print(f"Converged in {i+1} iterations, max residual = {np.max(np.abs(res)):.2e}")
            return p, True

    # If not converged
    print("Did not converge; try increasing max_iter or adjusting damping.")
    return p, False

#simulate merger
unique_developers = df_us['developer'].unique()
targets = [f for f in unique_developers if f != 'Mojang']

rows = []
for j, t in enumerate(targets):
    print('target number', j,'/',len(targets), ': ', t)
    # Make a copy of the us_market DataFrame for the current developer
    developer_data = df_us.copy()
    #update name of t to mojang
    developer_data.loc[developer_data['developer'] == t, 'developer'] = 'Mojang'

    #compute Omega under this ownership structure
    firms = developer_data['developer'].astype('category').cat.codes.to_numpy()
    Omega_post = (firms[:, None] == firms[None, :]).astype(float)
    #solve for new prices
    p_eq, converged = solve_nevo_eq5(
        mc_vec=df_us['mc'].to_numpy(float),
        Omega=Omega_post,
        p_init=df_us['price'].to_numpy(float),
        tol=1e-5,
        max_iter=1000,
        damping=0.5
    )
    #store results and profits
    mojang_mask = developer_data['developer'].eq('Mojang').to_numpy()
    rows.append({"target": t, "converged": bool(converged),
             "mojang_prices": p_eq[mojang_mask].tolist(),
             "mojang_profit": float(np.sum((p_eq - df_us['mc'].to_numpy(float)) * shares_logit(p_eq) * mojang_mask))}) #to check if shares are correct

    



target number 0 / 282 :  Catana Comics
Converged in 1 iterations, max residual = 3.15e-06
target number 1 / 282 :  Toca Boca
Converged in 8 iterations, max residual = 5.17e-06
target number 2 / 282 :  XtraMath
Converged in 5 iterations, max residual = 8.67e-06
target number 3 / 282 :  Aviation Supplies & Academics
Converged in 3 iterations, max residual = 5.71e-06
target number 4 / 282 :  TMI Media, LLC
Converged in 1 iterations, max residual = 5.99e-06
target number 5 / 282 :  The Japan Times Publishing
Converged in 1 iterations, max residual = 2.81e-06
target number 6 / 282 :  Signal Stuff
Converged in 1 iterations, max residual = 7.26e-07
target number 7 / 282 :  Sago Mini
Converged in 1 iterations, max residual = 3.52e-06
target number 8 / 282 :  TSO (The Stationery Office)
Converged in 1 iterations, max residual = 5.26e-07
target number 9 / 282 :  August Software
Converged in 3 iterations, max residual = 5.05e-06
target number 10 / 282 :  Vito Technology
Converged in 5 iterations,

In [43]:
df_results = pd.DataFrame(rows)
df_results.sort_values(by='mojang_profit', ascending=False)

Unnamed: 0,target,converged,mojang_prices,mojang_profit
32,Rockstar Games,True,"[5.414185732366245, 7.414185732366246, 7.41418...",0.529716
113,Fireproof Games,True,"[7.086628854730549, 2.398585098125068, 1.39858...",0.478184
50,SQUARE ENIX Ltd,True,"[1.3980425123550182, 7.083506561421336]",0.474993
171,ninja kiwi,True,"[7.082316611735774, 3.397322032267666, 5.39732...",0.473866
48,Bravestars Games,True,"[1.3906385852754748, 7.059405016050848]",0.450923
...,...,...,...,...
102,My Town Games Ltd,True,"[6.990004509339033, 3.1807117720834364]",0.381429
176,Open Lab Games,True,"[6.9900022546654075, 6.18071113054163]",0.381425
209,Ahmed Bousrih,True,"[6.990001916465072, 2.1807110343106384]",0.381424
42,Blazes,True,"[1.6807107188873376, 6.990000807920796]",0.381422


In [None]:
"""import numpy as np
import pandas as pd
import numpy.linalg as npl

# 0) US slice + alpha + mc
df_us = df.loc[df['country']=='United States'].copy()
df_us = df_us.replace([np.inf,-np.inf], np.nan).dropna(subset=['s_j','price']).reset_index(drop=True)

alpha = float(-0.4778)  # IV logit price coefficient (Berry-logit)

# Ensure we have mc/markup in df_us (write from current-ownership markups if missing)
def ownership_matrix(ids):
    return (ids[:,None] == ids[None,:]).astype(float)

def markups_from_omega(dfm, alpha, Omega, ridge=1e-10):
    s = dfm['s_j'].to_numpy(float)
    p = dfm['price'].to_numpy(float)
    J = alpha * (np.diag(s) - np.outer(s, s))              # ∂s/∂p under simple logit
    G = Omega * J
    try:
        m = -npl.solve(G, s)
    except npl.LinAlgError:
        m = -npl.solve(G + ridge*np.eye(len(s)), s)
    mc = p - m
    out = pd.DataFrame({'markup': m, 'mc': mc, 'price': p})
    out['pct_markup'] = np.where(np.isfinite(out['markup']/out['price']),
                                 out['markup']/out['price'], np.nan)
    return out

if 'mc' not in df_us.columns:
    if 'developer' in df_us.columns:
        firm_ids_cur = df_us['developer'].astype('category').cat.codes.to_numpy()
        Omega_current = ownership_matrix(firm_ids_cur)
    else:
        Omega_current = np.eye(len(df_us))
    res_cur = markups_from_omega(df_us, alpha, Omega_current)
    df_us[['mc','markup','pct_markup']] = res_cur[['mc','markup','pct_markup']].reset_index(drop=True)


# 1) Recover baseline δ₀
# One US "market": s0 is outside share
s = df_us['s_j'].to_numpy(float)
s0 = max(1.0 - s.sum(), 1e-12)
delta0 = np.log(np.clip(s, 1e-300, 1)) - np.log(s0)   # δ₀ = ln s_j − ln s0
p0 = df_us['price'].to_numpy(float)
mc = df_us['mc'].to_numpy(float)

# 2) Share and Jacobian at price p
def shares_from_prices(p):
    """Simple logit: δ(p) = δ₀ + α (p - p0); s = exp(δ) / (1 + sum exp(δ))"""
    delta = delta0 + alpha * (p - p0)
    expd = np.exp(np.clip(delta, -700, 700))
    denom = 1.0 + expd.sum()
    return expd / denom

def jacobian_simple_logit(s):
    """∂s/∂p = α [diag(s) − s sᵀ]"""
    return alpha * (np.diag(s) - np.outer(s, s))


# 3) Solve Bertrand FOCs for a given ownership vector
def solve_equilibrium_prices(mc, firm_ids, p_init=None, tol=1e-10, itmax=500, relax=0.6, ridge=1e-10):
    """
    Fixed point on prices:
        m(p) = −(Ω ⊙ J(p))^{-1} s(p),  p = mc + m(p)
    Use damped iterations: p <- (1-relax)*p + relax*(mc + m(p))
    """
    n = len(mc)
    if p_init is None:
        p = p0.copy()
    else:
        p = p_init.copy()
    Omega = ownership_matrix(firm_ids)

    for it in range(itmax):
        s_p = shares_from_prices(p)
        J = jacobian_simple_logit(s_p)
        G = Omega * J
        try:
            m = -npl.solve(G, s_p)
        except npl.LinAlgError:
            m = -npl.solve(G + ridge*np.eye(n), s_p)
        p_new = mc + m
        p_next = (1.0 - relax) * p + relax * p_new
        if np.max(np.abs(p_next - p)) < tol:
            p = p_next
            break
        p = p_next
    else:
        # didn't converge; still return last iterate
        pass
    return p, shares_from_prices(p)


# 4) Baseline stats for Mojang  
if 'developer' not in df_us.columns:
    raise ValueError("Need a 'developer' column to identify Mojang and targets.")

# One categorical mapping for the WHOLE df_us 
dev_cat   = df_us['developer'].astype('category')
dev_codes = dev_cat.cat.codes.to_numpy()
categories = list(dev_cat.cat.categories)

# Helper: get the (unique) code for any developer name from the global dev_codes
def code_for_developer(name: str) -> int:
    codes = np.unique(dev_codes[df_us['developer'] == name])
    if len(codes) == 0:
        raise ValueError(f"Developer not found: {name}")
    return int(codes[0])

# Mojang id from the global dev_codes (not from a re-categorized subset)
moj_mask_any = df_us['developer'].str.contains('Mojang', case=False)
if not moj_mask_any.any():
    raise ValueError("No developer matching 'Mojang' found in US data.")
mojang_id = int(np.unique(dev_codes[moj_mask_any])[0])

# Baseline equilibrium under current ownership
p_eq0, s_eq0 = solve_equilibrium_prices(mc, dev_codes, p_init=p0)
if not np.isfinite(p_eq0).all():
    raise RuntimeError("Baseline price solver did not converge to finite prices.")

moj_mask0 = (dev_codes == mojang_id)
mojang_base_share = float(s_eq0[moj_mask0].sum())
mojang_base_rev   = float((p_eq0[moj_mask0] - 0.0) @ s_eq0[moj_mask0])  # revenue per market-size=1
mojang_base_prof  = float(((p_eq0[moj_mask0] - mc[moj_mask0]) @ s_eq0[moj_mask0]))


# 5) Try each target 
results = []
for target in df_us['developer'].unique():
    if 'mojang' in target.lower():
        continue

    # Use the GLOBAL dev_codes mapping for the target
    target_code = code_for_developer(target)

    # merged ownership: map target's code to Mojang's code
    dev_codes_merged = dev_codes.copy()
    dev_codes_merged[dev_codes == target_code] = mojang_id

    # Solve post-merger equilibrium
    try:
        p_star, s_star = solve_equilibrium_prices(mc, dev_codes_merged, p_init=p_eq0)
        if not (np.isfinite(p_star).all() and np.isfinite(s_star).all()):
            raise RuntimeError("Non-finite solution")
    except Exception as e:
        # If a target fails to converge, skip it but record the failure
        results.append({'target': target, 'Δshare': np.nan, 'Δrevenue': np.nan, 'Δprofit': np.nan})
        continue

    moj_mask = (dev_codes_merged == mojang_id)
    share_new = float(s_star[moj_mask].sum())
    rev_new   = float((p_star[moj_mask] @ s_star[moj_mask]))
    prof_new  = float(((p_star[moj_mask] - mc[moj_mask]) @ s_star[moj_mask]))

    results.append({
        'target': target,
        'Δshare':  share_new - mojang_base_share,
        'Δrevenue': rev_new   - mojang_base_rev,
        'Δprofit':  prof_new  - mojang_base_prof
    })

res = pd.DataFrame(results).sort_values('Δprofit', ascending=False)
print("\n=== Mojang: best acquisition candidates (simple logit, US) ===")
print(res.head(10).to_string(index=False))



=== Mojang: best acquisition candidates (simple logit, US) ===
                                target   Δshare  Δrevenue  Δprofit
                        Rockstar Games 0.035134  0.224158 0.107230
                       Fireproof Games 0.023314  0.045329 0.070134
                       SQUARE ENIX Ltd 0.022562  0.022206 0.067810
                            ninja kiwi 0.022305  0.099189 0.067016
                      Bravestars Games 0.016895  0.016703 0.050429
                 Ubisoft Entertainment 0.016102  0.027817 0.048018
Warner Bros. International Enterprises 0.014868  0.035699 0.044272
                     Clickteam USA LLC 0.013008  0.042771 0.038646
                  Ironhide Game Studio 0.012641  0.033701 0.037538
               Coffee Stain Publishing 0.011815  0.065984 0.035053
