In [2]:
# import packages
import pandas as pd
import numpy as np
import statsmodels.api as sm
from numba import jit, njit, prange
import matplotlib.pyplot as plt
from scipy.optimize import minimize

# 1 Data

## 1.1 Observed Data 
For $J$ products in $T$ markets, suppose we only observe
- shares
- product retail prices
- product characteristics

### $J$ products, $T$ markets, and $K$ product characteristics 

In [3]:
# Problem dimensions of the simulated data set
J = 10  # number of products
T = 10  # number of markets
K = 5   # dimension of product characteristics, counting the intercept (=1)

# Seed NumPy's global random number generator so the draws below are reproducible
np.random.seed(1019)

### Product Characteristics: X
`X` is a data frame in which each row holds the characteristics vector $x_j$ of one product; its columns are the product index `j` and the observed product characteristics. The dimension of the characteristics, $K$, is specified above. The frame also contains a row for the outside option, whose index is 0 and whose characteristics are all zero.

- The product characteristics: $$x_{j1}=1 \text{  (the intercept)  }, x_{jk}\sim N(0, \sigma_x), k=2,\cdots,K,$$ where $\sigma_x$ is the standard deviation, referred to as sd_x in the code.

In [4]:
# Product characteristics X: one row per product, plus row 0 for the outside option
sd_x = 2  # passed as `scale` (standard deviation) to the normal draw below

# column labels x_1, ..., x_K
col = ['x_' + str(k) for k in range(1, K + 1)]

# draw every entry of the (J+1) x K table in a single call and label the columns
X = pd.DataFrame(
    np.random.normal(scale = sd_x, size = (J + 1, K)),
    columns = col,
)

# the first characteristic is the intercept; the row label doubles as the product index j
X['x_1'] = 1
X['j'] = X.index

# outside option (j = 0): all characteristics, the intercept included, are set to zero
X.loc[X['j'] == 0, col] = 0

print('shape of X: ', X.shape)
X.head()

shape of X:  (11, 6)


Unnamed: 0,x_1,x_2,x_3,x_4,x_5,j
0,0,0.0,0.0,0.0,0.0,0
1,1,-0.735912,-0.580679,3.158113,0.46829,1
2,1,3.493566,1.896881,-0.235369,-0.651372,2
3,1,4.584802,3.15391,4.112289,1.729779,3
4,1,1.638937,0.032865,-5.254284,6.444651,4


### Market-Product Characteristics: M

In [5]:
# All (product, market) pairs: j = 1, 2, ..., J and t = 1, 2, ..., T.
# Both helper frames carry a constant 'temp' key, so merging on it pairs
# every product with every market; the key is dropped afterwards.
products = pd.DataFrame([(j, 1) for j in range(1, J + 1)], columns = ['j', 'temp'])
markets = pd.DataFrame([(t, 1) for t in range(1, T + 1)], columns = ['t', 'temp'])

M = products.merge(markets, on = 'temp', how = 'outer').drop(columns = 'temp')

- The product-market specific fixed effect $\xi_{jt}$: $p_{jt}$ can be correlated with $\xi_{jt}$, but the product characteristics $x_j$ are independent of $\xi_{jt}$.
  1. We set $\xi_{jt} = 0$ in this task.
  1. Alternatively, in the next task, we draw $\xi_{jt}$ i.i.d. from a normal distribution with mean 0 and standard deviation $\sigma_{\xi}$: $$\xi_{jt}\sim Normal(0, \sigma_{\xi})$$


- The marginal cost of product $j$ in market $t$: $$c_{jt}\sim logNormal(0, \sigma_c),$$ where $\sigma_c$ is referred to as sd_c in the code.

- The retail price: $$p_{jt}-c_{jt} \sim logNormal(\gamma \xi_{jt}, \sigma_p),$$ where $\gamma$ is referred to as price_xi, and $\sigma_p$ as sd_p in the code. This price is not the equilibrium price.

In [6]:
# Parameters of the cost and price draws
price_xi = 1   # gamma: scales xi in the log-mean of the markup p - c
sd_c = 0.05    # sigma_c: log-scale standard deviation of the marginal cost
sd_p = 0.05    # sigma_p: log-scale standard deviation of the markup
# sd_xi = 0.5  # sigma_xi: only needed for the random-xi variant below

# xi is fixed at zero in this task.
# Random-xi variant: M['xi'] = np.random.normal(scale = sd_xi, size = (len(M),1))
M['xi'] = np.zeros(len(M))

# marginal cost c ~ logNormal(0, sd_c), one draw per (j, t) row
M['c'] = np.random.lognormal(sigma = sd_c, size = len(M))

# markup p - c ~ logNormal(price_xi * xi, sd_p), drawn row by row after all costs
markup = M.apply(
    lambda row: np.random.lognormal(mean = price_xi * row['xi'], sigma = sd_p),
    axis = 1,
)

# retail price = markup + marginal cost
M['p'] = markup + M['c']

## Simulate the Process of Generating Data

The indirect utility of consumer $i$ in market $t$ for product $j$ is: $$u_{ijt}=\beta_{it}'x_j + \alpha_{it}p_{jt} +\xi_{jt} + \epsilon_{ijt}$$