In [64]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from functools import partial
from scipy.integrate import quad
from scipy.integrate import nquad
from scipy.optimize import minimize

### Estimation of parameters

First of all we would like to derive from data estimations of the following parameters:
$\pmb\Sigma$, $\pmb{b}$, $\sigma$, given our data $x_n = (x_{n1}, \dots, x_{nT})$,
sequence of choices $d_n = (d_{n1}, \dots, d_{nT})$ and net utility structure 
$U_{nt} = \beta' x_{nt} + \eta_n + \varepsilon_{nt}$, where $\eta_n$ is iid 
across agents normal with zero-scalar mean and variance $\sigma$ and $\varepsilon_{nt}$
is iid across agents and time normal with zero-vector mean and $\pmb\Omega$ covariance
matrix with following form (from probit\_panel.pdf):
$$
\pmb\Omega =
 \begin{pmatrix*} 
1 + \sigma & \sigma& \dots & \sigma \\
\sigma & 1+\sigma& \dots & \sigma \\
\sigma & \sigma&\dots &1 + \sigma \\
\end{pmatrix*}
$$

$$
\begin{align*}
    P(d_n |x_n) & = \int_{\mathbb{R}^k} \left(\int_{\mathbb{R}}
    P(d_n |\eta, \beta, x_n) \phi_{\sigma}(\eta)d\eta \right)
    \phi_{\Sigma}(\beta)d\beta                              \\
                & =\int_{\mathbb{R}^k}\left(\int_{\mathbb{R}}
    \prod_{t=1}^{T}\Phi((\beta'x_{nt} + \eta)d_{nt})
    \phi_{\sigma}(\eta)d\eta \right)\phi_{\Sigma}(\beta)
    d\beta
\end{align*}
$$

Then MLE for every person would be the following:
$$
\begin{align*}
    P(d |x) & = \int_{\mathbb{R}^k} \left(\int_{\mathbb{R}}
    P(d |\eta, \beta, x) \phi_{\sigma}(\eta)d\eta \right)
    \phi_{\Sigma}(\beta)d\beta \\
   & = \int_{\mathbb{R}^k} \prod_{n=1}^N\left(\int_{\mathbb{R}}
    P(d_n |\eta, \beta, x_n) \phi_{\sigma}(\eta)d\eta \right)
    \phi_{\Sigma}(\beta)d\beta \\
    & = \int_{\mathbb{R}^k} \prod_{n=1}^N\left(\int_{\mathbb{R}} \prod_{t=1}^T
    P(d_{nt} |\eta, \beta, x_{nt}) \phi_{\sigma}(\eta)d\eta \right)
    \phi_{\Sigma}(\beta)d\beta 
                                 \\
                & =\int_{\mathbb{R}^k}\prod_{n=1}^N\left(\int_{\mathbb{R}}
    \prod_{t=1}^{T}\Phi((\beta'x_{nt} + \eta)d_{nt})
    \phi_{\sigma}(\eta)d\eta \right)\phi_{\Sigma}(\beta)
    d\beta
\end{align*}
$$

### Parameters

Then MLE for every person would be the following:
$$
                \int_{\mathbb{R}^k}\prod_{n=1}^N\left(\int_{\mathbb{R}}
    \prod_{t=1}^{T}\Phi((\beta'x_{nt} + \eta)d_{nt})
    \phi_{\sigma}(\eta)d\eta \right)\phi_{\Sigma}(\beta)
    d\beta
$$

In [77]:
def round_arr(arr):
    """ util function """
    return [round(i, 3) for i in arr]


def generate_data(n: int, k: int, t: int,
                  b: np.array, Sigma: np.matrix) -> pd.DataFrame:
    """generates panel data based on input parameters

    Args:
        n (int): amount of agents
        k (int): amount of features of agents
        t (int): amount of time periods each agent made choice
        b (np.array): expectation of tastes
        Sigma (np.matrix): covariance of tastes

    Returns:
        pd.DataFrame: panel data
    """
    data = []

    for n in range(n):

        beta = np.random.multivariate_normal(
            mean=b,
            cov=Sigma,
        ).reshape(-1, 1)

        x_n = pd.DataFrame(
            data=np.random.normal(size=(t, k)),
            columns=[f'k{i}' for i in range(k)]
        )
        utility = x_n @ beta
        x_n['agent'] = n
        x_n['choice'] = np.where(utility > 0, 1, -1)
        data.append(x_n)

    return pd.concat(data)


def multivariate_normal_pdf(x: np.array, expectation: np.array,
                            covariance: np.matrix) -> float:
    """returns pdf of given sample of mv normal distribution

    Args:
        x (np.array): sample
        expectation (np.array): param of normal distribution
        covariance (np.matrix): covariance of mv normal rv

    Returns:
        float: pdf value of sample, given population parameters
    """
    x_m = x - expectation
    return (1. / (np.sqrt((2 * np.pi)**(len(expectation)) * np.linalg.det(covariance))) *
            np.exp(-(np.linalg.solve(covariance, x_m).T.dot(x_m)) / 2))


def d_eta(eta: float, x_n: pd.DataFrame,
          beta: np.array, sigma: float) -> float:
    """returns function under inner integral in point

    Args:
        eta (float): point value
        x_n (pd.DataFrame): data point
        beta (np.array): taste parameter
        sigma (float): variance of noise in utility

    Returns:
        float: under integral function value
    """

    res = norm.cdf(((x_n.iloc[:, :len(beta)] * beta.T).sum(axis=1) +
                   eta).multiply(x_n['choice'], axis=0))

    return res.prod() * norm.pdf(eta, scale=np.sqrt(sigma))


def integrate_d_eta(*beta, panel: pd.DataFrame,
                    b: np.array, sigma: float, Sigma: np.matrix) -> float:
    """product of one-dimensional integrals w.r.t eta

    Args:
        panel (pd.DataFrame): data
        b (np.array): expectation of tastes
        sigma (float): variance of noise of utilities
        Sigma (np.matrix): covariance matrix of tastes

    Returns:
        float: infinite integral value
    """

    n_cumprod = 1
    beta = np.array(beta)
    print(beta)

    for _, x_n in panel.groupby('agent'):
        integral = quad(
            d_eta,
            -np.inf,
            +np.inf,
            args=(x_n, beta, sigma)
        )
        n_cumprod *= integral[0]

    return n_cumprod * multivariate_normal_pdf(
        beta, b, Sigma
    )


def integrate_d_beta(b: np.array, sigma: float,
                     Sigma: np.matrix, panel: pd.DataFrame) -> float:
    """k-dimensional integral w.r.t tastes

    Args:
        panel (pd.DataFrame): data
        b (np.array): expection of tastes
        sigma (float): variance of noise in utilities
        Sigma (np.matrix): covariance of tastes

    Returns:
        float: integral value
    """

    # infinite integrating by vector of betas
    return nquad(
        partial(integrate_d_eta,
                panel=panel,
                b=b,
                sigma=sigma,
                Sigma=Sigma),
        [[-np.inf, +np.inf] for _ in range(len(b))],  # taste ranges
    )


$$
                \int_{\mathbb{R}^k}\prod_{n=1}^N\left(\int_{\mathbb{R}}
    \prod_{t=1}^{T}\Phi((\beta'x_{nt} + \eta)d_{nt})
    \phi_{\sigma}(\eta)d\eta \right)\phi_{\Sigma}(\beta)
    d\beta
$$

### Data generation

In [83]:

t = 5   # time periods
k = 2   # dimensionality of features
n = 20  # amount of agents

b = np.zeros(k)
Sigma = np.eye(k)
sigma = 3

panel = generate_data(n=n, k=k, t=t, b=b, Sigma=Sigma)
panel.head()

Unnamed: 0,k0,k1,agent,choice
0,0.687424,-1.064029,0,1
1,1.061793,-1.325351,0,1
2,-1.262761,-0.327629,0,-1
3,-1.384816,-0.525422,0,-1
4,0.338701,-1.519942,0,1


In [84]:
integrate_d_beta(b, sigma, Sigma, panel)

[1. 1.]
[-1.  1.]
[233.06516869   1.        ]
[0.00429065 1.        ]
[-233.06516869    1.        ]
[-0.00429065  1.        ]
[38.2988398  1.       ]
[0.02611045 1.        ]
[-38.2988398   1.       ]
[-0.02611045  1.        ]
[13.79995165  1.        ]
[0.07246402 1.        ]
[-13.79995165   1.        ]
[-0.07246402  1.        ]


### Optimization

In [None]:
# res = minimize(
#     lambda x, panel, Sigma, sigma: -1 *
#     np.log(integrate_d_beta(x, panel, Sigma, sigma)),
#     args=(panel, Sigma, sigma),
#     x0=np.array([0.5]*k).reshape(-1, 1)
# )


In [25]:
(res.x**2).sum()


0.515423105944512