In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
from scipy.integrate import quad
from scipy.optimize import minimize


### Estimation of parameters

First of all, we would like to estimate the following parameters from the data:
$\pmb\Sigma$, $\pmb{b}$ and $\sigma$, given the features $x_n = (x_{n1}, \dots, x_{nT})$,
the sequence of choices $d_n = (d_{n1}, \dots, d_{nT})$ with $d_{nt} \in \{-1, 1\}$, and the net utility structure
$U_{nt} = \beta' x_{nt} + \eta_n + \varepsilon_{nt}$. Here the tastes $\beta$ are normal with mean
$\pmb{b}$ and covariance $\pmb\Sigma$, $\eta_n$ is iid across agents, normal with zero mean and
variance $\sigma$, and $\varepsilon_{nt}$ is iid across agents and time, standard normal.
The composite error $(\eta_n + \varepsilon_{n1}, \dots, \eta_n + \varepsilon_{nT})$ of one agent
is therefore normal with zero-vector mean and a covariance matrix $\pmb\Omega$ of the
following form (from probit\_panel.pdf):
$$
\pmb\Omega =
\begin{pmatrix}
1 + \sigma & \sigma & \dots & \sigma \\
\sigma & 1 + \sigma & \dots & \sigma \\
\vdots & \vdots & \ddots & \vdots \\
\sigma & \sigma & \dots & 1 + \sigma \\
\end{pmatrix}
$$

$$
\begin{align*}
    P(d_n |x_n) & = \int_{\mathbb{R}^k} \left(\int_{\mathbb{R}}
    P(d_n |\eta, \beta, x_n) \phi_{\sigma}(\eta)d\eta \right)
    \phi_{\Sigma}(\beta)d\beta                              \\
                & =\int_{\mathbb{R}^k}\left(\int_{\mathbb{R}}
    \prod_{t=1}^{T}\Phi((\beta'x_{nt} + \eta)d_{nt})
    \phi_{\sigma}(\eta)d\eta \right)\phi_{\Sigma}(\beta)
    d\beta
\end{align*}
$$

Then the likelihood of the whole sample of $N$ agents, which the MLE maximizes, is the following:
$$
\begin{align*}
    P(d |x) & = \int_{\mathbb{R}^k} \left(\int_{\mathbb{R}}
    P(d |\eta, \beta, x) \phi_{\sigma}(\eta)d\eta \right)
    \phi_{\Sigma}(\beta)d\beta \\
   & = \int_{\mathbb{R}^k} \prod_{n=1}^N\left(\int_{\mathbb{R}}
    P(d_n |\eta, \beta, x_n) \phi_{\sigma}(\eta)d\eta \right)
    \phi_{\Sigma}(\beta)d\beta \\
    & = \int_{\mathbb{R}^k} \prod_{n=1}^N\left(\int_{\mathbb{R}} \prod_{t=1}^T
    P(d_{nt} |\eta, \beta, x_{nt}) \phi_{\sigma}(\eta)d\eta \right)
    \phi_{\Sigma}(\beta)d\beta 
                                 \\
                & =\int_{\mathbb{R}^k}\prod_{n=1}^N\left(\int_{\mathbb{R}}
    \prod_{t=1}^{T}\Phi((\beta'x_{nt} + \eta)d_{nt})
    \phi_{\sigma}(\eta)d\eta \right)\phi_{\Sigma}(\beta)
    d\beta
\end{align*}
$$

### Parameters

The parameters are then estimated by maximizing this sample likelihood:
$$
                \int_{\mathbb{R}^k}\prod_{n=1}^N\left(\int_{\mathbb{R}}
    \prod_{t=1}^{T}\Phi((\beta'x_{nt} + \eta)d_{nt})
    \phi_{\sigma}(\eta)d\eta \right)\phi_{\Sigma}(\beta)
    d\beta
$$

In [20]:
def round_arr(arr):
    """Round every element of an iterable to three decimal places.

    Args:
        arr: iterable of numbers

    Returns:
        list: the rounded values, in their original order
    """
    return list(map(lambda value: round(value, 3), arr))


def sample_from_multivariate_normal(
        n: int, expectation: np.array, covariance: np.matrix) -> np.matrix:
    """Draw samples from a multivariate normal distribution.

    Standard-normal draws are transformed with the Cholesky factor of
    ``covariance`` and shifted by ``expectation``.

    Args:
        n (int): sample size
        expectation (np.array): mean vector of length k; a flat vector and a
            (k, 1) column are both accepted
        covariance (np.matrix): (k, k) covariance matrix; it has to be
            positive definite for the Cholesky factorisation to exist

    Returns:
        np.matrix: sample of shape (k, n), one draw per column

    Raises:
        np.linalg.LinAlgError: if ``covariance`` is not positive definite
    """
    # Use a (k, 1) column so the mean broadcasts over the n draws (columns).
    # A flat (k,) vector would be aligned with the last axis instead, which
    # fails for n != k and shifts the wrong axis when n == k.
    mean = np.asarray(expectation).reshape(-1, 1)
    chol = np.linalg.cholesky(covariance)
    # Sample from the standard normal, one column per draw.
    standard_draws = np.random.normal(size=(mean.shape[0], n))
    # mean + L z has covariance L L' = covariance.
    return chol.dot(standard_draws) + mean


def multivariate_normal_pdf(
        x: np.array, expectation: np.array, covariance: np.matrix) -> float:
    """Evaluate the multivariate normal density at a point.

    Args:
        x (np.array): point at which the density is evaluated
        expectation (np.array): mean vector of the distribution
        covariance (np.matrix): covariance matrix of the distribution

    Returns:
        float: density value at ``x`` for the given mean and covariance
    """
    dim = len(expectation)
    centered = x - expectation
    # Normalising constant sqrt((2*pi)^k * det(covariance)).
    normaliser = np.sqrt((2 * np.pi)**dim * np.linalg.det(covariance))
    # Quadratic form (x - m)' covariance^{-1} (x - m), obtained with a linear
    # solve instead of an explicit matrix inverse.
    quadratic_form = np.linalg.solve(covariance, centered).T.dot(centered)
    return 1. / normaliser * np.exp(-quadratic_form / 2)


def integrate_d_eta(
    eta: float, x_n: pd.DataFrame, beta: np.array, sigma: float
) -> float:
    """Integrand over the random effect ``eta`` for the rows of one agent.

    Args:
        eta (float): value of the agent-level random effect
        x_n (pd.DataFrame): rows of a single agent; the first ``len(beta)``
            columns are used as features and ``'choice'`` holds the choices
        beta (np.array): taste coefficients
        sigma (float): variance of ``eta`` (its square root is the scale of
            the normal density)

    Returns:
        float: product over the rows of Phi((beta'x + eta) * choice),
            multiplied by the zero-mean normal density of ``eta``
    """
    features = x_n.iloc[:, :len(beta)]
    # Row-wise linear index beta'x, shifted by the random effect.
    utility = (features * beta.T).sum(axis=1) + eta
    # Scale by the recorded choice (the generated panel codes it as +1 / -1),
    # aligned on the row index.
    signed_utility = utility.multiply(x_n['choice'], axis=0)
    choice_probs = norm.cdf(signed_utility)
    eta_density = norm.pdf(eta, scale=np.sqrt(sigma))
    return choice_probs.prod() * eta_density


def integrate_d_beta(
    beta: np.array, panel: pd.DataFrame, Sigma: np.array, sigma: float,
    taste_mean: np.array = None
) -> float:
    """Likelihood value whose negative log is minimised by the optimiser.

    For every agent the random effect is integrated out numerically at the
    given ``beta``; the per-agent integrals are multiplied together and the
    product is scaled by a one-dimensional integral of the taste density.
    The current loss and ``beta`` are printed on every call.

    NOTE(review): the per-agent integrals are evaluated once, at the ``beta``
    argument, so they do not vary with the outer integration variable.  The
    outer integral therefore only contributes a factor that does not depend
    on ``beta``; it is not an integral over the tastes.  This behaviour is
    kept as it was -- confirm the intended model.

    Args:
        beta (np.array): taste coefficients at which the per-agent integrals
            are evaluated
        panel (pd.DataFrame): panel with an ``'agent'`` column used for
            grouping; every group is handed to ``integrate_d_eta``
        Sigma (np.array): covariance matrix of the taste density
        sigma (float): variance of the agent-level random effect
        taste_mean (np.array, optional): mean of the taste density.  When
            omitted, the notebook-level variable ``b`` is used, which is what
            this function previously always read implicitly.

    Returns:
        float: product of the per-agent integrals times the integral of the
            taste density
    """
    # Fall back to the notebook-level `b` so existing calls behave as before.
    mean = b if taste_mean is None else taste_mean

    agent_integrals = [
        quad(integrate_d_eta, -np.inf, +np.inf, args=(x_n, beta, sigma))[0]
        for _, x_n in panel.groupby('agent')
    ]
    likelihood = np.array(agent_integrals).prod()

    # quad passes a scalar, which is broadcast against the mean vector inside
    # multivariate_normal_pdf.  The (very small) likelihood factor is constant
    # in the integration variable, so it is applied after integrating: inside
    # the integrand it would push the values below quad's default absolute
    # tolerance and stop the adaptive refinement early.
    density_integral = quad(
        multivariate_normal_pdf, -np.inf, +np.inf, args=(mean, Sigma)
    )[0]
    res = likelihood * density_integral

    log_res = np.log(res)
    print(f"Loss: {'inf' if np.isinf(log_res) else round(-log_res, 3)}")
    print(f" Expectation: {round_arr(beta)}")
    return res


def generate_data(
        n: int, k: int, t: int, b: np.array, Sigma: np.matrix,
        seed: int = None) -> pd.DataFrame:
    """Generate panel data with agent-specific random tastes.

    Every agent gets its own taste vector drawn from N(b, Sigma) and ``t``
    rows of standard-normal features.  The choice in a row is +1 when the
    linear utility x'beta is positive and -1 otherwise.

    NOTE(review): the utility used here contains no random-effect or noise
    term, so choices are a deterministic function of features and tastes.

    Args:
        n (int): amount of agents
        k (int): amount of features of agents
        t (int): amount of time periods each agent made choice
        b (np.array): expectation of tastes
        Sigma (np.matrix): covariance of tastes
        seed (int, optional): seed of a private random generator, for
            reproducible panels.  When omitted, NumPy's global random state
            is used, exactly as before.

    Returns:
        pd.DataFrame: panel data with the feature columns ``k0`` ..
            ``k{k-1}`` followed by ``agent`` and ``choice``.  The per-agent
            frames are stacked without resetting the index, so index labels
            repeat across agents.  For ``n == 0`` an empty frame with these
            columns is returned.
    """
    feature_names = [f'k{i}' for i in range(k)]
    # The legacy module-level functions and RandomState share one interface.
    rng = np.random if seed is None else np.random.RandomState(seed)
    frames = []

    # A separate loop name keeps the parameter `n` intact.
    for agent in range(n):
        beta = rng.multivariate_normal(mean=b, cov=Sigma).reshape(-1, 1)
        features = rng.normal(size=(t, k))

        # Flatten to 1-D so the new column is assigned from a plain vector.
        utility = (features @ beta).ravel()

        x_n = pd.DataFrame(data=features, columns=feature_names)
        x_n['agent'] = agent
        x_n['choice'] = np.where(utility > 0, 1, -1)
        frames.append(x_n)

    if not frames:
        # pd.concat raises on an empty list; return the empty schema instead.
        return pd.DataFrame(columns=feature_names + ['agent', 'choice'])
    return pd.concat(frames)


### Data generation

In [21]:

# Fix the global NumPy random state so that a fresh Restart & Run All
# regenerates the same panel.
SEED = 42
np.random.seed(SEED)

t = 5   # time periods
k = 10  # dimensionality of features
n = 20  # amount of agents

b = np.zeros(k)    # expectation of tastes
Sigma = np.eye(k)  # covariance of tastes
sigma = 3          # variance of the agent-level random effect

panel = generate_data(n=n, k=k, t=t, b=b, Sigma=Sigma)

### Optimization

In [22]:
def neg_log_likelihood(x, panel, Sigma, sigma):
    """Objective for the optimiser: minus the log of integrate_d_beta at x."""
    return -1 * np.log(integrate_d_beta(x, panel, Sigma, sigma))


# minimize expects a flat starting vector of shape (k,); a (k, 1) column is
# rejected by recent SciPy releases.
res = minimize(
    neg_log_likelihood,
    x0=np.full(k, 0.5),
    args=(panel, Sigma, sigma),
)


Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 145.918
 Expectation: [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
Loss: 95.83
 Expectation: [0.364, 0.223, 0.002, 0.337, 0.403, 0.189, 0.23, 0.139, 0.3, -0.049]
Loss: 95.83
 Expectation: [0.364, 0.

In [25]:
# Squared Euclidean norm of the optimiser's solution vector.
np.square(res.x).sum()


0.515423105944512