### Appendix. Multivariate normal density

To evaluate the normal log-likelihood I am using a function that relies on an eigendecomposition of the covariance matrix.
This notebook compares that log-likelihood against the `logpdf` function from `scipy.stats.multivariate_normal`.

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import multivariate_normal
import time

In [2]:
# Load the pickled object stored in 'pciSeq.pickle'; later cells read
# `obj.spots`, `obj.cells` and `obj.config` from it.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle files from a trusted source.
obj = pd.read_pickle('pciSeq.pickle')
# Alternative input file, currently disabled:
# obj = pd.read_pickle('iter_0.pickle')

In [3]:
def mvn_loglik(data, cell_label, cells, is3D):
    """
    Calculates the multivariate normal log likelihood for spots with two
    implementations and prints how long each one takes.

    Each spot is evaluated under the normal distribution of the cell given by
    ``cell_label``: once with the vectorised ``multiple_logpdfs`` and once, spot
    by spot, with ``scipy.stats.multivariate_normal.logpdf``.

    Parameters:
        data (np.array): Spot data, one row per spot.
        cell_label (np.array): Cell labels for spots, used to index into
            ``cells.centroid.values`` and ``cells.cov``.
        cells (Cells): Cells object containing cell data (``centroid`` and ``cov``).
        is3D (bool): Whether the data is 3D. When False the last column of the
            spot data and of the centroids is dropped.

    Returns:
        tuple: ``(out, out_2)`` where ``out`` is the array returned by
        ``multiple_logpdfs`` and ``out_2`` is a list with one scipy log-density
        per spot.
    """
    centroids = cells.centroid.values[cell_label]
    covs = cells.cov[cell_label]
    if not is3D:
        # Drop the last coordinate of the spots and of the centroids.
        # NOTE(review): `covs` is passed through untouched, so this presumably
        # relies on `cells.cov` already matching the reduced dimensionality - confirm.
        data = data[:, :-1]
        centroids = centroids[:, :-1]

    tic = time.time()
    out = multiple_logpdfs(data, centroids, covs)
    toc = time.time() - tic
    print(f"multiple_logpdfs finished in {toc} secs")

    # Restart the clock here; otherwise the elapsed time reported for scipy
    # would also include the vectorised run above.
    tic = time.time()
    out_2 = [
        multivariate_normal.logpdf(point, centroid, cov)
        for point, centroid, cov in zip(data, centroids, covs)
    ]
    toc = time.time() - tic
    print(f"multivariate_normal finished in {toc} secs")
    return out, out_2

In [4]:
def multiple_logpdfs(x: np.ndarray, means: np.ndarray, cov: np.ndarray) -> np.ndarray:
    """
    Vectorised mvn log likelihood evaluated at multiple pairs of
    (centroid_1, cov_1), ..., (centroid_N, cov_N).

    Entry ``n`` of the result is the log-density of ``x[n]`` under the normal
    distribution with mean ``means[n]`` and covariance ``cov[n]``.
    Taken from http://gregorygundersen.com/blog/2020/12/12/group-multivariate-normal-pdf/

    Parameters:
        x (np.ndarray): Points to evaluate, shape (N, D).
        means (np.ndarray): One mean per point, shape (N, D).
        cov (np.ndarray): One covariance matrix per point, shape (N, D, D).
            Each matrix must be symmetric with strictly positive eigenvalues,
            because the eigenvalues are log-transformed and inverted.

    Returns:
        np.ndarray: Log-density values, shape (N,); empty when N is 0.
    """
    # `eigh` broadcasts over the leading axis: one decomposition per covariance.
    vals, vecs = np.linalg.eigh(cov)

    # The log determinant of each covariance is the sum of its log eigenvalues.
    logdets = np.sum(np.log(vals), axis=1)

    # Invert the eigenvalues.
    valsinvs = 1. / vals

    # Scale every eigenvector (column) by 1/sqrt(eigenvalue). The extra axis
    # makes NumPy broadcast the scaling across the rows of each matrix.
    Us = vecs * np.sqrt(valsinvs)[:, None]
    devs = x - means

    # Matrix-vector products across the first dimension.
    devUs = np.einsum('ni, nij->nj', devs, Us)

    # Squared Mahalanobis distance: square each term and sum.
    mahas = np.sum(np.square(devUs), axis=1)

    # Take the dimensionality from the array shape rather than from `vals[0]`,
    # which raises IndexError when no points are passed in.
    dim = vals.shape[-1]
    log2pi = np.log(2 * np.pi)

    return -0.5 * (dim * log2pi + mahas + logdets)

### Evaluation

In [5]:
# First, for each one of the spots, get the label of its closest cell
# (column 0 of `parent_cell_id`).
labels = obj.spots.parent_cell_id[:, 0]

In [6]:
# Now pass the spots' xyz coords, the labels, the cells object and the is3D flag,
# and evaluate the normal log-likelihood.
# The first return value is the log-likelihood calculated by our user-defined function.
# The second return value is the log-likelihood calculated by Python's scipy library.
mvn_loglik_1, mvn_loglik_2 = mvn_loglik(obj.spots.xyz_coords, labels, obj.cells, obj.config['is3D'])

multiple_logpdfs finished in 0.3523552417755127 secs
multivariate_normal finished in 54.98275709152222 secs


### Sanity checking

In [7]:
# Number of log-likelihood values returned by the user-defined implementation.
len(mvn_loglik_1)

889307

In [8]:
# Number of log-likelihood values returned by the scipy implementation.
len(mvn_loglik_2)

889307

In [9]:
# Check that both implementations agree element-wise, within np.allclose's default tolerances.
np.allclose(mvn_loglik_1, mvn_loglik_2)

True