In [22]:
import pandas as pd 
import numpy as np
from typing import List, Union
from scipy.special import erf, binom

from statdepth.depth._depthcalculations import _subsequences

def _norm_cdf(x: np.array, mu: float, sigma: float):
    """
    Estimate the CDF at x for the normal distribution parametrized by mu and sigma^2
    """
    print(f'erf term is {erf(x - mu) / (sigma * np.sqrt(2))}')
    return 0.5 * (1 + erf(x - mu) / (sigma * np.sqrt(2)))

def _uncertain_depth_univariate(data: pd.DataFrame, curve: Union[str, int], sigma2: pd.DataFrame, J: int=2, strict=False):
    """
    Calculate uncertain (probabilistic) band depth for the given curve, assuming each entry of every
    other curve comes from an independent normal distribution whose mean is the observed value and
    whose variance is the corresponding entry in sigma2.

    For every subsequence of J other curves, and for every time point t, the probability that the
    target value x(t) lies between the minimum and maximum of those J curves is

        1 - prod_i P(f_i(t) <= x(t)) - prod_i P(f_i(t) > x(t))

    For J=2 this is p1 + p2 - 2*p1*p2 and for J=3 it is p1 + p2 + p3 - p1*p2 - p2*p3 - p1*p3.

    Parameters:
    -----------
    data: pd.DataFrame
        An n x p matrix, where we have p real-valued functions collected at n discrete time intervals
    curve: int or str
        Column (function) to calculate depth for. Must be a column of data.
    sigma2: pd.DataFrame
        An n x p matrix where each entry is the variance of the distribution at that entry.
        It is looked up by the index and column labels of data.
    J: int
        Number of curves used to form each band
    strict: bool
        If True, each band contributes the product over time of the per-time probabilities
        (the curve is inside the band at every time point, treating time points as independent).
        If False, each band contributes the mean over time (expected proportion of time inside the band).

    Returns:
    ----------
    float: Depth of the curve, averaged over all binom(p - 1, J) bands. NaN when there are no
        time points or fewer than J other curves, since no band can be formed.

    Raises:
    ----------
    KeyError: If curve is not a column of data
    """
    if curve not in data.columns:
        raise KeyError(f'curve {curve!r} is not a column of data')

    n = data.shape[0]

    # Keep the target curve's values before dropping it: they are the points at which
    # every other curve's CDF has to be evaluated.
    target = data[curve].to_numpy(dtype=float)
    others = data.drop(curve, axis=1)
    sigma = sigma2.pow(.5)

    n_bands = binom(others.shape[1], J)
    if n == 0 or n_bands == 0:
        return np.nan

    # P(f(t) <= x(t)) for every remaining curve f, computed once per curve (vectorised over time)
    # rather than once per band per time point.
    below = {
        col: _norm_cdf(
            target,
            others[col].to_numpy(dtype=float),
            sigma.loc[others.index, col].to_numpy(dtype=float),
        )
        for col in others.columns
    }

    depth = 0.0
    for seq in _subsequences(others.columns, J):
        probs = np.array([below[col] for col in seq])

        # Complement of "x is below all J curves" or "x is above all J curves".
        inside = 1 - probs.prod(axis=0) - (1 - probs).prod(axis=0)
        # Guard against tiny negative values / values above 1 caused by floating point rounding.
        inside = np.clip(inside, 0.0, 1.0)

        depth += inside.prod() if strict else inside.mean()

    return depth / n_bands


In [23]:
from statdepth.testing import generate_noisy_univariate

# Synthetic example inputs, each scaled by 5.
# NOTE(review): generate_noisy_univariate is defined elsewhere; its return type, shape, and
# whether it is seeded are not visible here, so these two draws may differ between runs.
df = generate_noisy_univariate() * 5
# NOTE(review): if this frame is passed as `sigma2` to _uncertain_depth_univariate it is treated as
# per-entry variances and square-rooted; any negative entries would give NaN standard deviations — confirm
# the generator only returns non-negative values.
sigma2 = generate_noisy_univariate() * 5

In [24]:
def probabilistic_depth(data: pd.DataFrame, sigma2: pd.DataFrame, J: int=2, strict=False):
    """
    Compute the uncertain depth of every column of data.

    Parameters:
    -----------
    data: pd.DataFrame
        Frame whose columns are the curves to rank
    sigma2: pd.DataFrame
        Variances forwarded unchanged to _uncertain_depth_univariate
    J: int
        Forwarded unchanged to _uncertain_depth_univariate
    strict: bool
        Forwarded unchanged to _uncertain_depth_univariate

    Returns:
    ----------
    pd.Series: One depth value per column of data, indexed by the column labels
    """
    labels = data.columns
    per_curve = [
        _uncertain_depth_univariate(data=data, curve=label, sigma2=sigma2, J=J, strict=strict)
        for label in labels
    ]
    return pd.Series(data=per_curve, index=labels)
            

In [35]:
# Scratch experiment: build a statement from a string and run it with exec.
# p starts out as the operator character '*'.
p = '*'
# The f-string expands to the source text 'p=5*5'; exec runs it in the current namespace,
# so p is rebound from the string '*' to the integer 25.
exec(f'p=5{p}5')

In [36]:
# Display p; the exec call in the previous cell rebound it to the integer 25.
p

25