# Testing Notebook 03

This notebook is where I'm developing the containment methods for multivariate functions in $\mathbb{R}^n$.

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt

from statdepth.depth._containment import _is_in_simplex
from statdepth.depth._depthcalculations import _subsequences

First, create some multidimensional data: six frames of random integers, each with 30 rows and three columns (`A`, `B`, `C`).

In [2]:
# Fixed seed and a local generator so the synthetic frames are identical on every
# Restart & Run All, without touching NumPy's global random state.
SEED = 0
rng = np.random.default_rng(SEED)

# Six random frames, each with 30 rows and 3 integer-valued columns A, B, C.
# Values are drawn from {0, ..., 4} (the upper bound 5 is exclusive).
data = [
    pd.DataFrame(rng.integers(0, 5, size=(30, 3)), columns=list('ABC'))
    for _ in range(6)
]

# Keep the individual names bound for any cell that refers to a single frame.
df1, df2, df3, df4, df5, df6 = data

In [3]:
from scipy.special import binom
from statdepth.depth._containment import _is_in_simplex
from statdepth.depth._depthcalculations import _subsequences
from typing import List

def _simplex_containment(data: List[pd.DataFrame], curve: pd.DataFrame, J=2, relax=False):
    """Score how much of `curve` is contained in the simplex formed by `data`.

    For every index label of `curve`, the row with that label is taken from each
    frame in `data`; those rows are handed to `_is_in_simplex` as the simplex
    vertices, with the matching row of `curve` as the query point. The per-label
    results are added up.

    Parameters
    ----------
    data : list of pd.DataFrame
        Frames supplying the simplex vertices. Each one is indexed with the
        labels of `curve`, so it must contain all of them.
    curve : pd.DataFrame
        Frame whose rows are tested for containment.
    J : int
        Accepted for signature compatibility; not used by this function.
    relax : bool
        If True, return the summed result divided by the row count of `data[0]`.
        If False, floor-divide instead, so the value only reaches 1 when the
        summed result is at least that row count.

    Returns
    -------
    The summed containment result, true-divided (`relax=True`) or floor-divided
    (`relax=False`) by the number of rows of `data[0]`.
    """
    n_rows, _ = data[0].shape

    # One containment check per index label of the curve, accumulated from 0
    hits = sum(
        _is_in_simplex(
            simplex_points=np.array([frame.loc[label, :] for frame in data]),
            point=np.array(curve.loc[label, :]),
        )
        for label in curve.index
    )

    if relax:
        return hits / n_rows
    return hits // n_rows
    
    

In [4]:
def _simplex_depth_f(data: list, curve: pd.DataFrame, J=2, relax=False):
    """Depth of `curve` relative to the frames in `data`, via simplex containment.

    Index subsets of size d + 1 (d = number of columns of `data[0]`) are obtained
    from `_subsequences`; for each subset the corresponding frames are used as
    simplex vertices and `_simplex_containment` is evaluated for `curve`. The
    summed result is divided by `binom(n, d + 1)`, where n = `len(data)`.

    Parameters
    ----------
    data : list of pd.DataFrame
        Candidate vertex frames.
    curve : pd.DataFrame
        Frame whose depth is computed.
    J : int
        Upper bound of the outer loop `range(2, J + 1)`.
    relax : bool
        Forwarded to `_simplex_containment`.

    Returns
    -------
    The accumulated depth value (0 if the outer loop does not run).
    """
    # Only the column count is needed here: the simplices use d + 1 vertices.
    d = data[0].shape[1]
    n = len(data)
    depth = 0

    # NOTE(review): `j` is not used inside the loop, so every pass adds the same
    # term and J > 2 scales the result -- confirm this is intended.
    for j in range(2, J + 1):
        S_nj = 0
        # presumably every (d + 1)-element subset of the frame indices -- verify
        # against _subsequences
        subseq = _subsequences(list(range(n)), d + 1)

        for seq in subseq:
            cdata = [data[i] for i in seq]
            # Test containment against this subset only. Previously the full
            # `data` list was passed, so `cdata` was built and then ignored.
            S_nj += _simplex_containment(data=cdata, curve=curve, relax=relax)

        depth += S_nj / binom(n, d + 1)

    return depth

def simplexdepth(data: list, J=2, relax=False):
    """Compute the simplex depth of every frame in `data` relative to the others.

    Each frame in turn is treated as the curve to score; the remaining frames are
    passed to `_simplex_depth_f` as the vertex pool.

    Parameters
    ----------
    data : list of pd.DataFrame
        The frames to score.
    J : int
        Forwarded to `_simplex_depth_f`.
    relax : bool
        Forwarded to `_simplex_depth_f`.

    Returns
    -------
    pd.Series
        One depth per frame, indexed by the frame's position in `data`.
    """
    depths = []
    for cdf in data:
        # Leave the scored curve out of the vertex pool. The comparison is by
        # identity, so the same object appearing twice is removed both times.
        cdata = [df for df in data if df is not cdf]
        # `_simplex_depth` is not defined in this notebook; the helper with this
        # signature is `_simplex_depth_f`.
        depths.append(_simplex_depth_f(data=cdata, curve=cdf, J=J, relax=relax))

    return pd.Series(index=list(range(len(data))), data=depths)

In [6]:
def pointwise_depth(data: pd.DataFrame, J=2, containment='simplex'):
    """Compute a simplex-based depth for every row of `data`.

    For each row, the index labels of all other rows are passed to
    `_subsequences` together with the size d + 1 (d = number of columns). Each
    returned selection of labels picks the simplex vertices from `data`, and
    `_is_in_simplex` is evaluated with the row as the query point. The summed
    results are divided by `binom(n, d + 1)`, where n is the total number of
    rows (including the row being scored).

    Parameters
    ----------
    data : pd.DataFrame
        One point per row.
    J : int
        Accepted but not used by this function.
    containment : str
        Accepted but not used by this function.

    Returns
    -------
    pd.Series
        One depth per row, sharing the index of `data`.
    """
    n_points, n_dims = data.shape
    n_vertices = n_dims + 1

    scores = []
    for label in data.index:
        row = data.loc[label, :]

        # Vertex candidates are all rows except the one being scored
        other_labels = list(data.drop(label, axis=0).index)

        hits = sum(
            _is_in_simplex(simplex_points=np.array(data.loc[chosen, :]),
                           point=np.array(row))
            for chosen in _subsequences(other_labels, n_vertices)
        )
        scores.append(hits / binom(n_points, n_vertices))

    return pd.Series(index=data.index, data=scores)
        
        

In [10]:
# Ten random 2-D points, one per row, with coordinates in columns A and B
df = pd.DataFrame(np.random.rand(10, 2), columns=['A', 'B'])
# Last expression of the cell: scatter plot of the sample
px.scatter(x=df.A, y=df.B)

In [14]:
# Depth of every sample point, then ordered from deepest to shallowest
depths = pointwise_depth(data=df)
depths = depths.sort_values(ascending=False)


In [22]:
# A-coordinates of the five deepest points (row and column selected in one .loc call)
df.loc[depths.index[:5], 'A']

2    0.563530
7    0.573976
0    0.745848
6    0.422898
9    0.662431
Name: A, dtype: float64

In [24]:
import plotly.graph_objects as go

def plot_deepest(df, depths, n=1):
    """Display all points in blue with the first `n` entries of `depths` overlaid in red.

    Parameters
    ----------
    df : frame with columns 'A' and 'B' holding the point coordinates.
    depths : object whose `.index` holds row labels of `df`; the first `n`
        labels are highlighted (the caller is expected to have sorted it).
    n : number of leading labels to highlight.

    Returns
    -------
    None. The figure is shown as a side effect.
    """
    all_points = go.Scatter(x=df['A'], y=df['B'], mode='markers', marker_color='blue')

    # Rows of df that belong to the first n labels of `depths`
    top_rows = df.loc[depths.index[:n], :]
    highlighted = go.Scatter(x=top_rows['A'], y=top_rows['B'], mode='markers',
                             marker_color='red')

    go.Figure(data=[all_points, highlighted]).show()
    
# Highlight the two deepest points of the sample
plot_deepest(df, depths, n=2)