In [35]:
import numpy as np
import pandas as pd
from scipy.interpolate import UnivariateSpline
from scipy.integrate import trapz, cumtrapz
import optimum_reparamN2 as orN2
import optimum_reparam_N as orN
from joblib import Parallel, delayed


df = pd.read_csv("./data/sample_data2.csv")
print(df.shape)
df.head()

(288, 102)


Unnamed: 0,bp0_sb0_cpu_0_temp,bp0_sb0_cpu_1_temp,bp0_sb0_cpu_2_temp,bp0_sb0_cpu_3_temp,bp0_sb1_cpu_0_temp,bp0_sb1_cpu_1_temp,bp0_sb1_cpu_2_temp,bp0_sb1_cpu_3_temp,bp0_sb2_cpu_0_temp,bp0_sb2_cpu_1_temp,...,bp1_sb11_cpu_0_temp,bp1_sb11_cpu_1_temp,bp1_sb11_cpu_2_temp,bp1_sb11_cpu_3_temp,iosb0_cpu_temp,iosb1_cpu_temp,iosb2_cpu_temp,iosb3_cpu_temp,iosb4_cpu_temp,iosb5_cpu_temp
0,17.85,18.48,18.8,17.85,17.85,18.48,18.17,17.53,18.17,18.17,...,17.53,18.48,19.12,17.85,16.58,16.26,17.21,16.26,15.3,17.85
1,17.85,18.17,18.48,17.53,17.53,18.48,18.17,17.53,18.17,18.17,...,17.53,18.17,19.12,17.85,16.58,15.94,17.21,16.26,15.3,17.85
2,17.85,18.17,18.48,17.53,17.85,18.48,18.17,17.53,18.17,18.17,...,17.53,18.17,19.12,17.85,16.58,16.26,17.21,16.26,15.3,17.85
3,17.85,18.17,18.8,17.53,17.53,18.48,18.17,17.53,18.17,18.17,...,17.53,18.17,19.12,17.85,16.58,15.94,17.21,16.26,15.3,17.85
4,17.85,18.17,18.8,17.53,17.85,18.48,18.17,17.53,18.17,18.17,...,17.53,18.17,19.12,17.85,16.58,16.26,17.21,16.26,15.3,17.85


In [43]:
start = 0
n_timepts = 20

In [40]:
def restart():    
    F = df[start:n_timepts].iloc[:,:-1].to_numpy()
    time_points = np.linspace(0, 1, F.shape[0])

    return F, time_points

F, time_points = restart()

In [41]:
def f_to_srsf(f, time, smooth=False):
    eps = np.finfo(np.double).eps
    f0, g, g2 = gradient_spline(time, f, smooth)
    q = g / np.sqrt(np.fabs(g) + eps)
    return q

def gradient_spline(time, f, smooth=False):
    M = f.shape[0]

    if f.ndim > 1:
        N = f.shape[1]
        f0 = np.zeros((M, N))
        g = np.zeros((M, N))
        g2 = np.zeros((M, N))
        for k in range(0, N):
            if smooth:
                spar = time.shape[0] * (.025 * np.fabs(f[:, k]).max()) ** 2
            else:
                spar = 0
            tmp_spline = UnivariateSpline(time, f[:, k], s=spar)
            f0[:, k] = tmp_spline(time)
            g[:, k] = tmp_spline(time, 1)
            g2[:, k] = tmp_spline(time, 2)
    else:
        if smooth:
            spar = time.shape[0] * (.025 * np.fabs(f).max()) ** 2
        else:
            spar = 0
        tmp_spline = UnivariateSpline(time, f, s=spar)
        f0 = tmp_spline(time)
        g = tmp_spline(time, 1)
        g2 = tmp_spline(time, 2)

    return f0, g, g2

def optimum_reparam(q1, time, q2, method="DP2", lam=0.0, penalty="roughness", grid_dim=7):
    if penalty == "l2gam" and (method == "DP" or method == "DP2"):
        raise Exception('penalty not implemented')
    if penalty == "l2psi" and (method == "DP" or method == "DP2"):
        raise Exception('penalty not implemented')
    if penalty == "geodesic" and (method == "DP" or method == "DP2"):
        raise Exception('penalty not implemented')
    
    if method == "DP2":
        if q1.ndim == 1 and q2.ndim == 1:
            gam = orN2.coptimum_reparam(np.ascontiguousarray(q1), time,
                                        np.ascontiguousarray(q2), lam, grid_dim)

        if q1.ndim == 1 and q2.ndim == 2:
            gam = orN2.coptimum_reparamN(np.ascontiguousarray(q1), time,
                                        np.ascontiguousarray(q2), lam, grid_dim)

        if q1.ndim == 2 and q2.ndim == 2:
            gam = orN2.coptimum_reparamN2(np.ascontiguousarray(q1), time,
                                        np.ascontiguousarray(q2), lam, grid_dim)
        
    else:
        raise Exception('Invalid Optimization Method')

    return gam

def elastic_distance(f1, f2, time, method="DP2", lam=0.0):
    q1 = f_to_srsf(f1, time)
    q2 = f_to_srsf(f2, time)

    gam = optimum_reparam(q1, time, q2, method, lam)
    fw = warp_f_gamma(time, f2, gam)
    qw = warp_q_gamma(time, q2, gam)

    Dy = np.sqrt(trapz((qw - q1) ** 2, time))
    M = time.shape[0]

    time1 = np.linspace(0,1,M)
    binsize = np.mean(np.diff(time1))
    psi = np.sqrt(np.gradient(gam,binsize))
    q1dotq2 = trapz(psi, time1)
    if q1dotq2 > 1:
        q1dotq2 = 1
    elif q1dotq2 < -1:
        q1dotq2 = -1

    Dx = np.real(np.arccos(q1dotq2))

    return Dy, Dx

def elastic_outliers(F, depths):

    amp = depths['amplitude']
    phs = depths['phase']

    amp_100 = np.max(amp)
    phs_100 = np.max(phs)

    amp_50 = np.percentile(amp, 50)
    phs_50 = np.percentile(phs, 50)

    amp_iqr = amp_100 - amp_50 
    phs_iqr = phs_100 - phs_50

    amp_lim = max(amp_50 - 1.5 * amp_iqr, 0)
    phs_lim = max(phs_50 - 1.5 * phs_iqr, 0)

    amp_thre = np.percentile(amp, 0.5 * 100)
    phs_thre = np.percentile(phs, 0.5 * 100)

    amp_out = (amp < amp_lim) & (amp < amp_thre)
    phs_out = (phs < phs_lim) & (phs < phs_thre)

    labels = {'amp': amp_out, 'phs': phs_out}

    return labels

def warp_f_gamma(time, f, gam):
    f_temp = np.interp((time[-1] - time[0]) * gam + time[0], time, f)
    return f_temp

def warp_q_gamma(time, q, gam):
    M = gam.size
    gam_dev = np.gradient(gam, 1 / np.double(M - 1))
    tmp = np.interp((time[-1] - time[0]) * gam + time[0], time, q)

    q_temp = tmp * np.sqrt(gam_dev)

    return q_temp

def distmat(f, f1, time, idx, method):
    N = f.shape[1]
    dp = np.zeros(N)
    da = np.zeros(N)

    for jj in range(N):
        Dy,Dx = elastic_distance(f[:,jj], f1, time, method)

        da[jj] = Dy
        dp[jj] = Dx
    
    return(da, dp)

def elastic_depth(F, time_points, method="DP2", lam=0.0, parallel=True):
    obs, fns = F.shape

    amp_dist = np.zeros((fns,fns))
    phs_dist = np.zeros((fns,fns))

    if parallel:
        out = Parallel(n_jobs=-1)(delayed(distmat)(F, F[:, n], time_points, n, method) for n in range(fns))
        for i in range(0, fns):
            amp_dist[i, :] = out[i][0]
            phs_dist[i, :] = out[i][1]
    else:
        for i in range(0, fns):
            amp_dist[i, :], phs_dist[i, :] = distmat(F, F[:, i], time_points, i, method)
    
    amp_dist = amp_dist + amp_dist.T
    phs_dist = phs_dist + phs_dist.T

    amp = 1 / (1 + np.median(amp_dist,axis=0))
    phase = 1 / (1 + np.median(phs_dist,axis=0))
    phase = ((2+np.pi)/np.pi) * (phase - 2/(2+np.pi))

    return amp, phase

In [69]:
F, time_points = restart()
amp_depth, phs_depth = elastic_depth(F, time_points)
depths = {'amplitude': amp_depth, 'phase': phs_depth}
amp_depth, phs_depth

(array([0.21770349, 0.21347695, 0.20689508, 0.25345023, 0.17753604,
        0.20948129, 0.16753463, 0.18333807, 0.18953566, 0.22369266,
        0.24212508, 0.27229296, 0.20403518, 0.23814394, 0.21141354,
        0.21986184, 0.23818937, 0.18967921, 0.20351264, 0.18969972,
        0.20072287, 0.21704589, 0.22601842, 0.20072287, 0.18727198,
        0.24049211, 0.23313059, 0.18706618, 0.22379958, 0.21669549,
        0.2209525 , 0.24323086, 0.21904472, 0.19994637, 0.18970665,
        0.17750713, 0.24323086, 0.20885572, 0.19415116, 0.24141017,
        0.16892652, 0.2428518 , 0.23181764, 0.18420363, 0.17612907,
        0.17248324, 0.18023974, 0.18827784, 0.26107705, 0.27605776,
        0.25814572, 0.23660054, 0.2549053 , 0.26625201, 0.2549053 ,
        0.26507306, 0.2592223 , 0.25342019, 0.26599956, 0.2638491 ,
        0.23652082, 0.24017374, 0.25814572, 0.25744467, 0.25814572,
        0.26200731, 0.25137309, 0.27244238, 0.25814572, 0.25756883,
        0.2649004 , 0.25814572, 0.21112031, 0.25

In [70]:
len(depths['amplitude'])

101

In [71]:
elastic_out = elastic_outliers(F, depths)
print('number of labels:',len(elastic_out['amp']))
elastic_out['amp']

number of labels: 101


array([False, False, False, False,  True, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
       False, False, False, False,  True, False, False, False,  True,
        True,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False])

In [72]:
def prog_update(F, f, depths, labels, method="DP2", lam=0.0, parallel=True):
    obs, fns = F.shape

    amp_dist = np.zeros(fns)
    phs_dist = np.zeros(fns)

    print(f.shape)
    print(fns)
    print(len(time_points))

    # computing elastic distances 
    amp_dist, phs_dist = distmat(F, f, time_points, 0, method)

    amp_new = 1 / (1 + np.median(amp_dist,axis=0))
    phase_new = 1 / (1 + np.median(phs_dist,axis=0))
    phase_new = ((2+np.pi)/np.pi) * (phase_new - 2/(2+np.pi))

    # computing outlier label for new amp/phase depth
    amp_depths = depths['amplitude']
    phs_depths = depths['phase']

    amp_100 = np.max(amp_depths)
    phs_100 = np.max(phs_depths)

    amp_50 = np.percentile(amp_depths, 50)
    phs_50 = np.percentile(phs_depths, 50)

    amp_iqr = amp_100 - amp_50 
    phs_iqr = phs_100 - phs_50

    amp_lim = max(amp_50 - 1.5 * amp_iqr, 0)
    phs_lim = max(phs_50 - 1.5 * phs_iqr, 0)

    amp_thre = np.percentile(amp_depths, 0.5 * 100)
    phs_thre = np.percentile(phs_depths, 0.5 * 100)

    amp_out = (amp_new < amp_lim) & (amp_new < amp_thre)
    phs_out = (phase_new < phs_lim) & (phase_new < phs_thre)

    # updating labels
    labels['amp'] = np.append(labels['amp'], amp_out)
    labels['phs'] = np.append(labels['phs'], phs_out)

    # updating depths
    depths['amplitude'] = np.append(depths['amplitude'], amp_new)
    depths['phase'] = np.append(depths['phase'], phase_new)

    return labels

In [73]:
F_new = df[start:n_timepts].iloc[:,101].T.to_numpy()
out_labels = prog_update(F, F_new, depths, elastic_out)

(20,)
101
20


In [74]:
len(depths['amplitude'])

102

In [75]:
print('number of labels:',len(out_labels['amp']))
out_labels['amp']

number of labels: 102


array([False, False, False, False,  True, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
       False, False, False, False,  True, False, False, False,  True,
        True,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False])