In [1]:
import numpy as np
import roughpy as rp
import pandas as pd
from pathlib import Path
import pickle
from functools import partial

from FindAnomalousIntervals import find_anomalous_intervals
from Utils import restrict_interval_for_power, create_power_sequence, compute_signature
from Visualise import draw_segment
from Augmentations import CumSum, AddTime

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
df_anom = find_anomalous_intervals('NN',50,3)

In [3]:
def restrict_interval(before,after,row):
    return row.signatures[max(0,row.anom_index+before):min(len(row.signatures), row.anom_index+after)]

In [4]:
rifp = partial(restrict_interval,-5,15)
anoms=df_anom[df_anom.channel.isin([5,6,10,11,13])].apply(rifp,axis=1)

In [9]:
depth=4
find_sigs = partial(compute_signature,depth,[CumSum,AddTime])

In [66]:

ps=anoms.apply(create_power_sequence)
sigs=ps.apply(find_sigs)


In [12]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,roc_auc_score,roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

class Scaler:
    def __init__(self,thres=1e-10):
        self.thres=thres
    
    def fit(self,X):
        self.mu = X.mean(0)
        X=X-self.mu
        U, S, Vt = np.linalg.svd(X)
        k = np.sum(S > self.thres)  # detected numerical rank
        self.numerical_rank = k
        self.Vt = Vt[:k]
        self.S = S[:k]
        
    def transform(self,X):
        x = X - self.mu
        return x @ self.Vt.T  / self.S


def EvaluateClassifier(X,y,split=0.4,n_neighbors=1,scal=True):
    X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=split)

    if scal:
        scaler=Scaler(1e-8)
        scaler.fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)


    xgb_model=XGBClassifier()
    logistic_regression = LogisticRegression()
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    xgb_model.fit(X_train,y_train)
    xgb_pred=xgb_model.predict(X_test)

    cr_xgb = classification_report(y_test, xgb_pred)


    logistic_regression.fit(X_train, y_train)
    lr_pred = logistic_regression.predict(X_test)

    cr_lr = classification_report(y_test, lr_pred)

    knn.fit(X_train,y_train)
    knn_pred = knn.predict(X_test)
    cr_knn = classification_report(y_test, knn_pred)

    return {'XGB':cr_xgb,'LR': cr_lr, 'KNN': cr_knn}

In [13]:
sigs

0      [1.0, 10.0, 368.5799985403979, 3823.8880146676...
0      [1.0, 10.0, 424.825157612431, 4461.18513515901...
0      [1.0, 10.0, 96.78519380595532, 966.26652141278...
1      [1.0, 10.0, 217.47215655114906, 1753.410843685...
2      [1.0, 10.0, 213.79602083105937, 1759.812874668...
                             ...                        
387    [1.0, 10.0, 337.3512873084791, 2763.0627491806...
53     [1.0, 10.0, 332.6937400490728, 2930.9164730513...
402    [1.0, 10.0, 356.35523823228743, 2970.535257445...
135    [1.0, 10.0, 585.6922391953755, 5676.9828230887...
54     [1.0, 10.0, 285.3333121578959, 2373.9605698665...
Length: 334, dtype: object

In [22]:
mask={5:0,6:0,10:1,11:2,13:3}
results=EvaluateClassifier(np.vstack(sigs),df_anom[df_anom.channel.isin([5,6,10,11,13])].channel.replace(mask))

In [23]:
print(results['XGB'])

              precision    recall  f1-score   support

           0       0.94      0.96      0.95        50
           1       0.91      0.89      0.90        47
           2       0.96      0.96      0.96        26
           3       0.82      0.82      0.82        11

    accuracy                           0.93       134
   macro avg       0.91      0.91      0.91       134
weighted avg       0.93      0.93      0.93       134



In [227]:
df_anom[df_anom.channel.isin([3])].shape

(11, 5)

In [89]:
import pyximport
pyximport.install()

(None, <pyximport._pyximport3.PyxImportMetaFinder at 0x1bfdec94b90>)

In [None]:
##### As

# As using sig kernel talk about linear independence of signatures

In [97]:
import torch
from sklearn.preprocessing import LabelEncoder
import sigkernel
from sklearn.model_selection import GridSearchCV
from tqdm import tqdm
from sklearn.svm import SVC

In [100]:
svc_parameters = {'C': np.logspace(0, 4, 5), 'gamma': list(np.logspace(-4, 4, 9)) + ['auto']}
_sigmas = [1e-3, 5e-3, 1e-2, 2.5e-2, 5e-2, 7.5e-2, 1e-1, 2.5e-1, 5e-1, 7.5e-1, 1., 2., 5., 10.]
_scales = [5e-2, 1e-1, 5e-1, 1e0]
trained_models = {}
best_scores_train={'signature pde':0}

In [93]:
from Utils import to_array

In [166]:
labels = df_anom[df_anom.channel.isin([5,6,10,11,13])].channel.replace(mask)

In [169]:

seqs=[]
y=[]
for j in range(ps.shape[0]):
    seq = ps.iloc[j]
    if seq.shape[0] == 20:
        seqs.append(seq)
        y.append(labels.iloc[j])

In [170]:
X = np.hstack(seqs).T

In [171]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2)

In [172]:
mask={5:0,6:0,10:1,11:2,13:3}

In [173]:
y_train = LabelEncoder().fit_transform(y_train)
y_test = LabelEncoder().fit_transform(y_test)

# path-transform
x_train = sigkernel.transform(X_train, at=True, ll=True, scale=.1)

In [174]:
if x_train.shape[0] <= 150 and x_train.shape[1] <=150 and x_train.shape[2] <= 8:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dtype = torch.float32
else: # otherwise do computations in cython
    device = 'cpu'
    dtype = torch.float64

# numpy -> torch
x_train = torch.tensor(x_train, dtype=dtype, device=device)

# grid search over sigmas
sigmas = tqdm(_sigmas, position=2, leave=False)
for sigma in sigmas:
    sigmas.set_description(f"signature PDE sigma: {sigma}")

    # define static kernel
    static_kernel = sigkernel.RBFKernel(sigma=sigma)

    # initialize corresponding signature PDE kernel
    signature_kernel = sigkernel.SigKernel(static_kernel, dyadic_order=0)

    # compute Gram matrix on train data
    G_train = signature_kernel.compute_Gram(x_train, x_train, sym=True).cpu().numpy()

    # SVC sklearn estimator
    svc = SVC(kernel='precomputed', decision_function_shape='ovo')
    svc_model = GridSearchCV(estimator=svc, param_grid=svc_parameters, cv=5, n_jobs=-1)
    svc_model.fit(G_train, y_train)
    
    # empty memory
    del G_train
    torch.cuda.empty_cache()

    # store results
    if svc_model.best_score_ > best_scores_train['signature pde']:
        best_scores_train['signature pde'] = svc_model.best_score_


[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

In [175]:
x_test = sigkernel.transform(X_test, at=True, ll=True, scale=.1)

# move to cuda (if available and memory doesn't exceed a certain threshold)
if x_test.shape[0] <= 150 and x_test.shape[1] <=150 and x_test.shape[2] <= 10:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dtype = torch.float64
else: # otherwise do computations in cython
    device = 'cpu'
    dtype = torch.float64
    
# numpy -> torch 
x_train = torch.tensor(x_train, dtype=dtype, device=device)
x_test = torch.tensor(x_test, dtype=dtype, device=device)

# define static kernel
static_kernel = sigkernel.RBFKernel(sigma=sigma)

# initialize corresponding signature PDE kernel
signature_kernel = sigkernel.SigKernel(static_kernel, dyadic_order=0)
    
# compute Gram matrix on test data
G_test = signature_kernel.compute_Gram(x_test, x_train, sym=False).cpu().numpy()

# record scores
train_score = svc_model.best_score_
test_score = svc_model.score(G_test, y_test)

  x_train = torch.tensor(x_train, dtype=dtype, device=device)


In [176]:
train_score

0.9384615384615385

In [177]:
test_score

0.9384615384615385