<a href="https://colab.research.google.com/github/Tyred/TimeSeries_OCC-PUL/blob/main/Notebooks/runners/OC_JKNN_UCR_runner.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<h1> One-Class J-K Nearest Neighbor Classifier</h1>

The main purpose of this notebook is to run the One-Class JKNN algorithm on every binary dataset in the [UCR Time Series Archive](https://www.cs.ucr.edu/~eamonn/time_series_data_2018/) and report the results.

# Imports

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score

#from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
#from dtaidistance import dtw
#from cdtw import pydtw
#from fastdtw import fastdtw
#from scipy.spatial.distance import euclidean

# OneClassJKNN Definition

In [None]:
def fastdtw_wrapper(s1, s2):
    """Return the first element of the result of ``fastdtw(s1, s2, dist=euclidean)``.

    NOTE: this relies on the names ``fastdtw`` and ``euclidean`` being in
    scope; the matching imports in the imports cell are currently commented
    out, so they must be re-enabled before this wrapper is called.

    Parameters
    ----------
    s1, s2
        The two sequences to compare; passed through unchanged.
    """
    result = fastdtw(s1, s2, dist=euclidean)
    # presumably a (distance, path) pair, of which only the distance is
    # wanted — TODO confirm against the fastdtw documentation
    return result[0]

def pydtw_wrapper(s1, s2):
    """Run ``pydtw.dtw`` on two sequences with fixed settings and return its distance.

    NOTE: this relies on the name ``pydtw`` being in scope; the matching
    import in the imports cell is currently commented out, so it must be
    re-enabled before this wrapper is called.

    Parameters
    ----------
    s1, s2
        The two sequences to compare; passed through unchanged.
    """
    dtw_settings = pydtw.Settings(
        step='p0sym',        # Sakoe-Chiba symmetric step with slope constraint p = 0
        window='palival',    # type of the window
        param=2.0,           # window parameter
        norm=False,          # normalization
        compute_path=True,
    )
    alignment = pydtw.dtw(s1, s2, dtw_settings)
    return alignment.get_dist()

class OneClassJKNN():
    """One-class j-k nearest-neighbour classifier.

    A sample is accepted (label ``1``) when the mean distance to its ``j``
    nearest training samples is at most ``threshold`` times the average, taken
    over those same ``j`` neighbours, of each neighbour's mean distance to its
    own ``k`` nearest training samples. Otherwise it is rejected (label ``-1``).
    """

    def __init__(self, j, k): # j, k >= 1
        """Store the neighbourhood sizes and create the neighbour index.

        Parameters
        ----------
        j : int
            Number of training neighbours looked up for every test sample.
        k : int
            Number of training neighbours looked up for every training sample.

        Raises
        ------
        ValueError
            If ``j`` or ``k`` is smaller than 1.
        """
        # Fail early instead of deferring the error to fit()/predict().
        if j < 1 or k < 1:
            raise ValueError("j and k must both be >= 1 (got j=%r, k=%r)" % (j, k))

        self.j = j
        self.k = k

        self.nbrs = NearestNeighbors(n_neighbors=self.k)

    def fit(self, train_data):
        """Index the training samples and cache their mean k-neighbour distance.

        Parameters
        ----------
        train_data : array-like, one row per training sample.

        Returns
        -------
        OneClassJKNN
            ``self``, so calls can be chained.
        """
        self.nbrs.fit(train_data)
        # Called without a query, kneighbors() returns the neighbours of every
        # training sample (per the scikit-learn documentation a point is then
        # not counted as its own neighbour).
        distances, _ = self.nbrs.kneighbors()

        # One value per training sample: mean distance to its k neighbours.
        self.dist_avg = np.mean(distances, axis=1)

        return self

    def predict(self, test_samples, threshold):
        """Label every test sample as inlier (``1``) or outlier (``-1``).

        Parameters
        ----------
        test_samples : array-like, one row per sample to classify.
        threshold : float
            Multiplier applied to the neighbours' own average distance before
            comparing it with the sample's average distance.

        Returns
        -------
        numpy.ndarray
            Float array with one entry per test sample, ``1.0`` or ``-1.0``.
        """
        test_samples = np.asarray(test_samples)
        if len(test_samples) == 0:
            # Nothing to classify; avoid querying the index with an empty batch.
            return np.zeros(0)

        # A single batched query replaces one kneighbors() call per sample.
        distances, indices = self.nbrs.kneighbors(test_samples, self.j)

        # Mean distance from each test sample to its j nearest training samples.
        jnbrs_avg = np.mean(distances, axis=1)
        # Average of the cached k-neighbour distances of those j training samples.
        j_knbrs_avg = np.sum(self.dist_avg[indices], axis=1) / self.j

        return np.where(jnbrs_avg <= (j_knbrs_avg * threshold), 1.0, -1.0)

# Reports function

In [None]:
def print_stats(predictions, labels):
    """Print accuracy, precision, recall and F1 as percentages and return the F1 score.

    Parameters
    ----------
    predictions : array-like
        Predicted labels.
    labels : array-like
        Ground-truth labels, in the same order as ``predictions``.

    Returns
    -------
    float
        The value returned by ``f1_score(labels, predictions)`` (not scaled to
        a percentage).
    """
    accuracy = accuracy_score(labels, predictions)
    print("Accuracy = %.2f"  % (accuracy  * 100) + "%")

    precision = precision_score(labels, predictions)
    print("Precision = %.2f" % (precision * 100) + "%")

    recall = recall_score(labels, predictions)
    print("Recall = %.2f"    % (recall    * 100) + "%")

    # Computed once and reused for both the report and the return value.
    f1 = f1_score(labels, predictions)
    print("F1-Score = %.2f"  % (f1        * 100) + "%")

    return f1

# Runner cell

In [None]:
# Parameter config
j = 2             # neighbours looked up per test sample
k = 5             # neighbours looked up per training sample
threshold = 1.35  # multiplier passed to OneClassJKNN.predict

datasets = ['Yoga', 'WormsTwoClass', 'Wine', 'Wafer', 'TwoLeadECG', 'Strawberry', 'SemgHandGenderCh2', 
            'BeetleFly', 'BirdChicken', 'Computers', 'DistalPhalanxOutlineCorrect', 'Earthquakes',
            'ECG200', 'ECGFiveDays', 'FordA', 'FordB', 'HandOutlines', 'ItalyPowerDemand', 
            'MiddlePhalanxOutlineCorrect', 'Chinatown', 'FreezerRegularTrain', 'FreezerSmallTrain',
            'GunPointAgeSpan', 'GunPointMaleVersusFemale', 'GunPointOldVersusYoung', 'PowerCons', 'Coffee',
            'Ham', 'Herring', 'Lightning2', 'MoteStrain', 'PhalangesOutlinesCorrect', 'ProximalPhalanxOutlineCorrect',
            'ShapeletSim', 'SonyAIBORobotSurface1', 'SonyAIBORobotSurface2', 'ToeSegmentation1', 'ToeSegmentation2',
            'HouseTwenty']

# Root folder holding one sub-folder per dataset.
path = 'drive/My Drive/UFSCar/FAPESP/IC/Data/UCRArchive_2018'

f1_sum = 0
n_runs = 0  # number of (dataset, positive class) evaluations actually performed
for dataset in datasets:
    tr_data = np.genfromtxt(path + "/" + dataset + "/" + dataset + "_TRAIN.tsv",
                            delimiter="\t")
    te_data = np.genfromtxt(path + "/" + dataset + "/" + dataset + "_TEST.tsv",
                            delimiter="\t")

    # Column 0 holds the class label; the remaining columns are the series values.
    labels = te_data[:, 0]
    test_data = te_data[:, 1:]  # identical for every positive class, so sliced once

    unique_labels = np.unique(labels)
    # Each class in turn plays the role of the single "known" (positive) class.
    for class_label in unique_labels:
        train_data = tr_data[tr_data[:, 0] == class_label, 1:] # train

        # Labeling: 1 for the positive class, -1 for everything else
        occ_labels = [1 if x == class_label else -1 for x in labels]

        clf = OneClassJKNN(j, k).fit(train_data)

        result_labels = clf.predict(test_data, threshold)
        print("Dataset:", dataset, "Positive Label:", class_label)
        f1_sum += print_stats(result_labels, occ_labels)
        n_runs += 1

# Average over the runs that really happened rather than assuming exactly two
# classes per dataset; guard against an empty dataset list.
f1_avg = f1_sum / n_runs if n_runs else float("nan")
print("F1-Score Average:", f1_avg)