<a href="https://colab.research.google.com/github/Tyred/TimeSeries_OCC-PUL/blob/main/Notebooks/IsolationForest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [45]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from sklearn.ensemble import IsolationForest
from sklearn.metrics import precision_score, accuracy_score, recall_score, f1_score

import tensorflow as tf
from tensorflow import keras

from sklearn.decomposition import PCA
from sklearn.manifold import MDS

## Reading the dataset from Google Drive


In [46]:
# Root folder of the UCR archive. The path is relative, so it is resolved against
# the notebook's current working directory.
path = 'drive/My Drive/UFSCar/FAPESP/IC/Data/UCRArchive_2018'

# The dataset name is typed in at run time; it selects both the sub-folder and the file prefix.
dataset = input('Dataset: ')

# Files are laid out as <path>/<dataset>/<dataset>_TRAIN.tsv and ..._TEST.tsv (tab-separated).
tr_data = np.genfromtxt("/".join((path, dataset, dataset + "_TRAIN.tsv")), delimiter="\t")
te_data = np.genfromtxt("/".join((path, dataset, dataset + "_TEST.tsv")), delimiter="\t")

# Column 0 of the test split is used as the class label of each row.
labels = te_data[:, 0]
print("Labels:", np.unique(labels))

Dataset: Strawberry
Labels: [1. 2.]


## Splitting into train and test data

In [47]:
# Ask which original class label is treated as the positive class.
class_label = int(input('Positive class label: '))

# Column 0 holds the class label; the remaining columns are the features fed to the model.
positive_rows = tr_data[:, 0] == class_label

# Fail early with a clear message: without this check a mistyped label yields an
# empty training matrix and the error only surfaces later, when the model is fitted.
if not positive_rows.any():
    raise ValueError(
        "Label %d does not occur in the training set; available labels: %s"
        % (class_label, np.unique(tr_data[:, 0]))
    )

train_data  = tr_data[positive_rows, 1:]  # train: positive-class rows only, label column dropped
test_data   = te_data[:, 1:]              # test: every row, label column dropped

print("Train data shape:", train_data.shape)
print("Test data shape:", test_data.shape)

Positive class label: 1
Train data shape: (219, 235)
Test data shape: (370, 235)


## Labeling for the one-class classification (OCC) task
<ul>
<li> Label 1 for the positive class </li>
<li> Label -1 for the other class(es) </li>
</ul>

In [48]:
# Boolean mask over the test labels: True where a row carries the chosen positive label.
is_positive = labels == class_label

# Map the mask to +1 / -1. A plain Python list is kept because `.count` is used below.
occ_labels = [1 if flag else -1 for flag in is_positive]

print("Positive samples:", occ_labels.count(1))
print("Negative samples:", occ_labels.count(-1))

Positive samples: 132
Negative samples: 238


## Results


In [49]:
# Create the forest with a fixed random_state, then fit it on the training rows
# (which contain the positive class only). `fit` returns the estimator, so the
# result is assigned back to keep the cell free of displayed output.
clf = IsolationForest(random_state=0)
clf = clf.fit(train_data)

In [50]:
# Predict a label for every test row with the fitted forest.
result_labels = clf.predict(test_data)

# Score the predictions against the +1 / -1 ground truth. The metrics are evaluated
# in the order accuracy, precision, recall, F1.
acc, precision, recall, f1 = (
    metric(occ_labels, result_labels)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report each score as a percentage with two decimals, one per line.
print(
    "Accuracy: %.2f"  % (acc*100)       + "%",
    "Precision: %.2f" % (precision*100) + "%",
    "Recall: %.2f"    % (recall*100)    + "%",
    "F1-Score: %.2f"  % (f1*100)        + "%",
    sep="\n",
)

Accuracy: 79.46%
Precision: 65.05%
Recall: 91.67%
F1-Score: 76.10%
