In [1]:
# Intro to One class SVM: http://rvlasveld.github.io/blog/2013/07/12/introduction-to-one-class-support-vector-machines/
# How to use Support Vector Machines for One-Class Classification?: https://analyticsindiamag.com/how-to-use-support-vector-machines-for-one-class-classification/
# Advantages & Disadvantages of SVM: https://dhirajkumarblog.medium.com/top-4-advantages-and-disadvantages-of-support-vector-machine-or-svm-a3c06a2b107

# Papers #
# Support Vector Method for Novelty Detection (Schölkopf - distance maximization from origin to hyperplane): https://proceedings.neurips.cc/paper/1999/file/8725fb777f25776ffa9076e44fcfd776-Paper.pdf
# Support Vector Data Description (Tax and Duin - spherical approach): https://link.springer.com/content/pdf/10.1023/B:MACH.0000008084.60811.49.pdf

# Tutorial followed in this Jupyter notebook (JN): https://www.analyticsvidhya.com/blog/2022/06/one-class-classification-using-support-vector-machines/

# Clustering Methods (Extra): https://estreuselito.github.io/Deus_ex_machina/Mini-Project/Master.html
# Novelty and Outlier detection (extra): https://scikit-learn.org/stable/modules/outlier_detection.html#overview-of-outlier-detection-methods

In [2]:
# One-class SVM fitted on 10 one-dimensional training points.
from sklearn.svm import OneClassSVM

# Training set: the integers 1..10, one feature per sample.
X = [[value] for value in range(1, 11)]

# Query points to classify. NOTE: despite the name, `y` holds unseen samples,
# not labels; the name is kept because the following cells reference it.
y = [[-1], [1], [-2], [2], [-3]]

# `kernel` selects the kernel function ('rbf' is the default, spelled out here);
# `gamma` is only that kernel's coefficient, and 'auto' means 1 / n_features
# (= 1.0 for this single-feature data).
# `nu` is an upper bound on the fraction of training errors and a lower bound
# on the fraction of support vectors, so it acts as the expected outlier ratio.
one_svm = OneClassSVM(kernel='rbf', gamma='auto', nu=0.01)
one_svm.fit(X)

# predict labels each query point +1 (inlier, consistent with the training
# data) or -1 (outlier).
one_svm.predict(y)

array([-1,  1, -1,  1, -1])

In [3]:
one_svm.score_samples(y)
# score_samples returns the raw, unshifted value of the scoring function for each
# sample. No contamination parameter is involved (OneClassSVM does not have one);
# the inlier/outlier threshold is the fitted offset_, which is governed by nu:
# decision_function = score_samples - offset_

array([2.47729192e-04, 1.67496679e-02, 1.66310989e-06, 1.70740848e-02,
       1.51581080e-09])

In [4]:
one_svm.decision_function(y)
# decision_function returns the signed distance to the separating boundary:
# negative values mark a sample as an outlier (outside the training
# distribution), positive values mark it as an inlier

array([-0.01639928,  0.00010266, -0.01664534,  0.00042708, -0.016647  ])