# OD - Project 1 - Milestone 4

In [1]:
import pandas as pd
import sklearn
from sklearn.metrics import classification_report
from sklearn.svm import OneClassSVM
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from scipy.io import loadmat

In [2]:
data = loadmat('thyroid.mat')
X = pd.DataFrame(data['X'])
y = pd.Series(data['y'][:,0])
y = y.map({0:1, 1:-1})

In [3]:
print(X.shape, y.shape)

(3772, 6) (3772,)


In [4]:
print(y.value_counts())

 1    3679
-1      93
dtype: int64


In [5]:
print(y.value_counts(normalize=True))

 1    0.975345
-1    0.024655
dtype: float64


In [6]:
outliers_fraction=0.1

In [7]:
anomaly_algorithms = [
    ("One-Class SVM RBF", OneClassSVM(nu=outliers_fraction, kernel="rbf")),
    ("One-Class SVM degree 2", OneClassSVM(nu=outliers_fraction, kernel="poly", degree=2)),
    ("One-Class SVM degree 3", OneClassSVM(nu=outliers_fraction, kernel="poly", degree=2)),
    ("Robust Covariance", EllipticEnvelope(contamination=outliers_fraction)),
    ("Isolation Forest, 50 trees", IsolationForest(contamination=outliers_fraction, n_estimators=50)),
    ("Isolation Forest, 100 trees", IsolationForest(contamination=outliers_fraction, n_estimators=100)),
    ("Isolation Forest, 200 trees", IsolationForest(contamination=outliers_fraction, n_estimators=200)),
]

In [8]:
for name, algo in anomaly_algorithms:
    print('*' * 55)
    print(name)
    print()
    y_pred = algo.fit_predict(X)
    print(classification_report(y, y_pred))

*******************************************************
One-Class SVM RBF

              precision    recall  f1-score   support

          -1       0.17      0.68      0.27        93
           1       0.99      0.91      0.95      3679

    accuracy                           0.91      3772
   macro avg       0.58      0.80      0.61      3772
weighted avg       0.97      0.91      0.93      3772

*******************************************************
One-Class SVM degree 2

              precision    recall  f1-score   support

          -1       0.08      0.33      0.13        93
           1       0.98      0.91      0.94      3679

    accuracy                           0.89      3772
   macro avg       0.53      0.62      0.54      3772
weighted avg       0.96      0.89      0.92      3772

*******************************************************
One-Class SVM degree 3

              precision    recall  f1-score   support

          -1       0.08      0.33      0.13        93
  

In [9]:
# END