In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('skeweddatas.csv')

In [4]:
good = df[df['diag']==0]
bad = df[df['diag']==1]

outlier_fraction = len(bad)/float(len(good))
outlier_fraction

0.06959152798789713

In [5]:
print("No. of normal : {}".format(len(good)))
print("No. of anomaly : {}".format(len(bad)))

No. of normal : 661
No. of anomaly : 46


In [6]:
columns = df.columns.tolist()
columns = [c for c in columns if c not in ['diag']]
target = 'diag'

In [7]:
X = df[columns]
y = df[target]
print(X.shape)
print(y.shape)

(707, 31)
(707,)


In [8]:
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor

In [9]:
classifiers = {
    "Isolation Forest": IsolationForest(max_samples = len(X), contamination = outlier_fraction, random_state = 1 ),
    "Local Outlier Factor" : LocalOutlierFactor(n_neighbors = 20, contamination = outlier_fraction )
}
#fiting the data into different model and predicting the output 
n_outlier = len(bad)
for i, (clf_name, clf) in enumerate(classifiers.items()):
    if clf_name == "Local Outlier Factor":
        y_pred = clf.fit_predict(X)
        scores_pred = clf.negative_outlier_factor_
    else:
        clf.fit(X)
        scores_pred = clf.decision_function(X)
        y_pred = clf.predict(X)
    y_pred[y_pred == 1] = 0
    y_pred[y_pred == -1] = 1
    n_errors = (y_pred != y).sum()
    print('{}: {}'.format(clf_name, n_errors))
    print('Accuracy Score : {}'.format(accuracy_score(y, y_pred)))
    print((classification_report(y, y_pred)))



Isolation Forest: 75
Accuracy Score : 0.8939179632248939
              precision    recall  f1-score   support

           0       0.94      0.94      0.94       661
           1       0.19      0.20      0.19        46

   micro avg       0.89      0.89      0.89       707
   macro avg       0.57      0.57      0.57       707
weighted avg       0.89      0.89      0.89       707

Local Outlier Factor: 87
Accuracy Score : 0.8769448373408769
              precision    recall  f1-score   support

           0       0.94      0.93      0.93       661
           1       0.08      0.09      0.08        46

   micro avg       0.88      0.88      0.88       707
   macro avg       0.51      0.51      0.51       707
weighted avg       0.88      0.88      0.88       707



