# One class classification on data set iris

In [3]:
from sklearn.datasets import load_iris
import numpy as np
import matplotlib.pyplot as plt 

iris = load_iris(as_frame=True)

In [4]:
iris 

{'data':      sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
 0                  5.1               3.5                1.4               0.2
 1                  4.9               3.0                1.4               0.2
 2                  4.7               3.2                1.3               0.2
 3                  4.6               3.1                1.5               0.2
 4                  5.0               3.6                1.4               0.2
 ..                 ...               ...                ...               ...
 145                6.7               3.0                5.2               2.3
 146                6.3               2.5                5.0               1.9
 147                6.5               3.0                5.2               2.0
 148                6.2               3.4                5.4               2.3
 149                5.9               3.0                5.1               1.8
 
 [150 rows x 4 columns],
 'target': 0     

In [5]:
from sklearn import svm
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split, cross_val_score, cross_val_predict

In [12]:
X, y = iris.data, iris.target

In [31]:
X.insert(loc=4, column='class', value=y)

In [33]:
setosa = X[X['class'] == 0]
setosa

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
5,5.4,3.9,1.7,0.4,0
6,4.6,3.4,1.4,0.3,0
7,5.0,3.4,1.5,0.2,0
8,4.4,2.9,1.4,0.2,0
9,4.9,3.1,1.5,0.1,0


In [35]:
versicolor = X[X['class'] == 1]
versicolor

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
50,7.0,3.2,4.7,1.4,1
51,6.4,3.2,4.5,1.5,1
52,6.9,3.1,4.9,1.5,1
53,5.5,2.3,4.0,1.3,1
54,6.5,2.8,4.6,1.5,1
55,5.7,2.8,4.5,1.3,1
56,6.3,3.3,4.7,1.6,1
57,4.9,2.4,3.3,1.0,1
58,6.6,2.9,4.6,1.3,1
59,5.2,2.7,3.9,1.4,1


In [37]:
virginica = X[X['class'] == 2]
virginica

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),class
100,6.3,3.3,6.0,2.5,2
101,5.8,2.7,5.1,1.9,2
102,7.1,3.0,5.9,2.1,2
103,6.3,2.9,5.6,1.8,2
104,6.5,3.0,5.8,2.2,2
105,7.6,3.0,6.6,2.1,2
106,4.9,2.5,4.5,1.7,2
107,7.3,2.9,6.3,1.8,2
108,6.7,2.5,5.8,1.8,2
109,7.2,3.6,6.1,2.5,2


In [66]:
from collections import defaultdict

def one_class_holdout(X):
    clf = svm.OneClassSVM(kernel='rbf', gamma='scale')
    X = X.drop(columns='class')

    scores = defaultdict()

    for i in np.arange(0.1, 0.99, 0.1):
        temp = []

        for j in range(21):
            X_train, X_test = train_test_split(X, train_size=i, random_state=j)

            clf.fit(X_train)
            preds = clf.predict(X_test)

            tot = preds.size
            n_corr = sum([1 if pred == 1 else 0 for pred in preds])
            n_uncorr = sum([1 if pred == -1 else 0 for pred in preds])

            score = n_corr / tot 
            temp.append(score)

        scores[i] = np.mean(temp)

    best = max(scores, key=scores.get)
            
    return best, scores[best]

one_class_holdout(setosa)


(0.8, 0.5714285714285713)

In [67]:
one_class_holdout(virginica), one_class_holdout(versicolor)

((0.9, 0.5523809523809524), (0.9, 0.5238095238095238))

In [77]:
def one_class_kfold(X):
    clf = svm.OneClassSVM(kernel='rbf', gamma='scale')
    X = X.drop(columns='class')

    res = defaultdict()
    for i in range(2, 10):
        all_scores = []

        for j in range(21):
            cv = KFold(n_splits=i, shuffle=True, random_state=j)

            scores = []
            for fold_idx, (train_idx, test_idx) in enumerate(cv.split(X)):
                X_train, X_test = X.values[train_idx], X.values[test_idx]

                clf.fit(X_train)
                predictions = clf.predict(X_test)

                n_corr = sum([1 if 1 else 0 for pred in predictions])
                score = n_corr / predictions.size

                scores.append(score)
            
            all_scores.append(np.mean(scores))
        
        res[i] = np.mean(all_scores)
    
    best = max(res, key=res.get)
    return best, res[best]

one_class_kfold(setosa)

(2, 1.0)

In [78]:
one_class_kfold(versicolor), one_class_kfold(virginica)

((2, 1.0), (2, 1.0))