In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import cv2
from pyod.models.copod import COPOD
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import StandardScaler
from sklearn import metrics

In [2]:
# Location of the input archives. The loading cells build each path by plain
# string concatenation (DATA_DIR + DATA_FILENAME + '_train'/'_test' + '.npz'),
# so DATA_DIR must keep its trailing slash.
DATA_DIR = 'data/'
# Base name shared by the train and test .npz files (without split suffix or extension).
DATA_FILENAME = '2lbp_subset20k'

In [3]:
# Load the training split from DATA_DIR + DATA_FILENAME + '_train.npz'.
# The context manager closes the archive even when a key lookup or read raises,
# which the previous manual close() did not guarantee. Indexing the archive
# materialises each array, so X_train / y_train stay valid after it is closed.
with np.load(DATA_DIR + DATA_FILENAME + '_train' + '.npz') as data_train:
    X_train = data_train['X']
    y_train = data_train['y']
X_train.shape, y_train.shape

((20000, 20), (20000,))

In [4]:
# Load the test split from DATA_DIR + DATA_FILENAME + '_test.npz'.
# The context manager closes the archive even when a key lookup or read raises,
# which the previous manual close() did not guarantee. Indexing the archive
# materialises each array, so X_test / y_test stay valid after it is closed.
with np.load(DATA_DIR + DATA_FILENAME + '_test' + '.npz') as data_test:
    X_test = data_test['X']
    y_test = data_test['y']
X_test.shape, y_test.shape

((10000, 20), (10000,))

In [5]:
# Partition the training rows by label: rows labelled 1 go to X_train_pos,
# rows labelled 0 go to X_train_neg.
is_positive = (y_train == 1)
is_negative = (y_train == 0)
X_train_pos = X_train[is_positive]
X_train_neg = X_train[is_negative]
X_train_pos.shape, X_train_neg.shape

((8112, 20), (11888, 20))

In [6]:
# Fit a COPOD detector on the label-1 rows only, then compare the scores of the
# rows it was fitted on against the scores it assigns to the label-0 rows.
pos_clf = COPOD()
pos_clf.fit(X_train_pos)
pos_pos_scores = pos_clf.decision_scores_  # scores of the fitting data itself
pos_neg_scores = pos_clf.decision_function(X_train_neg)
# Print "mean std" for each score set, fitted-on rows first.
for _scores in (pos_pos_scores, pos_neg_scores):
    print(_scores.mean(), _scores.std())

26.63714575158138 11.050710502932208
28.700729681595927 14.16617757247587


In [7]:
# Fit a COPOD detector on the label-0 rows only, then compare the scores of the
# rows it was fitted on against the scores it assigns to the label-1 rows.
neg_clf = COPOD()
neg_clf.fit(X_train_neg)
neg_neg_scores = neg_clf.decision_scores_  # scores of the fitting data itself
neg_pos_scores = neg_clf.decision_function(X_train_pos)
# Print "mean std" for each score set, fitted-on rows first.
for _scores in (neg_neg_scores, neg_pos_scores):
    print(_scores.mean(), _scores.std())

26.710207147880144 12.358700194926422
23.763838585170344 7.298115168841595


In [8]:
def _detector_score_features(X):
    """Return a 2-column array: [pos_clf score, neg_clf score] for each row of X."""
    pos_column = pos_clf.decision_function(X).reshape(-1, 1)
    neg_column = neg_clf.decision_function(X).reshape(-1, 1)
    return np.hstack((pos_column, neg_column))


# Re-encode every sample as its pair of COPOD scores (column 0 from the
# detector fitted on label-1 rows, column 1 from the one fitted on label-0 rows).
Xf_train = _detector_score_features(X_train)
Xf_test = _detector_score_features(X_test)
Xf_train.shape, Xf_test.shape

((20000, 2), (10000, 2))

In [9]:
# Train a gradient-boosted classifier on the two detector-score features only
# and print its mean accuracy on the training split, then on the test split.
# random_state is pinned because scikit-learn permutes the candidate features
# at every split, so an unseeded fit can produce slightly different trees (and
# accuracies) from one run to the next.
clf = GradientBoostingClassifier(n_estimators=100, random_state=0)
clf.fit(Xf_train, y_train)
print(clf.score(Xf_train, y_train))
print(clf.score(Xf_test, y_test))

0.7442
0.7219


In [10]:
# Widen the feature matrices: the two detector-score columns come first,
# followed by the original input columns.
train_parts = (Xf_train, X_train)
test_parts = (Xf_test, X_test)
Xg_train = np.hstack(train_parts)
Xg_test = np.hstack(test_parts)
Xg_train.shape, Xg_test.shape

((20000, 22), (10000, 22))

In [11]:
# Train a larger gradient-boosted classifier on the combined features
# (detector scores + original columns) and print its mean accuracy on the
# training split, then on the test split.
# random_state is pinned because scikit-learn permutes the candidate features
# at every split, so an unseeded fit can produce slightly different trees (and
# accuracies) from one run to the next.
clf2 = GradientBoostingClassifier(n_estimators=400, random_state=0)
clf2.fit(Xg_train, y_train)
print(clf2.score(Xg_train, y_train))
print(clf2.score(Xg_test, y_test))

0.83425
0.7993
