In [1]:
import copy

import numpy as np
from sklearn import svm
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score

In [2]:
# Generate three random 2-D point clouds, all drawn from the same normal
# distribution (mean 1, standard deviation 0.3): one initial batch of 100
# points and two follow-up batches of 100 and 20 points.
# Seed NumPy's global generator first so that Restart & Run All reproduces
# exactly the same data on every run.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

X_0 = 0.3 * np.random.randn(100, 2) + 1
X_1 = 0.3 * np.random.randn(100, 2) + 1
X_2 = 0.3 * np.random.randn(20, 2) + 1

In [3]:
def quadratic(point):
    """Lift a two-dimensional point into degree-2 polynomial feature space.

    A linear classifier trained on the lifted features can represent a
    quadratic decision boundary in the original plane.

    Args:
        point: Indexable whose first two items are the x and y coordinates.

    Returns:
        The list ``[1, x, y, x*x, x*y, y*y]``.
    """
    x, y = point[0], point[1]
    second_order = [x * x, x * y, y * y]
    return [1, x, y] + second_order

In [4]:
# Lift every batch into the quadratic feature space: each 2-D point becomes
# one row of six features produced by quadratic().
trans_X_0 = [quadratic(point) for point in X_0]
trans_X_1 = [quadratic(point) for point in X_1]
trans_X_2 = [quadratic(point) for point in X_2]

# Follow-up batches, in the order they are fed to the online classifier.
datasets = [trans_X_1, trans_X_2]

In [5]:
# Bootstrap labels for the initial batch: an RBF one-class SVM marks each
# point of X_0 as an inlier (+1) or an outlier (-1).
clf0 = svm.OneClassSVM(kernel="rbf", gamma="auto", nu=0.1)
# fit() returns the estimator itself, so fitting and labelling can be chained;
# Y becomes the training target for the linear classifier.
Y = clf0.fit(X_0).predict(X_0)

In [6]:
# Fit the linear passive-aggressive classifier on the lifted initial batch.
# partial_fit is used instead of fit so the same model can keep learning from
# later batches; the complete label set is declared on this first call.
clf = PassiveAggressiveClassifier().partial_fit(trans_X_0, Y, classes=[-1, 1])

# Running list of classifiers, seeded with the model after the initial batch.
classifiers = []
classifiers.append(clf)

In [7]:
# Feed each follow-up batch to the online classifier (self-training):
# the current model labels the batch, and if it considers more than half of
# the points inliers, it is updated on the batch using its own predictions.
# After every batch the current model is recorded in `classifiers`.
for dataset in datasets:
    temp_Y = clf.predict(dataset)
    # Accuracy against an all-inlier target is the fraction predicted as +1.
    inlier_fraction = accuracy_score(temp_Y, [1] * len(temp_Y))
    if inlier_fraction > 0.5:
        # partial_fit mutates the estimator in place. Work on a deep copy so
        # the objects already stored in `classifiers` stay frozen as genuine
        # point-in-time snapshots instead of all aliasing the final model.
        clf = copy.deepcopy(clf)
        clf.partial_fit(dataset, temp_Y)
    # If no update happened, `clf` is unchanged and re-recording it is safe.
    classifiers.append(clf)