In [16]:
import numpy as np
import pandas as pd
import torch
import os
import sys
import random
from typing import Union
import pickle

In [17]:
from sklearn.decomposition import PCA
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.font_manager
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import MinMaxScaler

In [18]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Covers Python's ``random`` module, NumPy's global generator (the one
    scikit-learn estimators fall back to when ``random_state`` is not set),
    and PyTorch. ``PYTHONHASHSEED`` is also exported; the running
    interpreter has already fixed its hash seed, so that setting only
    affects Python processes started afterwards.

    Args:
        seed: Integer seed. ``np.random.seed`` only accepts values in
            ``[0, 2**32 - 1]``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    # Previously missing: without this, NumPy/scikit-learn randomness was
    # left unseeded and results could differ from run to run.
    np.random.seed(seed)
    torch.random.manual_seed(seed)

In [19]:
# Single seed value for the notebook; it is handed to seed_everything, which
# applies it to each random number generator that function covers.
seed = 69
seed_everything(seed)

In [20]:
def show_pca(clf, df, train_size:Union[int, float], scaler):
    """Fit an outlier detector on a 2-D PCA projection and plot the result.

    The first ``train_size`` rows of ``df`` are the training split and the
    remaining rows are the test split. ``scaler`` and the PCA are fitted on
    the training rows only and then applied to every row. ``clf`` is fitted
    on the projected training rows, and its decision function is evaluated
    on a 500 x 500 grid spanning [-5, 5] in both components to draw the
    background regions and the frontier.

    Args:
        clf: Estimator providing ``fit``, ``predict`` (-1 marks an outlier,
            1 an inlier) and ``decision_function``.
        df: Row-sliceable table of samples (a DataFrame in this notebook).
        train_size: Size of the training split. A value below 1 is taken as
            a fraction of ``len(df)`` (rounded down); otherwise it is an
            absolute number of rows and must be integral.
        scaler: Transformer providing ``fit`` and ``transform``.

    Returns:
        ``clf``, fitted on the projected training rows.

    Raises:
        ValueError: If ``train_size`` is a non-integral value >= 1, or if it
            does not leave at least one row in both the training and the
            test split.
    """
    X = df
    n_samples = len(X)

    # Resolve train_size to an absolute row count and validate it up front,
    # so a bad value fails here with a clear message instead of deep inside
    # the scaler / PCA / classifier.
    if train_size < 1:
        n_train = int(train_size * n_samples)
    else:
        n_train = int(train_size)
        if n_train != train_size:
            raise ValueError(
                'train_size must be a fraction below 1 or a whole number of '
                'rows, got {!r}'.format(train_size))
    if not 0 < n_train < n_samples:
        raise ValueError(
            'train_size={!r} selects {} of {} rows; both the training and '
            'the test split need at least one row'
            .format(train_size, n_train, n_samples))

    scaler.fit(X[:n_train])
    X_scaled = scaler.transform(X)
    pca = PCA(n_components=2)
    pca.fit(X_scaled[:n_train])
    X_pca = pca.transform(X_scaled)
    X_train, X_test = X_pca[:n_train], X_pca[n_train:]
    xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500))
    clf.fit(X_train)
    y_pred_test = clf.predict(X_test)

    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    z_min, z_max = Z.min(), Z.max()

    plt.title("Error Detection")
    print(z_min, z_max)
    # Shade the outlier side (negative decision values). contourf needs
    # strictly increasing levels, so skip it when nothing is negative.
    if z_min < 0:
        plt.contourf(xx, yy, Z, levels=np.linspace(z_min, 0, 7), cmap=plt.cm.PuBu)
    # Inlier region when one exists on the grid; otherwise fall back to the
    # band between the minimum and the mean decision value.
    levels = [0, z_max] if z_max > 0 else [z_min, Z.mean()]
    if levels[0] < levels[1]:
        plt.contourf(xx, yy, Z, levels=levels, colors='palevioletred')
    # The learned frontier is where the decision function crosses zero; only
    # draw it when that level actually occurs on the grid.
    if z_min < 0 < z_max:
        plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='darkred')

    is_outlier = y_pred_test == -1
    X_outliers = X_test[is_outlier].copy()
    print('outliers: {}\n\tpercent outliers: {:.2f}'
          .format(len(X_outliers), 100 * len(X_outliers) / len(X_test)))
    X_test = X_test[y_pred_test == 1]
    s = 40
    b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white', s=s, edgecolors='k')
    b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='blueviolet', s=s,
                     edgecolors='k')
    c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='gold', s=s,
                    edgecolors='k')
    plt.axis('tight')
    plt.xlim(-.3, 1)
    plt.ylim(-.3,.4)

    # Proxy artist for the frontier legend entry: ContourSet.collections was
    # deprecated in Matplotlib 3.8 and later removed, and a proxy also works
    # when no frontier line could be drawn.
    from matplotlib.lines import Line2D
    frontier_handle = Line2D([], [], color='darkred', linewidth=2)
    plt.legend([frontier_handle, b1, b2, c],
               ["learned frontier", "training observations",
                "new regular observations", "new abnormal observations"],
               loc="upper left",
               prop=matplotlib.font_manager.FontProperties(size=11))
    plt.show()
    return clf

In [23]:
# Fit a one-class SVM on the leading rows of testdata.csv and plot the result.
clf = OneClassSVM()
# Relative path: resolved against the kernel's working directory.
df = pd.read_csv('testdata.csv')
train_size = .01  # below 1, so show_pca treats it as a fraction of the rows
scaler = MinMaxScaler()
clf = show_pca(clf, df, train_size, scaler)
# Keep the serialized estimator in a variable instead of echoing the raw
# bytes into the cell output, where they were displayed and then discarded.
clf_pickled = pickle.dumps(clf)
len(clf_pickled)

FileNotFoundError: [Errno 2] No such file or directory: './testdata.csv'