In [1]:
from sklearn.datasets import fetch_openml
from sklearn import svm
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.utils import check_random_state
from sklearn.model_selection import GridSearchCV

In [2]:
from collections import Counter
import pickle
import time

In [3]:
# Fetch version 1 of the OpenML "mnist_784" dataset as a (data, target) pair,
# with /ScikitData passed as scikit-learn's data_home directory.
X, y = fetch_openml(
    'mnist_784',
    version=1,
    data_home='/ScikitData',
    return_X_y=True,
)

In [4]:
def find_idx(arr, target):
    """Return the positions in ``arr`` whose element equals ``target``.

    Elements are read as ``arr[i]`` for each ``i`` in ``range(len(arr))`` and
    compared with ``==``. The matching positions are returned in ascending
    order as a list of ints; the list is empty when nothing matches.
    """
    return [pos for pos in range(len(arr)) if arr[pos] == target]

In [5]:
# Positions of every sample whose label is the string "9"; these rows are
# used as the held-out test set when X and y are split.
idx_test = find_idx(y,"9")

In [6]:
# Every row position that is not a held-out "9" becomes a training row.
# Scan the positions in order instead of materialising a set difference:
# set iteration order is not guaranteed, whereas this keeps the training
# rows in their original dataset order on every run.
held_out = set(idx_test)
idx_train = [i for i in range(len(y)) if i not in held_out]

In [7]:
# Split by integer row position: training rows are all non-"9" samples,
# test rows are the "9" samples.
# Convert to plain NumPy arrays first. Newer scikit-learn releases can return
# pandas objects from fetch_openml, and indexing a DataFrame with a list of
# ints selects columns by label rather than rows by position. np.asarray is
# a no-op when the inputs are already ndarrays.
X_values = np.asarray(X)
y_values = np.asarray(y)
X_train = X_values[idx_train]
X_test = X_values[idx_test]
y_train = y_values[idx_train]
y_test = y_values[idx_test]

In [8]:
# Candidate nu values for the linear-kernel OneClassSVM sweep; one model is
# trained per value.
parameters = {'nu':[0.1, 0.3, 0.5]}

In [9]:
# Linear-kernel sweep: fit one OneClassSVM per nu on X_train, then record
#   * in-sample score:     fraction of X_train rows predicted +1
#   * out-of-sample score: fraction of X_test rows predicted -1
# Each fitted model and each score is kept in a dict keyed by str(nu) and
# also pickled to the working directory ("model.<nu>.pkl", "ISS.<nu>.pkl",
# "OOS.<nu>.pkl").
models = {}
ISS = {}
OOS = {}
for n in parameters['nu']:
    tag = '.' + str(n)  # shared suffix for the log line and file names
    print("Starting the batch " + tag)
    start = time.time()

    model = svm.OneClassSVM(nu=n, kernel='linear')
    model.fit(X_train)
    # Context managers close each file as soon as it is written, so no
    # handles are left open across the long-running iterations.
    with open("model" + tag + ".pkl", "wb") as fh:
        pickle.dump(model, fh)
    models[str(n)] = model

    predictions_IS = model.predict(X_train)
    # Tally the +1 / -1 predictions once instead of once per term.
    counts_IS = Counter(predictions_IS)
    in_sample_score = counts_IS[1] / (counts_IS[1] + counts_IS[-1])
    with open("ISS" + tag + ".pkl", "wb") as fh:
        pickle.dump(in_sample_score, fh)
    ISS[str(n)] = in_sample_score

    predictions_OOS = model.predict(X_test)
    counts_OOS = Counter(predictions_OOS)
    out_of_sample_score = counts_OOS[-1] / (counts_OOS[1] + counts_OOS[-1])
    with open("OOS" + tag + ".pkl", "wb") as fh:
        pickle.dump(out_of_sample_score, fh)
    OOS[str(n)] = out_of_sample_score

    end = time.time()
    print("Time elapsed:")
    print(end - start)

Starting the batch .0.1
Time elapsed:
1331.561294078827
Starting the batch .0.3
Time elapsed:
3666.86416888237
Starting the batch .0.5
Time elapsed:
5389.891309499741


In [15]:
import pickle
# Reload and print the pickled in-sample / out-of-sample scores written by
# the linear-kernel sweep ("ISS.<nu>.pkl" and "OOS.<nu>.pkl").
# NOTE: pickle.load can execute arbitrary code; only load files that this
# notebook wrote itself.
for n in [0.1,0.3,0.5]:
    print(str(n))
    print("ISS")
    # Read-only mode is sufficient, and the context manager closes the file
    # instead of leaving the handle open.
    with open("ISS"+'.'+str(n)+".pkl", "rb") as fp:
        A = pickle.load(fp, encoding='bytes')
    print(A)
    print("OOS")
    with open("OOS"+'.'+str(n)+".pkl", "rb") as fp:
        A = pickle.load(fp, encoding='bytes')
    print(A)

0.1
ISS
0.8999714476063577
OOS
0.061655648174762864
0.3
ISS
0.6999619301418102
OOS
0.23584363322793905
0.5
ISS
0.5000158624409125
OOS
0.4787295199770049


In [10]:
# Grid for the polynomial-kernel sweep: one OneClassSVM is trained for every
# (kernel, nu, gamma) combination listed here.
parameters = {'kernel':['poly'], 'nu':[0.1, 0.3, 0.5], 'gamma':[0.1,1,10]}

In [11]:
# Kernel grid sweep: fit one OneClassSVM per (kernel, nu, gamma) combination
# on X_train, then record
#   * in-sample score:     fraction of X_train rows predicted +1
#   * out-of-sample score: fraction of X_test rows predicted -1
# Results are stored in dicts keyed by "<kernel>.<nu>.<gamma>" and pickled to
# "model<key>.pkl", "ISS<key>.pkl" and "OOS<key>.pkl".
# NOTE(review): the recorded OOS scores do not vary with gamma. Presumably
# this is because coef0 is left at its default, so gamma only rescales the
# polynomial kernel; confirm before drawing conclusions about gamma.
models = {}
ISS = {}
OOS = {}
for k in parameters['kernel']:
    for n in parameters['nu']:
        for g in parameters['gamma']:
            # Build the identifier once; it names the log line, the dict
            # entries and the pickle files.
            key = str(k) + '.' + str(n) + '.' + str(g)
            print("Starting the batch " + key)
            start = time.time()

            model = svm.OneClassSVM(nu=n, kernel=k, gamma=g)
            model.fit(X_train)
            # Context managers close each file right after it is written.
            with open("model" + key + ".pkl", "wb") as fh:
                pickle.dump(model, fh)
            models[key] = model

            predictions_IS = model.predict(X_train)
            # Tally the +1 / -1 predictions once instead of once per term.
            counts_IS = Counter(predictions_IS)
            in_sample_score = counts_IS[1] / (counts_IS[1] + counts_IS[-1])
            with open("ISS" + key + ".pkl", "wb") as fh:
                pickle.dump(in_sample_score, fh)
            ISS[key] = in_sample_score

            predictions_OOS = model.predict(X_test)
            counts_OOS = Counter(predictions_OOS)
            out_of_sample_score = counts_OOS[-1] / (counts_OOS[1] + counts_OOS[-1])
            with open("OOS" + key + ".pkl", "wb") as fh:
                pickle.dump(out_of_sample_score, fh)
            OOS[key] = out_of_sample_score

            end = time.time()
            print("Time elapsed:")
            print(end - start)

Starting the batch poly.0.1.0.1
Time elapsed:
1352.8279485702515
Starting the batch poly.0.1.1
Time elapsed:
1355.326684474945
Starting the batch poly.0.1.10
Time elapsed:
1349.9955577850342
Starting the batch poly.0.3.0.1
Time elapsed:
3660.5943517684937
Starting the batch poly.0.3.1
Time elapsed:
3595.498925924301
Starting the batch poly.0.3.10
Time elapsed:
3595.443622112274
Starting the batch poly.0.5.0.1
Time elapsed:
5272.698080778122
Starting the batch poly.0.5.1
Time elapsed:
5309.4495005607605
Starting the batch poly.0.5.10
Time elapsed:
5323.000806570053


In [12]:
# Display the in-sample scores (fraction of training rows predicted +1)
# collected by the kernel grid sweep, keyed by "<kernel>.<nu>.<gamma>".
ISS

{'poly.0.1.0.1': 0.8999238602836205,
 'poly.0.1.1': 0.8998762729608832,
 'poly.0.1.10': 0.8999873100472701,
 'poly.0.3.0.1': 0.6999777925827226,
 'poly.0.3.1': 0.7000888296691095,
 'poly.0.3.10': 0.7000412423463722,
 'poly.0.5.0.1': 0.5000634497636496,
 'poly.0.5.1': 0.4999682751181752,
 'poly.0.5.10': 0.5}

In [13]:
# Display the out-of-sample scores (fraction of held-out rows predicted -1)
# collected by the kernel grid sweep, keyed by "<kernel>.<nu>.<gamma>".
OOS

{'poly.0.1.0.1': 0.07660247197470538,
 'poly.0.1.1': 0.07660247197470538,
 'poly.0.1.10': 0.07660247197470538,
 'poly.0.3.0.1': 0.2655935613682093,
 'poly.0.3.1': 0.2655935613682093,
 'poly.0.3.10': 0.2655935613682093,
 'poly.0.5.0.1': 0.49942512216154067,
 'poly.0.5.1': 0.49942512216154067,
 'poly.0.5.10': 0.49942512216154067}

In [16]:
from itertools import cycle

from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize
from sklearn.multiclass import OneVsRestClassifier
from scipy import interp

In [None]:
# `random_state` is not defined by any earlier cell, so seed one explicitly;
# otherwise this cell raises NameError on a fresh kernel.
random_state = check_random_state(0)
classifier = svm.SVC(kernel='linear', probability=True,
                     random_state=random_state)
# Score the held-out rows with the already-fitted one-class model. Calling
# fit again here would repeat the full training run (over 5000 s in the
# recorded output), and OneClassSVM.fit ignores the labels anyway.
y_score = models['poly.0.5.10'].decision_function(X_test)