In [1]:
from sklearn.datasets import fetch_openml
from sklearn import svm
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.utils import check_random_state
import pickle
import imutils
import time

In [2]:
import cv2
from skimage import exposure

In [3]:
from collections import Counter
# Pull the 'mnist_784' dataset (version 1) from OpenML, using /ScikitData as the
# on-disk cache; return_X_y=True yields the (features, labels) pair directly.
X_MNIST, y_MNIST = fetch_openml(
    'mnist_784',
    version=1,
    data_home='/ScikitData',
    return_X_y=True,
)

In [4]:
# Pull the 'CIFAR_10_small' dataset (version 1) from OpenML, cached under
# /CIFAR_10_small; return_X_y=True yields the (features, labels) pair directly.
# NOTE(review): later cells index X_CIFAR by row position (X_CIFAR[i]); newer
# scikit-learn releases may hand back a DataFrame here -- confirm the container type.
X_CIFAR, y_CIFAR = fetch_openml(
    'CIFAR_10_small',
    version=1,
    data_home='/CIFAR_10_small',
    return_X_y=True,
)

In [5]:
# Split every flattened CIFAR row into its three consecutive 1024-value planes
# (values 0-1023, 1024-2047 and 2048 onwards), which this notebook treats as the
# red, green and blue channels of a 32x32 image.
#
# np.asarray guarantees positional row/column access: if fetch_openml returned a
# pandas DataFrame, `X_CIFAR[i]` would be a *column* lookup and raise KeyError.
# Slicing the 2-D array also replaces the per-row Python loop.
cifar_pixels = np.asarray(X_CIFAR)
X_CIFAR_red = cifar_pixels[:, :1024]
X_CIFAR_green = cifar_pixels[:, 1024:2048]
X_CIFAR_blue = cifar_pixels[:, 2048:]

In [6]:
# Materialise the three per-channel collections as NumPy arrays (one row per image).
X_CIFAR_red, X_CIFAR_green, X_CIFAR_blue = (
    np.array(plane) for plane in (X_CIFAR_red, X_CIFAR_green, X_CIFAR_blue)
)

In [7]:
# Red channel: view each row as a 32x32 uint8 image, rescale its intensities to
# the 0-255 output range, resize to width 28 and flatten back to 784 values so
# the result has the same number of features as an MNIST row.
X_CIFAR_red_re = np.array([
    imutils.resize(
        exposure.rescale_intensity(
            red_plane.reshape((32, 32)).astype("uint8"),
            out_range=(0, 255),
        ),
        width=28,
    ).reshape(784)
    for red_plane in X_CIFAR_red
])

In [8]:
# Green channel: view each row as a 32x32 uint8 image, rescale its intensities to
# the 0-255 output range, resize to width 28 and flatten back to 784 values so
# the result has the same number of features as an MNIST row.
X_CIFAR_green_re = np.array([
    imutils.resize(
        exposure.rescale_intensity(
            green_plane.reshape((32, 32)).astype("uint8"),
            out_range=(0, 255),
        ),
        width=28,
    ).reshape(784)
    for green_plane in X_CIFAR_green
])

In [9]:
# Blue channel: view each row as a 32x32 uint8 image, rescale its intensities to
# the 0-255 output range, resize to width 28 and flatten back to 784 values so
# the result has the same number of features as an MNIST row.
X_CIFAR_blue_re = np.array([
    imutils.resize(
        exposure.rescale_intensity(
            blue_plane.reshape((32, 32)).astype("uint8"),
            out_range=(0, 255),
        ),
        width=28,
    ).reshape(784)
    for blue_plane in X_CIFAR_blue
])

In [13]:
# Collapse the three resized colour planes into one greyscale image per row using
# the weighted sum 0.2989 * R + 0.5870 * G + 0.1140 * B.
# The arithmetic is element-wise, so applying it to the full 2-D arrays at once
# gives the same values as looping over rows, without the Python-level loop.
X_CIFAR_grey_re = (
    0.2989 * X_CIFAR_red_re
    + 0.5870 * X_CIFAR_green_re
    + 0.1140 * X_CIFAR_blue_re
)

In [14]:
# Sanity check: one row per CIFAR image, each flattened to 784 (= 28 * 28) values.
X_CIFAR_grey_re.shape

(20000, 784)

In [15]:
# Candidate values of OneClassSVM's `nu` to sweep over.
parameters = dict(nu=[0.1, 0.3, 0.5, 0.7, 0.9])

In [16]:
# Sweep `nu` for a linear-kernel One-Class SVM fitted on MNIST and score each fit:
#   ISS[str(nu)] -- fraction of MNIST (training) samples predicted +1 (inlier)
#   OOS[str(nu)] -- fraction of greyscale CIFAR samples predicted -1 (outlier)
# The fitted model and both scores are also pickled into the working directory
# as model_O.<nu>.pkl, ISS_O.<nu>.pkl and OOS_O.<nu>.pkl.
models = {}
ISS = {}
OOS = {}
for n in parameters['nu']:
    suffix = '.' + str(n)  # shared by the log line and the three file names
    print("Starting the batch " + suffix)
    start = time.perf_counter()  # monotonic clock: immune to wall-clock adjustments

    model = svm.OneClassSVM(nu=n, kernel='linear')
    model.fit(X_MNIST)
    # `with` closes (and therefore flushes) each file even if pickling fails;
    # the bare open(...) handles used previously were never closed.
    with open("model_O" + suffix + ".pkl", "wb") as model_file:
        pickle.dump(model, model_file)
    models[str(n)] = model

    # predict() labels every sample +1 or -1, so the sample count equals the
    # (+1 count) + (-1 count) denominator; count once instead of re-tallying.
    predictions_IS = model.predict(X_MNIST)
    in_sample_score = int(np.count_nonzero(predictions_IS == 1)) / len(predictions_IS)
    with open("ISS_O" + suffix + ".pkl", "wb") as iss_file:
        pickle.dump(in_sample_score, iss_file)
    ISS[str(n)] = in_sample_score

    predictions_OOS = model.predict(X_CIFAR_grey_re)
    out_of_sample_score = int(np.count_nonzero(predictions_OOS == -1)) / len(predictions_OOS)
    with open("OOS_O" + suffix + ".pkl", "wb") as oos_file:
        pickle.dump(out_of_sample_score, oos_file)
    OOS[str(n)] = out_of_sample_score

    end = time.perf_counter()
    print("Time elapsed:")
    print(end - start)

Starting the batch .0.1
Time elapsed:
1707.386312007904
Starting the batch .0.3
Time elapsed:
4632.080651760101
Starting the batch .0.5
Time elapsed:
6954.725334644318
Starting the batch .0.7
Time elapsed:
9103.174491882324
Starting the batch .0.9
Time elapsed:
9862.79226398468


In [None]:
#parameters = {'kernel':['poly'], 'nu':[0.1, 0.3, 0.5], 'gamma':[1]}

In [None]:
# Disabled experiment: a kernel / nu / gamma grid over OneClassSVM that fits on
# X_MNIST, scores on X_MNIST and X_CIFAR_grey_re, and pickles each model and score.
# The whole loop is wrapped in a string literal, so none of it runs. To revive it,
# `parameters` must also provide 'kernel' and 'gamma' lists (the loop reads all
# three keys).
'''
models = {}
ISS = {}
OOS = {}
for k in parameters['kernel']:
    for n in parameters['nu']:
        for g in parameters['gamma']:
            print("Starting the batch " + str(k)+'.'+str(n)+'.'+str(g))
            start = time.time()
            model = svm.OneClassSVM(nu=n,kernel=k,gamma=g)
            model.fit(X_MNIST)
            pickle.dump(model, open("model_O"+str(k)+'.'+str(n)+'.'+str(g)+".pkl", "wb"))
            models[str(k)+'.'+str(n)+'.'+str(g)] = model
            predictions_IS = model.predict(X_MNIST)
            in_sample_score = Counter(predictions_IS)[1]/(Counter(predictions_IS)[1]+Counter(predictions_IS)[-1])
            pickle.dump(in_sample_score, open("ISS_O"+str(k)+'.'+str(n)+'.'+str(g)+".pkl", "wb"))
            ISS[str(k)+'.'+str(n)+'.'+str(g)] = in_sample_score
            predictions_OOS = model.predict(X_CIFAR_grey_re)
            out_of_sample_score = Counter(predictions_OOS)[-1]/(Counter(predictions_OOS)[1]+Counter(predictions_OOS)[-1])
            pickle.dump(out_of_sample_score, open("OOS_O"+str(k)+'.'+str(n)+'.'+str(g)+".pkl", "wb"))
            OOS[str(k)+'.'+str(n)+'.'+str(g)] = out_of_sample_score
            end = time.time()
            print("Time elapsed:")
            print(end - start)
'''

Starting the batch poly.0.1.0.1
Time elapsed:
1743.3018085956573
Starting the batch poly.0.1.1
