In [1]:
%matplotlib inline

In [2]:
import numpy as np
from sklearn.svm import LinearSVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import math
from multiprocessing.pool import Pool
import datetime

In [3]:
class OVRSVC():
    """One-vs-rest ensemble of ``LinearSVC`` models combined by a min-max rule.

    For every class, the samples of that class (positives) are shuffled and
    split into ``num_pos`` chunks, and all remaining samples (negatives) are
    shuffled and split into ``2 * num_pos`` chunks.  One ``LinearSVC`` is
    trained for every (positive chunk, negative chunk) pair.

    At prediction time the decision values of the classifiers sharing a
    positive chunk are reduced with ``min``; those results are reduced with
    ``max`` across positive chunks; the class with the largest score wins.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to every ``LinearSVC`` constructor.

    Attributes
    ----------
    c2svc : dict
        Maps class label -> list (one entry per positive chunk) of lists
        (one entry per negative chunk) of fitted classifiers.
    """

    def __init__(self, **kwargs):
        self.c2svc = {}
        self.kwargs = kwargs

    def fit_one(self, dataset):
        """Train one binary ``LinearSVC``.

        Parameters
        ----------
        dataset : ndarray of shape (n_samples, n_features + 1)
            Feature columns followed by a final 0/1 label column.  Features
            and labels travel in a single array so that one object can be
            handed to ``Pool.map``.

        Returns
        -------
        The fitted ``LinearSVC``.
        """
        X = dataset[:, :-1]
        y = dataset[:, -1].astype(bool)
        svc = LinearSVC(**self.kwargs)
        svc.fit(X, y)
        return svc

    @staticmethod
    def _shuffled_chunks(samples, n_chunks):
        """Shuffle the rows of ``samples`` and split them into ``n_chunks`` parts.

        ``np.array_split`` keeps every row and yields chunk sizes that differ
        by at most one, so no sample is discarded when the row count is not a
        multiple of ``n_chunks``.
        """
        if len(samples) < n_chunks:
            raise ValueError(
                "cannot split %d samples into %d non-empty chunks"
                % (len(samples), n_chunks)
            )
        return np.array_split(np.random.permutation(samples), n_chunks)

    @staticmethod
    def _with_label(chunk, label):
        """Append a constant label column to ``chunk``."""
        return np.concatenate((chunk, np.full((len(chunk), 1), label)), axis=1)

    def fit(self, X, y, num_pos):
        """Train the full ensemble.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
        y : ndarray of shape (n_samples,)
            Class labels; any sortable label values are accepted.
        num_pos : int
            Number of chunks the positive samples of each class are split
            into.  The negatives are split into ``2 * num_pos`` chunks.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``num_pos`` is not positive, fewer than two classes are
            present, or a class has too few samples for the requested split.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if num_pos < 1:
            raise ValueError("num_pos must be a positive integer")
        classes = np.unique(y).tolist()
        if len(classes) < 2:
            raise ValueError("at least two distinct classes are required")

        num_neg = num_pos * 2
        # Build into a local dict so a failed fit never leaves a half-filled
        # model behind, and so stale classes from a previous fit are dropped.
        c2svc = {}
        # One pool for the whole fit, released deterministically on exit.
        with Pool() as pool:
            for c in classes:
                is_pos = y == c
                pos_chunks = [
                    self._with_label(chunk, 1)
                    for chunk in self._shuffled_chunks(X[is_pos], num_pos)
                ]
                neg_chunks = [
                    self._with_label(chunk, 0)
                    for chunk in self._shuffled_chunks(X[~is_pos], num_neg)
                ]
                tasks = [
                    np.concatenate((pos, neg))
                    for pos in pos_chunks
                    for neg in neg_chunks
                ]
                svcs = pool.map(self.fit_one, tasks)
                # Regroup the flat result list: one sub-list per positive chunk.
                c2svc[c] = [
                    svcs[i * num_neg:(i + 1) * num_neg] for i in range(num_pos)
                ]
        self.c2svc = c2svc
        return self

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Returns
        -------
        ndarray of shape (n_samples,)
            The winning class *labels* (not column indices), so labels need
            not be the contiguous integers ``0..K-1``.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        if not self.c2svc:
            raise ValueError("OVRSVC is not fitted yet; call fit() first")
        X = np.asarray(X)
        labels = sorted(self.c2svc)
        confidences = np.empty((X.shape[0], len(labels)))
        for col, label in enumerate(labels):
            # min over classifiers that share a positive chunk ...
            group_scores = [
                np.min(
                    [np.ravel(svc.decision_function(X)) for svc in group],
                    axis=0,
                )
                for group in self.c2svc[label]
            ]
            # ... then max over the positive chunks.
            confidences[:, col] = np.max(group_scores, axis=0)
        return np.asarray(labels)[confidences.argmax(axis=1)]

    def score(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == np.asarray(y))

In [6]:
class OVRSVC():
    """One-vs-rest ``LinearSVC`` ensemble with per-class negative chunks.

    The samples of every class are shuffled and split into ``num_pos``
    chunks.  For a given class, each of its chunks (positives) is paired with
    every chunk of every *other* class (negatives), and one ``LinearSVC`` is
    trained per pair.  With ``K`` classes each positive chunk therefore owns
    ``num_pos * (K - 1)`` classifiers.

    At prediction time the decision values of the classifiers sharing a
    positive chunk are reduced with ``min``; those results are reduced with
    ``max`` across positive chunks; the class with the largest score wins.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to every ``LinearSVC`` constructor.

    Attributes
    ----------
    c2svc : dict
        Maps class label -> list (one entry per positive chunk) of lists
        (one entry per negative chunk) of fitted classifiers.
    """

    def __init__(self, **kwargs):
        self.c2svc = {}
        self.kwargs = kwargs

    def fit_one(self, dataset):
        """Train one binary ``LinearSVC``.

        Parameters
        ----------
        dataset : ndarray of shape (n_samples, n_features + 1)
            Feature columns followed by a final 0/1 label column.  Features
            and labels travel in a single array so that one object can be
            handed to ``Pool.map``.

        Returns
        -------
        The fitted ``LinearSVC``.
        """
        X = dataset[:, :-1]
        y = dataset[:, -1].astype(bool)
        svc = LinearSVC(**self.kwargs)
        svc.fit(X, y)
        return svc

    @staticmethod
    def _shuffled_chunks(samples, n_chunks):
        """Shuffle the rows of ``samples`` and split them into ``n_chunks`` parts.

        ``np.array_split`` keeps every row and yields chunk sizes that differ
        by at most one, so no sample is discarded when the row count is not a
        multiple of ``n_chunks``.
        """
        if len(samples) < n_chunks:
            raise ValueError(
                "cannot split %d samples into %d non-empty chunks"
                % (len(samples), n_chunks)
            )
        return np.array_split(np.random.permutation(samples), n_chunks)

    @staticmethod
    def _with_label(chunk, label):
        """Append a constant label column to ``chunk``."""
        return np.concatenate((chunk, np.full((len(chunk), 1), label)), axis=1)

    def fit(self, X, y, num_pos):
        """Train the full ensemble.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
        y : ndarray of shape (n_samples,)
            Class labels; any sortable label values are accepted (the
            classes are discovered from ``y`` rather than assumed to be
            exactly 0, 1 and 2).
        num_pos : int
            Number of chunks each class is split into.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``num_pos`` is not positive, fewer than two classes are
            present, or a class has fewer than ``num_pos`` samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if num_pos < 1:
            raise ValueError("num_pos must be a positive integer")
        classes = np.unique(y).tolist()
        if len(classes) < 2:
            raise ValueError("at least two distinct classes are required")

        # Every class is chunked exactly once; the same chunks serve as
        # positives for their own class and as negatives for the others.
        chunks = {c: self._shuffled_chunks(X[y == c], num_pos) for c in classes}
        num_neg = num_pos * (len(classes) - 1)

        # Build into a local dict so a failed fit never leaves a half-filled
        # model behind, and so stale classes from a previous fit are dropped.
        c2svc = {}
        # One pool for the whole fit, released deterministically on exit.
        with Pool() as pool:
            for c in classes:
                neg_chunks = [
                    chunk
                    for other in classes
                    if other != c
                    for chunk in chunks[other]
                ]
                tasks = [
                    np.concatenate(
                        (self._with_label(pos, 1), self._with_label(neg, 0))
                    )
                    for pos in chunks[c]
                    for neg in neg_chunks
                ]
                svcs = pool.map(self.fit_one, tasks)
                # Regroup the flat result list: one sub-list per positive chunk.
                c2svc[c] = [
                    svcs[i * num_neg:(i + 1) * num_neg] for i in range(num_pos)
                ]
        self.c2svc = c2svc
        return self

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Returns
        -------
        ndarray of shape (n_samples,)
            The winning class *labels* (not column indices), so labels need
            not be the contiguous integers ``0..K-1``.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        if not self.c2svc:
            raise ValueError("OVRSVC is not fitted yet; call fit() first")
        X = np.asarray(X)
        labels = sorted(self.c2svc)
        confidences = np.empty((X.shape[0], len(labels)))
        for col, label in enumerate(labels):
            # min over classifiers that share a positive chunk ...
            group_scores = [
                np.min(
                    [np.ravel(svc.decision_function(X)) for svc in group],
                    axis=0,
                )
                for group in self.c2svc[label]
            ]
            # ... then max over the positive chunks.
            confidences[:, col] = np.max(group_scores, axis=0)
        return np.asarray(labels)[confidences.argmax(axis=1)]

    def score(self, X, y):
        """Return the fraction of rows of ``X`` whose prediction equals ``y``."""
        return np.mean(self.predict(X) == np.asarray(y))

In [4]:
# Load the train split; labels are cast to int and shifted up by one.
# NOTE(review): the +1 shift presumably makes the labels usable as column
# indices in OVRSVC.predict -- confirm against the label files.
X_train = np.load('data_hw2/train_data.npy')
y_train = 1 + np.load('data_hw2/train_label.npy').astype(int)

# Load the test split with the same label handling.
X_test = np.load('data_hw2/test_data.npy')
y_test = 1 + np.load('data_hw2/test_label.npy').astype(int)

# Min-max scaling is fitted on the training features only and then applied
# to the test features. copy=False asks the scaler to work in place.
# To hold out a dev set, split the training data with train_test_split before
# fitting and pass the dev features through scaler.transform as well.
scaler = MinMaxScaler(copy=False)
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [16]:
# Build the ensemble with no extra LinearSVC keyword arguments and train it.
# The third argument is num_pos: the number of chunks the positive samples of
# each class are split into.
# NOTE(review): OVRSVC is defined twice above; on a top-to-bottom run the
# later definition is the one bound here.
ovr_svc = OVRSVC()
ovr_svc.fit(X_train, y_train, 3)

<__main__.OVRSVC at 0x7f390010ce80>

In [6]:
# Predicted class for every test row (argmax over the per-class min-max scores).
ovr_svc.predict(X_test)

array([0, 0, 0, ..., 0, 0, 0])

In [17]:
# Accuracy: fraction of test rows whose predicted class equals y_test.
ovr_svc.score(X_test, y_test)

0.48483956432146014