In [2]:
import cvxopt
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Select the class 
def get_class(group, dataset):
    """Binarise the label stored in the last position of every row, in place.

    A row whose last element equals ``group`` gets label 1; every other row
    gets label -1. The width is taken from the first row.

    Args:
        group: Label value that identifies the positive class.
        dataset: Indexable collection of mutable rows (e.g. a 2-D array or a
            list of lists); it is modified in place.

    Returns:
        The same ``dataset`` object that was passed in.
    """
    label_pos = len(dataset[0]) - 1
    for row in dataset:
        if row[label_pos] == group:
            row[label_pos] = 1
        else:
            row[label_pos] = -1
    return dataset

# Normalize
def normalize(dataset):
    """Min-max scale every column except the last one to [0, 1], in place.

    The last column is left untouched (the callers in this file keep the
    class label there). A column whose values are all equal has no range to
    scale by; it is set to 0 instead of dividing by zero.

    Args:
        dataset: 2-D numpy array; it is modified in place.

    Returns:
        The same ``dataset`` array that was passed in.
    """
    for col in range(dataset.shape[1] - 1):
        column = dataset[:, col]
        low = min(column)
        span = max(column) - low
        if span == 0:
            # Constant feature: avoid 0/0 (NaN on float arrays,
            # ZeroDivisionError on object arrays).
            dataset[:, col] = 0.0
            continue
        # Whole-column arithmetic replaces the per-row Python loop.
        dataset[:, col] = (column - low) / span
    return dataset

# Insert the bias
def insert_bias(data):
    """Return a new array in which every row of ``data`` is prefixed with -1.

    Args:
        data: Sequence of rows (each row is anything ``np.insert`` accepts).

    Returns:
        numpy array built from the extended rows.
    """
    return np.asarray([np.insert(row, 0, -1) for row in data])

# Divide into train and test
def divide_samples(dataset):
    """Randomly split ``dataset`` into a training part and a 20% test part.

    Args:
        dataset: Samples to split, passed straight to ``train_test_split``.

    Returns:
        Two-element list ``[training_set, test_set]``.
    """
    training_set, test_set = train_test_split(dataset, test_size=0.20)
    return [training_set, test_set]

# Divide into x and y 
def split_samples(dataset, n_attributes):
    """Separate a 2-D dataset into inputs and targets.

    Args:
        dataset: 2-D numpy array.
        n_attributes: Total number of columns to consider; the first
            ``n_attributes - 1`` columns become the inputs.

    Returns:
        Tuple ``(x, y)`` of fresh arrays: ``x`` holds the leading
        ``n_attributes - 1`` columns and ``y`` the last column.
    """
    features = np.array(dataset[:, :n_attributes - 1])
    targets = np.array(dataset[:, -1])
    return features, targets
 
# Solve the quadratic program with cvxopt
def q_solver(x_train, y_train):
    """Build the QP matrices for the training data and solve with cvxopt.

    Args:
        x_train: 2-D array of training inputs (one sample per row).
        y_train: Training labels, one per sample.

    Returns:
        Flattened solution vector ``s['x']`` as a 1-D numpy array, or
        ``None`` when the solver raises ``ValueError`` (the error is printed).
    """
    n_samples, n_features = x_train.shape

    # One extra optimisation variable for the bias.
    n_variables = n_features + 1

    P = getp(n_variables)
    q = getq(n_variables)
    G = getg(x_train, y_train, n_samples)
    h = geth(n_samples)
    # NOTE(review): the solver's reported status is not inspected here, so a
    # non-converged run is returned like any other — confirm this is intended.
    try:
        solution = cvxopt.solvers.qp(P, q, G, h)
        return np.ravel(solution['x'])
    except ValueError as error:
        print(error)
        return None

# Return P
def getp(cols):
    """Return the quadratic-term matrix P as a cvxopt 'd' matrix.

    P is the ``cols`` x ``cols`` identity with its last diagonal entry set to
    zero, so the last variable carries no quadratic cost.

    Args:
        cols: Number of optimisation variables.
    """
    penalty = np.identity(cols)
    # The identity's last column is non-zero only on the diagonal.
    penalty[-1, -1] = 0
    return cvxopt.matrix(penalty, tc='d')

# Return q
def getq(cols):
    """Return the linear-term vector q: a ``cols`` x 1 cvxopt 'd' matrix of zeros.

    Args:
        cols: Number of optimisation variables.
    """
    zero_column = np.zeros(shape=(cols, 1))
    return cvxopt.matrix(zero_column, tc='d')

# Return G
def getg(x, y, rows):
    """Return the inequality matrix G for the QP as a cvxopt matrix.

    Each input row is extended with a trailing 1, multiplied by its label and
    negated. The shape of the label-scaled matrix is printed before negation.

    Args:
        x: 2-D array of inputs with ``rows`` rows.
        y: Labels, one per row of ``x``.
        rows: Number of samples.
    """
    label_column = np.array(y, ndmin=2).T
    extended = np.concatenate((x, np.ones((rows, 1))), axis=1)
    aux = label_column * extended
    print(aux.shape)
    # The transposed nested list is what gets handed to cvxopt.matrix.
    nested = (aux * -1.).T.tolist()
    return cvxopt.matrix(nested)

# Return h
def geth(rows):
    """Return the inequality right-hand side h: ``rows`` entries, all -1.

    Args:
        rows: Number of constraints (one per training sample).
    """
    minus_ones = -np.ones(rows)
    return cvxopt.matrix(minus_ones)

# Return accuracy
def get_accuracy(y_output, y_test):
    """Return the fraction of predictions that equal the expected labels.

    The element-wise comparison is summed along the first axis with the
    built-in ``sum``, so column-vector inputs of shape (n, 1) yield a
    one-element array while 1-D inputs yield a scalar.

    Args:
        y_output: Predicted labels (numpy array).
        y_test: Expected labels, same shape as ``y_output``.
    """
    matches = y_test == y_output
    n_correct = sum(matches)
    return n_correct / len(y_test)

# Return the confusion matrix
def get_confusion_matrix(y_output, y_test):
    """Return sklearn's confusion matrix for the given labels.

    Both arrays are converted to plain lists first; ``y_test`` is passed as
    the true labels and ``y_output`` as the predictions.

    Args:
        y_output: Predicted labels (numpy array).
        y_test: Expected labels (numpy array).
    """
    expected = y_test.tolist()
    predicted = y_output.tolist()
    return confusion_matrix(expected, predicted)
 
# Predict
def predict(y):
    """Threshold the first element of every row to a class label, in place.

    Strictly positive scores become 1; everything else becomes -1.

    Args:
        y: Collection of mutable rows whose first element is a score; it is
            modified in place.

    Returns:
        The same ``y`` object that was passed in.
    """
    for row in y:
        if row[0] > 0:
            row[0] = 1
        else:
            row[0] = -1
    return y

# Linear decision scores for the test samples
def delta(weights, x_test):
    """Return the linear decision score ``x . w + b`` for every test sample.

    ``weights`` is laid out as ``[w_1, ..., w_d, b]``: ``getg`` appends a
    column of ones as the last input, so the last solved variable is the
    bias. The bias must therefore be added to the score; leaving it out
    shifts the decision boundary away from the one that was trained.

    Args:
        weights: 1-D numpy array holding the feature weights followed by
            the bias.
        x_test: 2-D array of inputs, one sample per row.

    Returns:
        Column vector of shape (n_samples, 1) with the decision scores.
    """
    coefficients = np.array(weights[:-1], ndmin=2).T
    bias = weights[-1]
    return np.dot(x_test, coefficients) + bias

# Training
def train(training_set):
    """Fit the model on ``training_set`` and return the solver's result.

    Args:
        training_set: 2-D array whose last column holds the labels.

    Returns:
        Whatever ``q_solver`` returns for the split inputs and labels.
    """
    n_columns = len(training_set[0])
    inputs, labels = split_samples(training_set, n_columns)
    return q_solver(inputs, labels)

# Test
def test(weights, test_set):
    """Score ``test_set`` with ``weights`` and evaluate the predictions.

    Args:
        weights: Solution vector produced by training.
        test_set: 2-D array whose last column holds the expected labels.

    Returns:
        Tuple ``(accuracy, confusion_matrix)``.
    """
    n_columns = len(test_set[0])
    x_test, y_test = split_samples(test_set, n_columns)
    y_output = predict(delta(weights, x_test))
    # The expected labels are reshaped to a column to match the predictions.
    expected = np.array(y_test, ndmin=2).T
    accuracy = get_accuracy(y_output, expected)
    matrix = get_confusion_matrix(y_output, expected)
    return accuracy, matrix

# Evaluate an algorithm using hold-out
def execute(realizations, samples):
    """Run repeated hold-out evaluations and print per-run and summary stats.

    Each realization draws a fresh train/test split, trains, and (when
    training returned weights) tests and records the accuracy. A realization
    whose training returns ``None`` prints "ERROR" and is skipped.

    Args:
        realizations: Number of hold-out repetitions.
        samples: Full dataset handed to ``divide_samples``.
    """
    print("### SVM - SoftMargin ###")
    print("PARÂMETROS: ")
    print("\t Total de realizações: ", realizations, "\n")
    accuracies = []
    for run in range(realizations):
        training_set, test_set = divide_samples(samples)
        print("### REALIZAÇÃO ", (run + 1), "###")
        print("### FASE DE training ###")
        weights = train(training_set)
        if weights is not None:
            print("### FASE DE TESTES ###")
            accuracy, matriz = test(weights, test_set)
            print("Taxa de acerto: ", accuracy, "\n")
            accuracies.append(accuracy)
            print("Matriz de confusão: ", matriz, "\n")
        else:
            print("ERROR")

    # Summary statistics over the realizations that completed.
    accuracies = np.array(accuracies)
    print("Acurácia: ", accuracies.mean())
    print("Variância da Acurácia: ", accuracies.var())
    print("Desvio Padrão da Acurácia: ", accuracies.std())
    print("### FIM do SVM ###")

# Load the header-less CSV, mark class "NO" as +1 and everything else as -1,
# scale the feature columns, then run five hold-out realizations.
dataset = np.array(
    pd.read_csv("base/column_2C.dat", delimiter=",", header=None)
)
dataset = normalize(get_class("NO", dataset))
execute(5, dataset)

### SVM - SoftMargin ###
PARÂMETROS: 
	 Total de realizações:  5 

### REALIZAÇÃO  1 ###
### FASE DE training ###
(248, 7)
     pcost       dcost       gap    pres   dres
 0:  5.3744e+00  4.9495e+02  1e+03  3e+00  2e+02
 1:  7.2512e+01  7.1937e+02  9e+02  2e+00  1e+02
 2:  4.9973e+01  1.2165e+03  1e+03  2e+00  1e+02
 3:  4.9801e-01  3.6296e+03  1e+03  1e+00  1e+02
 4:  5.0024e-01  1.4870e+04  1e+03  1e+00  1e+02
 5:  4.2677e-01  4.2643e+04  3e+03  1e+00  1e+02
 6:  8.5222e+00  4.0737e+05  1e+04  1e+00  9e+01
 7:  1.1151e+01  4.7098e+05  1e+04  1e+00  9e+01
 8:  4.0078e+03  9.4616e+06  1e+05  1e+00  9e+01
 9:  3.2429e+03  3.4951e+07  4e+05  1e+00  9e+01
10:  6.8266e+02  1.7272e+08  2e+06  1e+00  9e+01
11:  1.1279e+02  2.5161e+09  3e+07  1e+00  9e+01
12:  1.9787e+01  1.1272e+11  6e+08  1e+00  9e+01
13:  2.4783e-02  1.9251e+13  5e+09  1e+00  9e+01
14:  3.0895e-06  8.1062e+16  2e+11  1e+00  1e+02
15:  2.0514e-07  2.1940e+22  2e+16  1e+00  2e+06
16:  6.5042e-07  2.9857e+27  2e+21  1e+00  2e