# Support vector machines soft margin

 - Informações sobre o conjunto de dados
    1. variance of Wavelet Transformed image (continuous)
    2. skewness of Wavelet Transformed image (continuous)
    3. kurtosis of Wavelet Transformed image (continuous)
    4. entropy of image (continuous)
    5. class (integer)




In [399]:
import sys 

sys.path.append("/home/davi/PycharmProjects/Reconhecimento-de-Padroes")

from sklearn.model_selection import KFold
from Strings import string
from sklearn.model_selection import train_test_split
from numpy import zeros, identity, array, concatenate, \
    ones, ravel, dot, where, multiply
from src.Utils.utils import get_accuracy, get_data
import cvxopt
import cvxopt.solvers

In [400]:
def get_p(number_lines, number_columns):
    """Build the quadratic-term matrix handed to ``cvxopt.solvers.qp``.

    The matrix is an identity of size ``number_columns`` whose last column is
    zeroed, so the last variable contributes nothing to the quadratic term.

    Args:
        number_lines: not used by this function.
        number_columns: number of optimisation variables.

    Returns:
        A ``cvxopt.matrix`` of doubles with shape
        ``(number_columns, number_columns)``.
    """
    quadratic = identity(number_columns)
    # Wipe the last column: on an identity this clears only the final
    # diagonal entry.
    quadratic[:, -1] = 0

    print(quadratic.shape)
    return cvxopt.matrix(quadratic, tc='d')

In [401]:
def get_q(number_columns):
    """Build the all-zero linear-term vector handed to ``cvxopt.solvers.qp``.

    Args:
        number_columns: number of optimisation variables.

    Returns:
        A ``cvxopt.matrix`` of doubles with shape ``(number_columns, 1)``.
    """
    linear = zeros((number_columns, 1))
    print(linear.shape)
    return cvxopt.matrix(linear, tc='d')

In [402]:
def get_g(x, y, number_lines):
    """Build the inequality-constraint matrix handed to ``cvxopt.solvers.qp``.

    The samples are extended with a trailing column of ones, multiplied
    element-wise by the labels, and the product is negated.

    Args:
        x: 2-D array of samples with ``number_lines`` rows.
        y: labels; a 1-D or single-row input is turned into a column first.
        number_lines: number of rows in ``x``.

    Returns:
        A ``cvxopt.matrix`` of doubles holding the negated product.
    """
    labels = array(y, ndmin=2)
    # A single-row label array is flipped into a column so that it
    # broadcasts across the rows of the augmented sample matrix.
    if labels.shape[0] == 1:
        labels = labels.T

    augmented = concatenate((x, ones((number_lines, 1))), axis=1)
    signed = labels * augmented

    print(signed.shape)
    return cvxopt.matrix(signed * -1., tc='d')

In [403]:
def get_h(number_lines):
    """Build the inequality right-hand side handed to ``cvxopt.solvers.qp``.

    Args:
        number_lines: number of constraints (one per training sample).

    Returns:
        A ``cvxopt.matrix`` of doubles with shape ``(number_lines, 1)`` in
        which every entry is ``-1``.
    """
    bounds = ones((number_lines, 1))
    print(bounds.shape)
    return cvxopt.matrix(bounds * -1., tc='d')

In [404]:
def quadratic_solver(x_train, y_train):
    """Fit the separating hyperplane by solving a QP with cvxopt.

    The optimisation variable has one entry per column of ``x_train`` plus
    one extra trailing entry for the bias.

    Args:
        x_train: 2-D array of training samples, one row per sample.
        y_train: labels for the rows of ``x_train`` (forwarded to ``get_g``).

    Returns:
        A 1-D array with the solution found by the solver, or ``None`` when
        the solver raises ``ValueError`` or stops without reporting an
        optimal solution.
    """
    number_lines, number_columns = x_train.shape

    # One extra variable for the bias.
    number_columns += 1

    P = get_p(number_lines, number_columns)
    Q = get_q(number_columns)
    G = get_g(x_train, y_train, number_lines)
    H = get_h(number_lines)

    try:
        solution = cvxopt.solvers.qp(P, Q, G, H)
    except ValueError as error:
        print(error)
        return None

    # qp reports a status other than 'optimal' when it stops without
    # converging; the iterate it hands back in that case is not a valid
    # solution, so treat it as a failure instead of returning it as weights.
    if solution['status'] != 'optimal':
        print("cvxopt.solvers.qp did not converge (status: {})".format(
            solution['status']))
        return None

    return ravel(solution['x'])

    

    


In [405]:
def train(x_train, y_train):
    """Train on the given samples and labels.

    Delegates to ``quadratic_solver`` and returns whatever it returns.
    """
    return quadratic_solver(x_train, y_train)

In [406]:
def get_foward(weights, x_test):
    """Compute the raw decision values for ``x_test``.

    All entries of ``weights`` except the last are used as coefficients; the
    last entry is added to every output value as the bias.

    Args:
        weights: 1-D array of coefficients followed by the bias.
        x_test: 2-D array of samples.

    Returns:
        Array with the product of ``x_test`` and the coefficients, plus the
        bias.
    """
    print(weights)
    print(x_test.shape)

    last = weights.shape[0] - 1
    coefficients = array(weights[:last], ndmin=2)

    # The coefficients start out as a single row; turn them into a column
    # when their length matches the number of features in x_test.
    if x_test.shape[1] == coefficients.shape[1]:
        coefficients = coefficients.T

    H_output = dot(x_test, coefficients)
    H_output += weights[last]
    return H_output


In [407]:
def predict(h, domain=None):
    """Threshold decision values into class labels, in place.

    When ``domain`` equals ``[-1., 1.]`` the first entry of every row of
    ``h`` is overwritten with ``1`` if it is strictly positive and with
    ``-1`` otherwise.

    Args:
        h: sequence of rows whose first entry is the decision value.
        domain: label set; only ``[-1., 1.]`` is handled.

    Returns:
        ``h`` itself after the update, or ``None`` for any other ``domain``.
    """
    if not (domain == [-1., 1.]):
        return None

    for row in h:
        row[0] = 1 if row[0] > 0 else -1

    return h
    

In [408]:
def test(weights, x_test, y_test):
    """Score the trained weights on a held-out set.

    Runs ``get_foward`` and ``predict`` on ``x_test`` and compares the
    result with ``y_test`` through ``get_accuracy``.

    Args:
        weights: weight vector as returned by ``train``.
        x_test: 2-D array of test samples.
        y_test: array of expected labels.

    Returns:
        Whatever ``get_accuracy`` returns for the predictions and labels.
    """
    y_output = predict(get_foward(weights, x_test), domain=[-1., 1.])

    expected = array(y_test, ndmin=2)
    # Flip the labels when it is their transpose that lines up with the
    # predictions.
    if y_output.shape == y_test.T.shape:
        expected = expected.T

    return get_accuracy(y_output, expected)

In [409]:
# Load the data set; the last column holds the class label.
data = get_data("data_banknote_authentication.txt", type="csv")
number_lines, number_columns = data.shape[0], data.shape[1]

# Features are every column but the last; labels are the last column,
# reshaped into a column vector.
table = array(data, ndmin=2)
X = table[:, :number_columns-1]
Y = array(table[:, number_columns-1], ndmin=2).T

train_size, test_size = .8, .2

# Relabel class 0 as -1 so the labels are -1 / 1.
indices = where(Y == 0)
Y[indices] = -1

print(X)
print(Y)

[[  3.6216    8.6661   -2.8073   -0.44699]
 [  4.5459    8.1674   -2.4586   -1.4621 ]
 [  3.866    -2.6383    1.9242    0.10645]
 ...
 [ -3.7503  -13.4586   17.5932   -2.7771 ]
 [ -3.5637   -8.3827   12.393    -1.2823 ]
 [ -2.5419   -0.65804   2.6842    1.1952 ]]
[[-1.]
 [-1.]
 [-1.]
 ...
 [ 1.]
 [ 1.]
 [ 1.]]


In [410]:
# Five independent realizations, each on a fresh random train/test split.
for realization in range(5):
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=test_size)

    weights = train(x_train, y_train)
    if weights is not None:
        accuracy = test(weights, x_test, y_test)
        print(string.RUN.format(realization, None, accuracy, None))
    else:
        # Training produced no weights; skip this realization.
        print("ERROR")

(5, 5)
(5, 1)
(1097, 5)
(1097, 1)
     pcost       dcost       gap    pres   dres
 0:  7.3795e-02  4.4205e+02  5e+03  2e+00  7e+03
 1:  3.6554e-01 -8.3338e+02  3e+03  1e+00  4e+03
 2:  4.6942e-01 -5.8933e+02  3e+03  1e+00  4e+03
 3:  4.7004e-01  2.1543e+02  3e+03  1e+00  4e+03
 4:  4.9211e-01  2.6186e+03  4e+03  1e+00  4e+03
 5:  1.2170e-01  2.2173e+04  4e+03  1e+00  4e+03
 6:  2.0764e-04  9.4791e+04  9e+03  1e+00  3e+03
 7:  1.1004e-04  2.0476e+05  2e+04  1e+00  3e+03
 8:  1.2576e-04  2.4210e+05  2e+04  1e+00  3e+03
 9:  6.4379e-05  6.7029e+05  5e+04  1e+00  3e+03
10:  6.7442e-05  7.8646e+05  6e+04  1e+00  3e+03
11:  5.4318e-05  1.6030e+06  1e+05  1e+00  3e+03
12:  5.7636e-05  1.6710e+06  1e+05  1e+00  3e+03
13:  2.9570e-05  7.9902e+06  4e+05  1e+00  3e+03
14:  1.2310e-05  6.4327e+07  2e+06  1e+00  3e+03
15:  1.1478e-06  1.4298e+09  1e+07  1e+00  3e+03
16:  5.3579e-09  1.3094e+11  9e+07  1e+00  3e+03
17:  2.1770e-12  1.7931e+14  3e+09  1e+00  3e+03
18:  6.8938e-15  1.1651e+19  1e+13  