In [2]:
import pandas as pd
import numpy as np
import random
import scipy.optimize as sco

In [3]:
data_raw = pd.read_csv("data\data_banknote_authentication.txt", header = None)
data_raw.columns = ["v", "w", "x", "y", "label"]
data_sub = data_raw.sample(10).reset_index(drop=True) #subset des eigentlichen Datensatzes
xVals = np.arange(0,1.01, 0.01)
#print(data_sub)

In [4]:
def polykernel(x,y):
    q = 4
    return((np.dot(np.transpose(y),x) + 1) ** q)

In [5]:
def precalculateKernel(data,classAttribute):
    N = data.shape[0]
    #print(N)
    
    ts = data[classAttribute]
    dataNoClass = data.drop(classAttribute,axis=1)
    xs = np.c_[np.ones(N), dataNoClass]
        
    N,d = xs.shape
    
    kernel = np.empty([N,N])
    
    for i in range(N):
        for j in range(N):
            kernel[i,j] = polykernel(xs[i],xs[j])
            
    return kernel

In [6]:
def minimize(data,classAttribute,dataKernel, C=1):
    N = data.shape[0]
    
    ts = data[classAttribute]
    dataNoClass = data.drop(classAttribute,axis=1)
    xs = np.c_[np.ones(N), dataNoClass]
        
    N,d = xs.shape
    
    alphaInit = np.empty([N,1])
    for i in range(N):
        alphaInit[i] = random.uniform(-1e-10,1e-10)
    
    
    def mainFun(alphas):
        summe = 0
        for i in range(N):
            for j in range(N):
                summe += alphas[i] * alphas[j] * ts[i] * ts[j] * dataKernel[i,j]
        
        summe = -0.5 * summe + np.sum(alphas)
        print (summe)
        return -summe
    
    def nbFun(alphas):
        summe = 0
        for i in range(N):
            summe += alphas[i] * ts[i]
        
        return summe
    
    return sco.minimize(
        fun = mainFun,
        x0 = alphaInit,
        options = {'maxiter': 50, 'disp': True},
        constraints = [
            { 'type': 'eq', 'fun': nbFun },
            { 'type': 'ineq', 'fun': (lambda alpha: (alpha)) },
            { 'type': 'ineq', 'fun': (lambda alpha: (C - alpha)) }
        ]
    )

In [11]:
#real data
#data_krnl = precalculateKernel(data_raw,"label")
#result = minimize(data_raw,"label",data_krnl)
#alphas = result.x #x bezeichnet im zur체ckgegebenen Datentyp der minimise funktion das tats채chliche Datenarray
#result
#subset
data_krnl = precalculateKernel(data_sub,"label")
result = minimize(data_sub,"label",data_krnl)
alphas = result.x #x bezeichnet im zur체ckgegebenen Datentyp der minimise funktion das tats채chliche Datenarray
result

-1.15650293746e-10
-1.15650293746e-10
1.47614089622e-08
1.47855109001e-08
-8.87446636703e-08
1.47855109001e-08
1.35490501417e-08
-1.03614979714e-07
1.47855109001e-08
1.47855109001e-08
1.47855109001e-08
1.47855109001e-08
5.9999999998
5.9999999998
6.00000001468
6.00000001471
5.99999991071
6.00000001471
6.00000001343
5.99999989584
6.00000001471
6.00000001471
6.00000001471
6.00000001471
Optimization terminated successfully.    (Exit mode 0)
            Current function value: -5.9999999998
            Iterations: 2
            Function evaluations: 24
            Gradient evaluations: 2


     fun: -5.999999999804384
     jac: array([-0.99825436, -1.        ,  5.97883987, -1.        , -0.91464972,
        6.97717065, -1.00000006, -1.00000006, -1.        , -1.        ,  0.        ])
 message: 'Optimization terminated successfully.'
    nfev: 24
     nit: 2
    njev: 2
  status: 0
 success: True
       x: array([ -1.80801484e-14,   1.00000000e+00,   1.41573139e-14,
         1.00000000e+00,  -1.84090463e-14,   2.23318807e-14,
         1.00000000e+00,   1.00000000e+00,   1.00000000e+00,
         1.00000000e+00])

In [8]:
#testing and training samples
def split_test (data, prcnt_test):
    data_test = data.sample(frac = prcnt_test, axis = 0)
    return(data_test)
def split_train(data, data_test): 
       data_train = data[~data["x"].isin(data_test["x"])] 
       #http://stackoverflow.com/questions/29464234/compare-python-pandas-dataframes-for-matching-rows
       return(data_train)
   
test = split_test(data = data_sub, prcnt_test = 0.2)
train = split_train(data = data_sub, data_test = test)
print(test)
print(train)

        v        w        x       y  label
8  5.0860   3.2798 -1.27010  1.1189      0
6 -2.4473  12.6247  0.73573 -7.6612      0
         v         w        x        y  label
0 -0.88728   2.80800 -3.14320 -1.20350      1
1  4.03290   0.23175  0.89082  1.18230      0
2 -2.82670  -9.04070  9.06940 -0.98233      1
3 -2.26230  12.11770  0.28846 -7.75810      0
4 -0.37013  -5.55400  4.77490  1.54700      1
5 -2.00420  -9.36760  9.33330 -0.10303      1
7  5.11290  -0.49871  0.62863  1.11890      0
9  0.37980   0.70980  0.75720 -0.44440      0


In [9]:
def evaluateDiscriminationFunction(data,ws):
    data2 = data.copy()
    
    data2['ones'] = 1
    
    cols = list(data2.columns.values)
    cols = [cols[len(cols)-1]] + cols[0:(len(cols)-1)]
    
    data2 = data2.reindex_axis(cols,axis=1)
    
    classes = np.dot(data2, ws)
    
    for i in range(len(classes)):
        if classes[i] > 0:
            classes[i] = 1
        else:
            classes[i] = 0
    
    return classes

In [12]:
evaluateDiscriminationFunction(data_sub, result.x)

ValueError: shapes (10,6) and (10,) not aligned: 6 (dim 1) != 10 (dim 0)