In [2]:
import pandas as pd
import numpy as np
import random
import scipy.optimize as sco

In [95]:
data_raw = pd.read_csv("data_banknote_authentication.txt", header = None)
data_raw.columns = ["v", "w", "x", "y", "label"]
data_sub = data_raw.sample(10).reset_index(drop=True) #subset des eigentlichen Datensatzes
xVals = np.arange(0,1.01, 0.01)

In [14]:
def polykernel(x,y):
    q = 4
    return((np.dot(np.transpose(y),x) + 1) ** q)

In [20]:
def precalculateKernel(data,classAttribute):
    N = data.shape[0]
    #print(N)
    
    ts = data[classAttribute]
    dataNoClass = data.drop(classAttribute,axis=1)
    xs = np.c_[np.ones(N), dataNoClass]
        
    N,d = xs.shape
    
    kernel = np.empty([N,N])
    
    for i in range(N):
        for j in range(N):
            kernel[i,j] = polykernel(xs[i],xs[j])
            
    return kernel

In [16]:
def minimize(data,classAttribute,dataKernel, C=1):
    N = data.shape[0]
    
    ts = data[classAttribute]
    dataNoClass = data.drop(classAttribute,axis=1)
    xs = np.c_[np.ones(N), dataNoClass]
        
    N,d = xs.shape
    
    alphaInit = np.empty([N,1])
    for i in range(N):
        alphaInit[i] = random.uniform(-1e-10,1e-10)
    
    
    def mainFun(alphas):
        summe = 0
        for i in range(N):
            for j in range(N):
                summe += alphas[i] * alphas[j] * ts[i] * ts[j] * dataKernel[i,j]
        
        summe = -0.5 * summe + np.sum(alphas)
        print (summe)
        return -summe
    
    def nbFun(alphas):
        summe = 0
        for i in range(N):
            summe += alphas[i] * ts[i]
        
        return summe
    
    return sco.minimize(
        fun = mainFun,
        x0 = alphaInit,
        options = {'maxiter': 50, 'disp': True},
        constraints = [
            { 'type': 'eq', 'fun': nbFun },
            { 'type': 'ineq', 'fun': (lambda alpha: (alpha)) },
            { 'type': 'ineq', 'fun': (lambda alpha: (C - alpha)) }
        ]
    )

In [96]:
#real data
#data_krnl = precalculateKernel(data_raw,"label")
#result = minimize(data_raw,"label",data_krnl)
#alphas = result.x #x bezeichnet im zurückgegebenen Datentyp der minimise funktion das tatsächliche Datenarray
#result
#subset
data_krnl = precalculateKernel(data_sub,"label")
result = minimize(data_sub,"label",data_krnl)
alphas = result.x #x bezeichnet im zurückgegebenen Datentyp der minimise funktion das tatsächliche Datenarray
result

4.12535107012e-11
4.12535107012e-11
1.4877445365e-08
1.49424101007e-08
1.48666501258e-08
1.49424147045e-08
1.35692769127e-08
1.49424147045e-08
1.49424147045e-08
1.49424147045e-08
1.49424147045e-08
1.49424147045e-08
5.99999999986
5.99999999986
6.0000000147
6.00000001477
6.00000001469
6.00000001477
6.0000000134
6.00000001477
6.00000001477
6.00000001477
6.00000001477
6.00000001477
Optimization terminated successfully.    (Exit mode 0)
            Current function value: -5.99999999986
            Iterations: 2
            Function evaluations: 24
            Gradient evaluations: 2


     fun: -5.9999999998646745
     jac: array([-0.99572963, -0.9999997 , -0.99492544, -1.        , -0.90842658,
       -1.        , -1.        , -1.        , -1.00000006, -1.        ,  0.        ])
 message: 'Optimization terminated successfully.'
    nfev: 24
     nit: 2
    njev: 2
  status: 0
 success: True
       x: array([  4.83749381e-18,   8.94773311e-18,   4.70423187e-18,
         1.00000000e+00,  -1.84894588e-17,   1.00000000e+00,
         1.00000000e+00,   1.00000000e+00,   1.00000000e+00,
         1.00000000e+00])

In [93]:
#testing and training samples
def split_test (data, prcnt_test):
    data_test = data.sample(frac = prcnt_test, axis = 0)
    return(data_test)
def split_train(data, data_test): 
       data_train = data[~data["x"].isin(data_test["x"])] 
       #http://stackoverflow.com/questions/29464234/compare-python-pandas-dataframes-for-matching-rows
       return(data_train)
   
test = split_test(data = data_sub, prcnt_test = 0.2)
train = split_train(data = data_sub, data_test = test)
print(test)
print(train)

         v        w        x        y  label
8  0.11806  0.39108 -0.98223  0.42843      1
2 -3.55100  1.89550  0.18650 -2.44090      1
         v         w         x         y  label
0 -0.88728   2.80800  -3.14320 -1.203500      1
1  4.17570  10.26150  -3.85520 -4.305600      0
3  2.82370   2.85970   0.19678  0.571960      0
4  0.32924  -4.45520   4.57180 -0.988800      0
5 -2.45610  -4.55660   6.45340 -0.056479      1
6  0.15423   0.11794  -1.68230  0.595240      1
7 -2.31420  -0.68494   1.98330 -0.448290      1
9 -3.26920 -12.74060  15.55730 -0.141820      1
