# 05 Softmargin Classifier

In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets

In [2]:
iris = datasets.load_iris()
data = iris.data

### Implement Softmargin

In [3]:
def softmargin(data,labels,supportvec):
    """
    Computes the softmargin scorebased on
    the data and the given support vectors
    """
    # compute the threshold as the mean of the two support vectors
    threshold = np.mean(supportvec)
    # classiy the data where is is lower and larger the threshold (True/1)
    y_pred = data > threshold
    # boolean can be compared to 0,1-array:
    result = y_pred == labels
    # incorrectly classified
    n_missclassification = (result == False).sum()
    # correctly classified
    n_correctclassification = result.sum()
    score = n_correctclassification - n_missclassification
    # compute accuracy:
    n_datapoints = len(data)
    accuracy = n_correctclassification / n_datapoints
    
    return score, accuracy

In [4]:
# test the function
softmargin(data=iris.data[:,-1], labels=iris.target,
           supportvec=np.random.choice(iris.data[:,-1], 2, replace=False))

(-6, 0.48)

In [5]:
# prepare data

In [6]:
def train_test_split(X, y, shuffle=True, test_size=0.3):
    """
    Split data into training and testing set.
    """
    n_train = int(X.shape[0]*(1-test_size))
    indices = np.arange(len(X))
    if shuffle: 
        np.random.shuffle(indices)
    train = indices[:n_train]
    test = indices[n_train:]
    X_train, X_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    
    return X_train, X_test, y_train, y_test

### Test Softmargin

In [7]:
X = iris.data[:,-1] # use petal width only
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X=X, y=y,shuffle=True, test_size=0.3)

In [8]:
# for binary classification: combine 'versicolor' and 'virginica' to one class (1)
y_train = np.where(y_train==2, 1, y_train)
y_test = np.where(y_test==2, 1, y_test)

In [9]:
# Run the softmargin function 20 times with random support vectors

In [10]:
support_vectors = np.random.choice(X_train, 40, replace=False)
np.random.shuffle(support_vectors)
result_list = []
# iterate over range with odd numbers 1-39
for i in range(1, 40, 2):
    supportvec = np.array(support_vectors[[i-1,i]])
    # apply softmargin to get softmargin score
    score = softmargin(data=X_train, labels=y_train, supportvec=supportvec)[0]
    result = [supportvec, score]
    result_list.append(result)

In [11]:
# present results
result_array = np.array(result_list, dtype='object')
result_array

array([[array([1.6, 0.2]), 105],
       [array([1.8, 1.2]), 35],
       [array([1.3, 1.5]), 51],
       [array([2.5, 1.9]), -15],
       [array([1.3, 0.2]), 105],
       [array([2. , 0.2]), 95],
       [array([0.1, 1.1]), 105],
       [array([0.2, 1.5]), 105],
       [array([0.3, 0.3]), 97],
       [array([1.2, 1.5]), 65],
       [array([0.1, 1.9]), 101],
       [array([1.3, 2. ]), 29],
       [array([0.2, 1.8]), 101],
       [array([1.8, 1. ]), 51],
       [array([2.3, 2.1]), -15],
       [array([0.2, 1.4]), 105],
       [array([0.3, 2.2]), 85],
       [array([1.1, 0.2]), 105],
       [array([1.2, 1.5]), 65],
       [array([1.6, 1.3]), 51]], dtype=object)

In [12]:
# identify support vector with largest softmargin score
best_supportvec = result_array[result_array[:,-1].argmax()][0]
best_supportvec

array([1.6, 0.2])

In [13]:
# Run the best performing model on the test data
score, accuracy = softmargin(data=X_test, labels=y_test, supportvec=best_supportvec)

In [14]:
print('Report of best Model on Testset: ')
print(35*'-')
print('Softmargin Score: ', score)
print('Accuracy: {:.2%}'.format(accuracy))

Report of best Model on Testset: 
-----------------------------------
Softmargin Score:  45
Accuracy: 100.00%
