# Implementation of Boosting Support Vector Machines

In [1]:
# Libraries
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
import pandas as pd
from sklearn.model_selection import train_test_split

## Classifier class

In [2]:
class BoostingSVM:
    """AdaBoost-style ensemble whose weak learners are RBF-kernel SVCs.

    Training starts with a wide kernel (``sigma_ini``). Each round fits an
    SVC on the currently weighted samples:

    * if its weighted training error is not better than chance, the learner
      is discarded and ``sigma`` is reduced by ``sigma_step``;
    * otherwise the learner is kept together with its AdaBoost significance
      and the sample weights are updated.

    Training stops once ``sigma`` is no longer greater than ``sigma_min``.
    Labels are expected to be encoded as -1 / +1, because ``predict``
    combines the learners through the sign of a weighted vote.
    """

    def __init__(self, x=None,y=None, sigma_ini = 100, sigma_min = 1, sigma_step = 0.5):
        """
        Store the kernel-width schedule used by ``fit``.

        x, y:        Unused. Kept only so existing callers that pass them keep
                     working; the training data is given to ``fit``.
        sigma_ini:   Initial RBF kernel width.
        sigma_min:   Training stops when sigma is no longer above this value.
        sigma_step:  Amount subtracted from sigma whenever a weak learner is
                     rejected.
        """
        self.sigma_ini = sigma_ini
        self.sigma_step = sigma_step
        self.sigma_min = sigma_min

        # Defined here so that ``predict`` on an unfitted model fails with a
        # clear message instead of an AttributeError.
        self.weak_learners = []
        self.significance_vec = []

    def fit(self,X_train,y_train):
        """
        Train the ensemble.

        X_train: Training features. Size N x D
        y_train: Training labels in {-1, +1}. Size N or N x 1 (flattened
                 internally).

        Returns ``self``.
        Raises ValueError if ``sigma_step`` is not positive or if the number
        of labels does not match the number of samples.
        """
        if self.sigma_step <= 0:
            # A rejected learner must shrink sigma, otherwise the loop below
            # can never reach sigma_min.
            raise ValueError("sigma_step must be positive")

        # An N x 1 label array would broadcast `pred != y` to N x N and
        # corrupt the weighted error, so always work on a flat vector.
        y_train = np.asarray(y_train).ravel()

        number_samples = np.shape(X_train)[0]
        if y_train.shape[0] != number_samples:
            raise ValueError(
                "X_train and y_train have inconsistent numbers of samples: "
                "%d != %d" % (number_samples, y_train.shape[0])
            )

        # Start from uniform sample weights
        weights = np.full(number_samples, 1.0 / number_samples)

        # Accepted weak learners and their significances (kept index-aligned)
        self.weak_learners = []
        self.significance_vec = []

        # Todo: Apply dimensionality reduction

        sigma = self.sigma_ini
        while sigma > self.sigma_min:
            print('Sigma: %.1f' % sigma)

            # Create the weak learner for this iteration
            weak_learner = SVC(kernel='rbf', gamma=1 / (2 * sigma**2))
            weak_learner.fit(X_train, y_train, sample_weight=weights)

            # Weighted training error of this learner
            misclassified = weak_learner.predict(X_train) != y_train
            error = weights[misclassified].sum()

            if error >= 0.5:
                # Not better than chance: discard the learner and narrow the
                # kernel. `>=` (rather than `>`) matters: an error of exactly
                # 0.5 yields zero significance and unchanged weights, so the
                # same learner would be refit forever.
                sigma = sigma - self.sigma_step
                continue

            # Significance of the weak learner; the error is clamped away
            # from zero so a perfect learner gets a large finite weight
            # instead of +inf.
            clamped_error = max(error, np.finfo(float).eps)
            significance = 0.5 * np.log((1 - error) / clamped_error)

            # Only accepted learners are stored, so both lists always have
            # the same length when `predict` combines them.
            self.weak_learners.append(weak_learner)
            self.significance_vec.append(significance)

            if error == 0:
                # Nothing is misclassified, so re-weighting would leave the
                # distribution unchanged and the loop would not progress.
                break

            # Increase the weight of misclassified samples, decrease the
            # weight of correctly classified ones, then renormalize.
            weights = weights * np.exp(np.where(misclassified, significance, -significance))
            weights /= weights.sum()

        return self

    def predict(self,X_test):
        """
        Predict labels as the sign of the significance-weighted vote.

        X_test: Features to classify. Size M x D

        Returns an integer array of size M with values in {-1, +1}; an exact
        tie in the vote is resolved to +1.
        Raises RuntimeError if no weak learner is available (model not
        fitted, or every candidate learner was rejected during ``fit``).
        """
        if not self.weak_learners:
            raise RuntimeError(
                "BoostingSVM has no weak learners; call fit() first "
                "(or no learner did better than chance during training)"
            )

        model_preds = np.array([model.predict(X_test) for model in self.weak_learners])
        scores = np.dot(self.significance_vec, model_preds)
        # np.sign would return 0 on a tie, which is not a valid label
        return np.where(scores >= 0, 1, -1).astype(int)

## Testing

In [3]:
# Load the pickled dataset from disk
data = pd.read_pickle("./data.pkl")

# Features are every column except 'bot'; the target is the 'bot' column
X = data.loc[:, ~data.columns.isin(['bot'])]
Y = data.loc[:, data.columns.isin(['bot'])].to_numpy()

# Hold out a third of the samples for testing (fixed seed for repeatability)
seed = 10
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, random_state=seed, test_size=test_size
)

# The boosting algorithm needs labels encoded as -1 / +1, stored as flat
# 1-D vectors rather than N x 1 columns
y_train = np.where(y_train < 0.5, -1, 1).ravel()
y_test = np.where(y_test < 0.5, -1, 1).ravel()

In [None]:
# Build the classifiers. The plain SVC and AdaBoost baselines are currently
# disabled; uncomment their lines to include them in the comparison.
#classifierSVM = SVC()
classifierBoostSVM = BoostingSVM()
#classifierAda = AdaBoostClassifier(n_estimators=100)

# Train each model and report progress
#classifierSVM.fit(X_train, y_train)
print('VSC - OK')
classifierBoostSVM.fit(X_train, y_train)
print('BoostSVM - OK')
#classifierAda.fit(X_train, y_train)
print('AdaBoost - OK')

# Predict on the held-out set
#SVMpredictions = classifierSVM.predict(X_test)
BoostSVMpredictions = classifierBoostSVM.predict(X_test)
#Adapredictions = classifierAda.predict(X_test)

# Accuracy = fraction of test samples whose prediction matches the label
#print('Accuracy with default SVM:      %.4f' % (np.count_nonzero(y_test == SVMpredictions) / len(y_test)))
print('Accuracy with our Boosting SVM: %.4f' % (np.count_nonzero(y_test == BoostSVMpredictions) / len(y_test)))
#print('Accuracy with AdaBoost:         %.4f' % (np.count_nonzero(y_test == Adapredictions) / len(y_test)))

VSC - OK
Sigma: 100.0
Sigma: 100.0
Sigma: 100.0
Sigma: 99.5
Sigma: 99.0
Sigma: 98.5
Sigma: 98.0
Sigma: 97.5
Sigma: 97.0
Sigma: 96.5
Sigma: 96.0
Sigma: 95.5
Sigma: 95.0
Sigma: 94.5
Sigma: 94.0
Sigma: 93.5
Sigma: 93.0
Sigma: 92.5
Sigma: 92.0
Sigma: 91.5
Sigma: 91.0
Sigma: 90.5
Sigma: 90.0
Sigma: 89.5
Sigma: 89.0
Sigma: 88.5
Sigma: 88.0
Sigma: 87.5
Sigma: 87.0
Sigma: 86.5
Sigma: 86.0
Sigma: 85.5
Sigma: 85.0
Sigma: 84.5
Sigma: 84.0
Sigma: 83.5
Sigma: 83.0
Sigma: 82.5
Sigma: 82.0
Sigma: 81.5
Sigma: 81.0
Sigma: 80.5
Sigma: 80.0
Sigma: 79.5
Sigma: 79.0
Sigma: 78.5
Sigma: 78.0
Sigma: 77.5
Sigma: 77.0
Sigma: 76.5
Sigma: 76.0
Sigma: 75.5
Sigma: 75.0
Sigma: 74.5
Sigma: 74.0
Sigma: 73.5
Sigma: 73.0
Sigma: 72.5
Sigma: 72.0
Sigma: 71.5
Sigma: 71.0
Sigma: 70.5
Sigma: 70.0
Sigma: 69.5
Sigma: 69.0
Sigma: 68.5
Sigma: 68.0
Sigma: 67.5
Sigma: 67.0
Sigma: 66.5
Sigma: 66.0
Sigma: 65.5
Sigma: 65.0
Sigma: 64.5
Sigma: 64.0
Sigma: 63.5
Sigma: 63.0
Sigma: 62.5
Sigma: 62.0
Sigma: 61.5
Sigma: 61.0
Sigma: 60.5
Sigm

In [None]:
# Re-print the Boosting SVM test accuracy (fraction of matching predictions)
print('Accuracy with our Boosting SVM: %.4f' % (np.count_nonzero(y_test == BoostSVMpredictions) / len(y_test)))

In [None]:
## Consider applying dimensionality reduction to speed up SVM boosting.

In [None]:
# We will consider 