In [1]:
import pandas as pd
import numpy as np

In [2]:

def normalize(data_to_noramlize):
    
    #Converting Training Data from type Dataframe to Array for Matrix Operation
    pure_training_data=np.array(data_to_noramlize)
    
    #Dividing Each Column of Training Data with respective Column Maximium Value to Normalize the Dataset
    return(pure_training_data/np.max(pure_training_data,axis=0))

In [3]:
class SVM:
    def __init__(self) -> None:
        self.lr = 0.5
        self.lam = 0.001
        self.max_iter = 500
        self.tol = 1e-6

        self._weigths = None
    
    def _hinge_loss(self, X, y):
        # calculate hinge loss
        N = X.shape[0]
        distances = 1 - y * (np.dot(X, self._weigths))
        distances[distances < 0] = 0  # equivalent to max(0, distance)
        hinge_loss =  (np.sum(distances) / N)
    
        # calculate cost
        loss = self.lam / 2 * np.dot(self._weigths, self._weigths) + hinge_loss
        return loss

    def _calc_gradient(self, X, y):
        distance = 1 - y * (np.dot(X, self._weigths))
        dw = np.zeros(len(self._weigths))
        t = np.where(y <= 0, -1, 1)
        for ind, d in enumerate(distance):
            condition = t[ind] * (np.dot(X[ind], self._weigths)) >= 1
            if condition:
                dw += 2 * self.lam * self._weigths
            else:
                dw += 2 * self.lam * self._weigths - np.dot(X[ind], t[ind])
        return dw/len(y)
            

    def fit(self, X, y):
        bias = np.ones((X.shape[0], 1))
        X = np.concatenate((bias, X), axis=1)
        self._weigths = np.zeros(X.shape[1])
        for epoch in range(self.max_iter):
            idx = np.random.permutation(len(X))
            x_shuffled = X[idx]
            y_shuffled = y[idx]

            dw = self._calc_gradient(x_shuffled, y_shuffled)
            self._weigths -= self.lr * dw

            print('Epoch {} Loss: {}'.format(epoch, self._hinge_loss(X, y)))

    def predict(self, X):
        return np.sign(np.dot(X, self._weigths))
    
    def score(self, X, y):
        bias = np.ones((X.shape[0], 1))
        X = np.concatenate((bias, X), axis=1)
        preds = self.predict(X)
        t = np.where(y <= 0, -1, 1)
        print(np.unique(preds, return_counts=True))
        return (preds == t).sum()/t.size


In [4]:
def get_features_labels(df):
    labels = df['decision']
    train_features = df.drop('decision', 1)
    return train_features.to_numpy(), labels.to_numpy()

In [5]:
train_data = pd.read_csv('./trainingSet.csv')
X_train, y_train = get_features_labels(train_data)


In [6]:
model = SVM()

In [7]:
model.fit(X_train, y_train)

Epoch 0 Loss: 73.88518002490112
Epoch 1 Loss: 0.6007227826492731
Epoch 2 Loss: 146.90133295442288
Epoch 3 Loss: 0.5809779330469369
Epoch 4 Loss: 219.78267044977133
Epoch 5 Loss: 0.5723941323534867
Epoch 6 Loss: 292.5294174983908
Epoch 7 Loss: 26.676691714711538
Epoch 8 Loss: 0.6284350546813755
Epoch 9 Loss: 99.7841269352965
Epoch 10 Loss: 0.6055535030450271
Epoch 11 Loss: 172.75657657978033
Epoch 12 Loss: 0.5938515773302279
Epoch 13 Loss: 245.59426592650934
Epoch 14 Loss: 0.6213133906222962
Epoch 15 Loss: 254.9677306693524
Epoch 16 Loss: 1.065480748536354
Epoch 17 Loss: 125.79008862609064
Epoch 18 Loss: 0.616444071157469
Epoch 19 Loss: 198.71701337353957
Epoch 20 Loss: 0.6111684799691923
Epoch 21 Loss: 271.30331788410905
Epoch 22 Loss: 8.26256371947386
Epoch 23 Loss: 0.637218803447034
Epoch 24 Loss: 183.63238504320725
Epoch 25 Loss: 0.6317611960176102
Epoch 26 Loss: 256.2487036515593
Epoch 27 Loss: 2.4448105032866243
Epoch 28 Loss: 32.32162678623083
Epoch 29 Loss: 0.6837051647823391
Ep

In [8]:
model.score(X_train, y_train)

(array([-1.,  1.]), array([1095, 4105], dtype=int64))


0.6273076923076923

In [10]:
test_data = pd.read_csv('./testSet.csv')
X_test, y_test = get_features_labels(test_data)

In [11]:
model.score(X_test, y_test)

(array([-1.,  1.]), array([ 265, 1035], dtype=int64))


0.6230769230769231