# Lesson 5 - Assignment

In this assignment, you will implement a Support Vector Machine Classifier  from scratch and compare the results to existing sklearn algorithm.

In [154]:
# import packages
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from matplotlib.legend_handler import HandlerLine2D
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# make this notebook's output stable across runs
np.random.seed(0)

Question 1.1: Implement the cost function cost/objective function:
<img src="https://miro.medium.com/max/688/1*JAS6rUTO7TDlrv4XZSMbsA.png" alt="drawing" width="600"/>


In [155]:
def compute_cost(W, X, Y, reg_strength=1000):
    shape = X.shape[0]
    distances = 1 - Y * (np.dot(X, W)) # array of distances
    distances[distances < 0] = 0 # if it's negative, just set it to 0
    loss = reg_strength * (np.sum(distances) / shape)
    cost = 1 / 2 * np.dot(W, W) + loss # Do not need to do anything with b here as we insert it as part of the dataset (intercept)
    return cost

Question 1.2: Write a method that calculate the cost gradient:
<img src="https://miro.medium.com/max/866/1*ww3F21VMVGp2NKhm0VTesA.png" alt="drawing" width="600"/>

In [156]:
def calculate_cost_gradient(W, X_batch, Y_batch, reg_strength=1000):
    # type(Y_batch) == integer, so we need to convert it into an np array
    Y_batch = np.array([Y_batch])
    X_batch = np.array([X_batch])
    distance = 1 - (Y_batch * np.dot(X_batch, W))
    dw = np.zeros(len(W))
    for index, d in enumerate(distance):
        if max(0, d) == 0:
          di = W
        else:
          di = W - (reg_strength * Y_batch[index] * X_batch[index])
        dw += di
    dw = dw/len(Y_batch)
    return dw

Question 1.3: Write a method that performs stochastic Gradient descent as follows:
- Caluclate the gradient of cost function i.e. ∇J(w)
- Update the weights in the opposite direction to the gradient: w = w — ∝(∇J(w))
- Repeat until conversion or until 5000 epochs are reached

In [157]:
def sgd(data, outputs, learning_rate = 0.000001, max_epochs = 5000):
    weights = np.zeros(data.shape[1])
    nth = 0
    prev_cost = float("inf") # infinite cost! makes sense with gradient
    cost_threshold = 0.01  # in percent
    # stochastic gradient descent
    for epoch in range(1, max_epochs):
        # shuffle to prevent repeating update cycles
        X, Y = shuffle(data, outputs)
        for index, x in enumerate(X):
            ascent = calculate_cost_gradient(weights, x, Y[index])
            weights = weights - (learning_rate * ascent)
            # convergence check on 2^nth epoch
            if epoch == 2 ** nth or epoch == max_epochs - 1:
                cost = compute_cost(weights, data, outputs)
                print("Epoch is: {} and Cost is: {}".format(epoch, cost))
                # stoppage criterion, or when we want to stop descending
                if abs(prev_cost-cost) < cost_threshold * prev_cost:
                    return weights
                prev_cost = cost
                nth += 1
    return weights

# Dataset

In [158]:
data = pd.read_csv('data_banknote_authentication.csv')

print(data.head())

Y = data.iloc[:, -1]
X = data.iloc[:, 1:4]
X.insert(loc=len(X.columns), column='intercept', value=1) #inserts the B column

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.4, random_state=42)

    3.6216  8.6661  -2.8073  -0.44699  0
0  4.54590  8.1674  -2.4586  -1.46210  0
1  3.86600 -2.6383   1.9242   0.10645  0
2  3.45660  9.5228  -4.0112  -3.59440  0
3  0.32924 -4.4552   4.5718  -0.98880  0
4  4.36840  9.6718  -3.9606  -3.16250  0


Question 4: Train and evaluate an SVC using the banknote_authentication data

In [159]:
# train the model
print("training started...")
W = sgd(X_train.to_numpy(), y_train.to_numpy()) # this results in an extremely low accuracy
print("training finished.")
print("weights are: {}".format(W))

y_test_predicted = np.array([])
for i in range(X_test.shape[0]):
    yp = np.sign(np.dot(W, X_test.to_numpy()[i]))
    y_test_predicted = np.append(y_test_predicted, yp)
print("accuracy on test dataset with initial weights (W) using SGD: {}".format(accuracy_score(y_test.to_numpy(), y_test_predicted)))


# testing a sklearn model for comparison quickly
print("sklearn linear kernel training used for weights started...")
from sklearn.svm import SVC
svc_linear_clf = SVC(kernel='linear', random_state=42)
svc_linear_clf.fit(X_train, y_train)
W2 = svc_linear_clf.coef_
print("sklearn linear kernel training finished....")
print("weights are: {}".format(W2))

y_test_predicted_w2 = np.array([])
for i in range(X_test.shape[0]):
    yp2 = np.sign(np.dot(W2, X_test.to_numpy()[i]))
    y_test_predicted_w2 = np.append(y_test_predicted_w2, yp2)
print("accuracy on test dataset with initial weights (W2) using sklearn linear kernel: {}".format(accuracy_score(y_test.to_numpy(), y_test_predicted_w2)))


training started...
Epoch is: 1 and Cost is: 1000.0
Epoch is: 2 and Cost is: 722.963263375125
Epoch is: 4 and Cost is: 627.73561795842
Epoch is: 8 and Cost is: 564.3342958674476
Epoch is: 16 and Cost is: 557.8744461848763
Epoch is: 32 and Cost is: 557.8453955476957
training finished.
weights are: [-0.00950043  0.02362466 -0.10625211  1.15041795]
accuracy on test dataset with initial weights (W) using SGD: 0.44808743169398907
sklearn linear kernel training used for weights started...
sklearn linear kernel training finished....
weights are: [[-5.09637313e-01 -3.49295369e-01 -5.31564418e-01 -2.39808173e-14]]
accuracy on test dataset with initial weights (W2) using sklearn linear kernel: 0.30965391621129323


[Bonus] Question 5: Train and evaluate an SKLEARN SVC model, and compare the results to your model

# Note

Since I did the linear above (which had extremely low values), I'll do 'rbf' kernel here.

In [160]:
from sklearn.svm import SVC
svc_clf = SVC(kernel='rbf', random_state=42, C=10) # C=float("inf") # linear gives lower accuracy
svc_clf.fit(X_train, y_train)
y_pred_svc_clf = svc_clf.predict(X_test)

In [161]:
from sklearn.metrics import confusion_matrix, accuracy_score
print(confusion_matrix(y_test, y_pred_svc_clf))
accuracy_score(y_test, y_pred_svc_clf)

[[289  14]
 [ 32 214]]


0.9162112932604736

Question 6: Create a new text cell in your Notebook: Complete a 50-100 word summary (or short description of your thinking in applying this week's learning to the solution) of your experience in this assignment. Include: What was your incoming experience with this model, if any? what steps you took, what obstacles you encountered. how you link this exercise to real-world, machine learning problem-solving. (What steps were missing? What else do you need to learn?) This summary allows your instructor to know how you are doing and allot points for your effort in thinking and planning, and making connections to real-world work.

# Q.6

1. I have experience with these models mostly in an anecdotal way, testing various apps and tools for image recognition (ie: hot dog, not hot dog tests).
2. I am admittedly not great at the math portion so there's a lot of referring to notes and resouces, remembering variables meanings, etc.
3. I would absolutely apply this model to any sort of binary classification heavily used in visual or audio recognition models.
4. Some steps I would've found helpful would've been a visualization of the output. I think the visuals are really nice with SVC models. I tried to create one myself but got hung up on it so I removed it.

In [136]:
%%shell

jupyter nbconvert --to html /content/Lab04.ipynb