In [93]:
# importing numpy library
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, accuracy_score, precision_score, recall_score, f1_score

In [28]:
class SVM():

    def __init__(self, lr, iterations, lambda_):

        self.lr = lr
        self.iters = iterations
        self.lambda_ = lambda_

    def fit(self, x, y):
        
        self.x = x
        self.y = y
        
        self.m, self.n = x.shape
        self.w = np.zeros(self.n)
        
        # bias
        self.b = 1

        # Update coeffs w/ Gradient Descent
        for i in range(self.iters):
            self.update_vals()

    def update_vals(self):
        
        target = np.where(self.y <= 0, -1, 1)

        for index, i in enumerate(self.x):

            condition = target[index] * (np.dot(i, self.w) - self.b) >= 1

            if (condition == True):

                dw = 2 * self.lambda_ * self.w
                db = 0
            
            else:

                dw = 2 * self.lambda_ * self.w - np.dot(i, target[index])
                db = target[index]

            self.w = self.w - self.lr * dw
            self.b = self.b - self.lr * db

    def predict(self, x):

        output = np.dot(x, self.w) - self.b
        predicted = np.sign(output)
        y_pred = np.where(predicted <= -1, 0, 1)

        return y_pred

In [30]:
classifier = SVM(lr=0.001, iterations=1000, lambda_ = 0.01)

In [79]:
# loading the diabetes dataset to a pandas DataFrame
features = pd.read_csv('cleaned_rain_x.csv')
target = pd.read_csv('cleaned_rain_y.csv')
target = target['RainTomorrow']
print(target)

0        0
1        0
2        0
3        0
4        0
        ..
51007    1
51008    1
51009    1
51010    1
51011    1
Name: RainTomorrow, Length: 51012, dtype: int64


In [80]:
np.unique(target)

array([0, 1], dtype=int64)

In [81]:
scaler = StandardScaler()

In [82]:
scaler.fit(features)

StandardScaler()

In [83]:
standardized_data = scaler.transform(features)

In [84]:
features = standardized_data

In [85]:
X_train, X_test, Y_train, Y_test = train_test_split(features,target, test_size = 0.2, random_state=2)

In [86]:
print(features.shape, X_train.shape, X_test.shape)

(51012, 25) (40809, 25) (10203, 25)


In [87]:
classifier = SVM(lr=0.001, iterations=1000, lambda_ = 0.01)

In [88]:
#training the support vector Machine Classifier
classifier.fit(X_train, Y_train)

In [94]:
# accuracy score on the training data
X_train_prediction = classifier.predict(X_train)
train_accuracy = accuracy_score( Y_train, X_train_prediction)
train_precision = precision_score( Y_train, X_train_prediction)
train_recall = recall_score( Y_train, X_train_prediction)
train_f1 = f1_score( Y_train, X_train_prediction)

In [95]:
print('Training Data','\n')
print('Accuracy: ', train_accuracy)
print('Precision: ', train_precision)
print('Recall: ', train_recall)
print('F1:', train_f1)

Training Data 

Accuracy:  0.9823813374500723
Precision:  0.9843550132834793
Recall:  0.9803518055759713
F1: 0.9823493310421014


In [98]:
# accuracy score on the test data
X_test_prediction = classifier.predict(X_test)
test_accuracy = accuracy_score( Y_test, X_test_prediction)
test_precision = precision_score( Y_test, X_test_prediction)
test_recall = recall_score( Y_test, X_test_prediction)
test_f1 = f1_score( Y_test, X_test_prediction)

In [99]:
print('Testing Data','\n')
print('Accuracy: ', test_accuracy)
print('Precision: ', test_precision)
print('Recall: ', test_recall)
print('F1:', test_f1)

Testing Data 

Accuracy:  0.9829461922963835
Precision:  0.985598737423555
Recall:  0.9801844222091426
F1: 0.9828841235490852
