### Imports

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

---
### Upload Dataset

In [3]:
X_und = pd.read_csv("cleaned_rain_x.csv")
y_und = pd.read_csv("cleaned_rain_y.csv")

---
### Split into testing and training data

In [71]:
x_train, x_test, y_train, y_test = train_test_split(X_und, y_und, test_size=0.25, random_state=42)

In [72]:
x_train.drop(columns=['Unnamed: 0'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


---
### Create 'Total' Train DataFrame

In [5]:
x_train_total = pd.concat([x_train, y_train], axis=1)
x_train_total.drop(columns=['Unnamed: 0'], axis=1, inplace=True)
x_train_total.head()

Unnamed: 0,MinTemp,MaxTemp,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,RainToday,year,...,WindGustDirW,WindDir9amN,WindDir9amE,WindDir9amS,WindDir9amW,WindDir3pmN,WindDir3pmE,WindDir3pmS,WindDir3pmW,RainTomorrow
22845,7.2,13.5,46.0,17.0,31.0,76.0,61.0,1021.8,1,2015,...,1,0,0,0,1,0,0,1,1,0
42446,21.2,28.8,48.0,19.0,24.0,78.0,97.0,1009.6,0,2014,...,1,1,1,0,0,0,0,0,1,1
5989,16.8,28.0,39.0,19.0,22.0,74.0,60.0,1012.4,0,2015,...,0,0,0,1,1,0,1,1,0,0
39347,12.5,24.4,50.0,6.0,7.0,78.0,43.0,1014.6,0,2014,...,0,0,0,1,1,0,1,1,0,1
31998,6.0,17.2,33.0,9.0,22.0,95.0,55.0,1029.4,1,2009,...,0,1,0,0,1,0,1,1,0,1


---
# Evaluators
---

In [6]:
def TF(y_actu, y_pred):
    TP = np.sum(np.logical_and(y_pred == 1, y_actu == 1))
    TN = np.sum(np.logical_and(y_pred == 0, y_actu == 0))
    FP = np.sum(np.logical_and(y_pred == 1, y_actu == 0))
    FN = np.sum(np.logical_and(y_pred == 0, y_actu == 1))
    return TP, FP, TN, FN

In [7]:
def eval(models, test_x, test_y):
    for m in models:
        model = m[0]
        TP, FP, TN, FN = TF(test_y, np.array(model.predict(test_x)))
        acc = (TP + TN) / (TP + TN + FP + FN)
        pre = TP / (TP + FP)
        re = TP / (FN + TP)
        f1 = (2 * (pre * re)) / (pre + re)
        scoring = {m[1]:{"Accuracy":acc, "Precision":pre,"Recall":re,"F1":f1}}
    return pd.DataFrame(scoring)

---
# SVM Implementation
---

In [122]:
class SVM():

    def __init__(self, lr=0.001, iterations=1000, lambda_ = 0.01):

        self.lr = lr
        self.iters = iterations
        self.lambda_ = lambda_
        self.fit()

    def fit(self, x=x_train, y=y_train["RainTomorrow"]):
        
        self.x = x
        self.y = y
        
        self.m, self.n = x.shape
        self.w = np.zeros(self.n)
        
        # bias
        self.b = 1

        # Update coeffs w/ Gradient Descent
        for i in range(self.iters):
            self.update_vals()

    def update_vals(self):
        
        target = np.where(self.y <= 0, -1, 1)

        for index, i in self.x.iterrows():

            condition = (target[index] * (np.matmul(i, self.w) - self.b) >= 1)

            if (condition == True):

                dw = 2 * self.lambda_ * self.w
                db = 0
            
            else:

                x = np.matmul(i, target[index])
                dw = 2 * self.lambda_ * self.w - x
                db = target[index]

            self.w = self.w - self.lr * dw
            self.b = self.b - self.lr * db

    def predict(self, x_test):

        output = np.matmul(x, self.w) - self.b
        predicted = np.sign(output)
        y_pred = np.where(predicted <= -1, 0, 1)

        return y_pred

---
# RESULTS
---

In [123]:
SVM = SVM()
eval([[SVM, "SVM"]], x_test, y_test['RainTomorrow'])

Cond:  False


IndexError: tuple index out of range