### Imports

In [22]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import math
from math import log

---
### Load Dataset

In [23]:
# Features (X) and labels (y) are read from two separate CSV files, in that order.
X_und, y_und = (
    pd.read_csv(csv_path)
    for csv_path in ("cleaned_rain_x.csv", "cleaned_rain_y.csv")
)

---
### Split into testing and training data

In [24]:
# Hold out 25% of the rows for testing; the fixed random_state keeps the
# split identical across re-runs.
x_train, x_test, y_train, y_test = train_test_split(
    X_und,
    y_und,
    test_size=0.25,
    random_state=42,
)

---
### Create 'Total' Train DataFrame

In [25]:
# Put features and labels side by side, then discard every column named
# 'Unnamed: 0' from the combined frame (x_train / y_train themselves are
# left untouched).
x_train_total = (
    pd.concat([x_train, y_train], axis=1)
    .drop(columns=['Unnamed: 0'])
)
x_train_total.head()

Unnamed: 0,MinTemp,MaxTemp,WindGustSpeed,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,RainToday,year,...,WindGustDirW,WindDir9amN,WindDir9amE,WindDir9amS,WindDir9amW,WindDir3pmN,WindDir3pmE,WindDir3pmS,WindDir3pmW,RainTomorrow
22845,7.2,13.5,46.0,17.0,31.0,76.0,61.0,1021.8,1,2015,...,1,0,0,0,1,0,0,1,1,0
42446,21.2,28.8,48.0,19.0,24.0,78.0,97.0,1009.6,0,2014,...,1,1,1,0,0,0,0,0,1,1
5989,16.8,28.0,39.0,19.0,22.0,74.0,60.0,1012.4,0,2015,...,0,0,0,1,1,0,1,1,0,0
39347,12.5,24.4,50.0,6.0,7.0,78.0,43.0,1014.6,0,2014,...,0,0,0,1,1,0,1,1,0,1
31998,6.0,17.2,33.0,9.0,22.0,95.0,55.0,1029.4,1,2009,...,0,1,0,0,1,0,1,1,0,1


---
# Evaluators
---

In [40]:
def TF(y_actu, y_pred):
    """Count the four confusion-matrix cells for 0/1-encoded labels.

    Parameters:
        y_actu: ground-truth labels; compared element-wise against 0 and 1.
        y_pred: predicted labels; compared element-wise against 0 and 1.

    Returns:
        A tuple ``(TP, FP, TN, FN)`` -- note that FP comes before TN.
    """
    # Build each boolean mask once and reuse it for two cells.
    predicted_pos = y_pred == 1
    predicted_neg = y_pred == 0
    actual_pos = y_actu == 1
    actual_neg = y_actu == 0

    def count(pred_mask, actual_mask):
        # Number of positions where both masks are True.
        return np.sum(np.logical_and(pred_mask, actual_mask))

    return (
        count(predicted_pos, actual_pos),  # TP
        count(predicted_pos, actual_neg),  # FP
        count(predicted_neg, actual_neg),  # TN
        count(predicted_neg, actual_pos),  # FN
    )

In [41]:
def eval(models, test_x, test_y):
    """Score every model on the test set and tabulate the metrics.

    NOTE: this name shadows the built-in ``eval``; it is kept because other
    cells call it by this name.

    Parameters:
        models: iterable of ``[model, name]`` pairs. ``model`` must expose
            ``predict(test_x)``; ``name`` becomes the column label.
        test_x: features handed to each model's ``predict``.
        test_y: true 0/1 labels, compared against the predictions by ``TF``.

    Returns:
        A DataFrame with one column per model and the rows "Accuracy",
        "Precision", "Recall" and "F1". It is empty when ``models`` is empty.
        A metric whose denominator is zero is reported as NaN.
    """
    def ratio(numerator, denominator):
        # An undefined metric (0 denominator) is NaN rather than a warning
        # or a ZeroDivisionError, whatever integer type TF returned.
        return numerator / denominator if denominator else float("nan")

    # Accumulate across models; rebinding this dict inside the loop would
    # keep only the last model's scores.
    scoring = {}
    for m in models:
        model, name = m[0], m[1]
        TP, FP, TN, FN = TF(test_y, np.array(model.predict(test_x)))
        acc = ratio(TP + TN, TP + TN + FP + FN)
        pre = ratio(TP, TP + FP)
        re = ratio(TP, FN + TP)
        f1 = ratio(2 * (pre * re), pre + re)
        scoring[name] = {"Accuracy": acc, "Precision": pre, "Recall": re, "F1": f1}
    return pd.DataFrame(scoring)

---
# Gradient Descent Logistic Regression Implementation
---

In [44]:
class GDLogReg:
    """Binary logistic regression fitted with batch gradient descent.

    Constructing an instance immediately trains it by calling ``fit()`` with
    its default arguments, i.e. on the notebook-level ``x_train`` and
    ``y_train["RainTomorrow"]``.

    Attributes:
        theta: 1-D weight vector with the intercept first, or None before
            the model has been fit.
        loss_history: list with the summed cross-entropy measured at the
            start of every epoch of the most recent ``fit`` call.
    """

    #### Helper Functions ##########

    def add_const(self, dataframe):
        """Return ``dataframe`` as an array with a leading column of ones.

        The extra column lets ``theta[0]`` act as the intercept.
        ``dataframe`` must be 2-D (anything with a two-element ``.shape``).
        """
        n, _ = dataframe.shape
        ones = np.ones((n, 1))
        return np.concatenate([ones, dataframe], axis=1)

    def sigmoid(self, t):
        """Logistic function 1 / (1 + exp(-t)), safe for large ``|t|``.

        ``exp`` is only ever applied to a non-positive number, so it cannot
        overflow; the two branches are algebraically identical.
        """
        t = np.asarray(t, dtype=float)
        e = np.exp(-np.abs(t))  # always in [0, 1]
        return np.where(t >= 0, 1 / (1 + e), e / (1 + e))

    def ce(self, pred, y):
        """Summed binary cross-entropy between probabilities and 0/1 labels.

        Computes ``-sum(y*log(p) + (1-y)*log(1-p))``. Probabilities are
        clipped to ``[1e-7, 1 - 1e-7]`` so neither logarithm sees zero.
        ``pred`` and ``y`` may be any array-like of equal length.
        """
        eps = 0.0000001
        y = np.asarray(y, dtype=float).ravel()
        p = np.clip(np.asarray(pred, dtype=float).ravel(), eps, 1 - eps)
        return float(-np.sum(y * np.log(p) + (1 - y) * np.log(1 - p)))

    ################################

    def __init__(self):
        self.theta = None
        self.loss_history = []
        # Train straight away on the default (notebook-level) training data.
        self.fit()

    def fit(self, X=None, y=None, l_rate=.01, epochs=101):
        """Fit the weights with ``epochs`` full-batch gradient steps.

        Parameters:
            X: 2-D feature matrix. Defaults to the notebook-level ``x_train``.
            y: 0/1 labels, one per row of ``X``. Defaults to the
                notebook-level ``y_train["RainTomorrow"]``.
            l_rate: gradient-descent step size.
            epochs: number of gradient steps.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: if ``X`` has no rows or ``y`` does not hold exactly
                one label per row of ``X``.
        """
        # Resolve the defaults at call time: binding them in the signature
        # would freeze whatever the globals held when the class was defined.
        if X is None:
            X = x_train
        if y is None:
            y = y_train["RainTomorrow"]

        X = self.add_const(X)
        # A plain 1-D array drops any pandas index; rows pair up by position.
        y = np.asarray(y, dtype=float).ravel()

        n, d = X.shape
        if n == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.shape[0] != n:
            raise ValueError(
                f"X has {n} rows but y has {y.shape[0]} labels"
            )

        # 1. initialize theta to zeros
        self.theta = np.zeros((d, ))
        self.loss_history = []

        # 2. repeat for a fixed number of epochs
        for _ in range(epochs):
            prediction = self.sigmoid(X @ self.theta)

            # Gradient of the mean cross-entropy with respect to theta.
            gradient = X.T @ (prediction - y) / n

            # 3. update theta (rebinding, not mutating in place)
            self.theta = self.theta - l_rate * gradient

            # Loss of the weights used for this epoch's prediction.
            self.loss_history.append(self.ce(prediction, y))
        return self

    def predict(self, X=None):
        """Predict 0/1 labels for ``X`` (default: notebook-level ``x_test``).

        Probabilities are rounded with ``np.rint`` to give the label.

        Raises:
            RuntimeError: if the model has not been fit yet.
        """
        if self.theta is None:
            raise RuntimeError('Model has not been fit yet')
        if X is None:
            X = x_test
        X = self.add_const(X)
        return np.rint(self.sigmoid(X @ self.theta)).astype(int)

---
# RESULTS
---

In [45]:
# Constructing the model also trains it; score it on the held-out test split.
LR = GDLogReg()
eval(
    [(LR, "Logistic Regression")],
    x_test,
    y_test['RainTomorrow'],
)

  return 1/(1 + np.exp(-t))


5403 0 6326 1024


Unnamed: 0,Logistic Regression
Accuracy,0.919705
F1,0.91344
Precision,1.0
Recall,0.840672
