## Class ---> 'Mini-Batch Gradient Descent' with Kaggle dataset

In [15]:
# IMPORTS
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [16]:
# Load the data
data = pd.read_csv("Dataset/Salary_dataset.csv")
data = data.drop("Unnamed: 0", axis=1)
data.head()

Unnamed: 0,YearsExperience,Salary
0,1.2,39344.0
1,1.4,46206.0
2,1.6,37732.0
3,2.1,43526.0
4,2.3,39892.0


In [17]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 2 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   YearsExperience  30 non-null     float64
 1   Salary           30 non-null     float64
dtypes: float64(2)
memory usage: 612.0 bytes


In [18]:
# Get X and y variables
X = data[['YearsExperience']].values 
y = data[['Salary']].values

# Scale the data
scaler = StandardScaler()
scaled_X = scaler.fit_transform(X)

# Get training and testing data
X_train, X_test, y_train, y_test = train_test_split(scaled_X, y, test_size=0.2, random_state=42)

In [19]:
# class ---> 'Mini-Batch Gradient Descent' algo
class MBGD:
    def __init__(self, learning_rate=0.01, n_epochs=1000, batch_size=5, tolerance=1e-6):
        self.learning_rate_ = learning_rate
        self.n_epochs_ = n_epochs
        self.batch_size_ = batch_size
        self.tolerance_ = tolerance
        self.weights = None
        self.bias = None

    def predict(self, X):
        return X @ self.weights + self.bias # y = mx + b
    
    def MSE(self, y_true, y_pred):
        return np.mean((y_true-y_pred)**2)
    
    def fit(self, X, y):
        m, n = X.shape
        self.weights = np.random.randn(n, 1)
        self.bias = 0

        for epoch in range(self.n_epochs_):
            random_samples = np.random.choice(m, self.batch_size_, replace=False)
            xi = X[random_samples]
            yi = y[random_samples]
            y_pred = self.predict(xi)
            error = y_pred - yi
            w_grad = 2 * xi.T @ error / self.batch_size_ # divide by 'batch_size'
            b_grad = 2 * np.mean(error)
            self.weights = self.weights - self.learning_rate_ * w_grad
            self.bias = self.bias - self.learning_rate_ * b_grad

            if np.linalg.norm(w_grad) < self.tolerance_ and np.abs(b_grad) < self.tolerance_:
                print(f"Converged at epoch: {epoch}")
                break
        return self.weights, self.bias 

In [20]:
# Create an instance + evaluate
mbgd = MBGD()
weights, bias = mbgd.fit(X_train, y_train)
print(f"Weights: {weights} - Bias: {bias}")
preds = mbgd.predict(X_test)
RMSE = np.sqrt(mbgd.MSE(y_test, preds))
print(f"RMSE: {RMSE}")

Weights: [[26148.50524716]] - Bias: 75341.65928357429
RMSE: 7090.784081467546


In [21]:
# Compare with sklearn
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
new_preds = lin_reg.predict(X_test)
RMSE = mean_squared_error(y_test, new_preds, squared=False)
print(f"RMSE: {RMSE}")

RMSE: 7059.043621901506
