# Stochastic Gradient Descent - Linear Regression

## Data for the linear regression model

In [747]:
import numpy as np
import matplotlib.pyplot as plt

In [748]:
# Data points
data_amount = 15
max_num = 10
X = np.random.randint(max_num, size=(data_amount, 3))

# We generate them by "knowing" the output weights for this example (this is not the case for real data!)
final_weights = np.random.rand(X.shape[1])
final_weights = final_weights / np.sum(final_weights)

final_bias = 0.2

# Corresponding labels
random_noise = np.random.rand(X.shape[0]) / 7.5 # ranges from 0-1. We divide that by 7.5 to not get to much noise in here
y = np.dot(final_weights, X.T) + final_bias + random_noise

#print('data set X\n', X)
#print('labels y\n', y)

In [749]:
# plt.plot(X,y)
X

array([[2, 4, 8],
       [3, 4, 9],
       [4, 3, 7],
       [9, 9, 1],
       [5, 0, 5],
       [2, 9, 7],
       [8, 4, 9],
       [2, 2, 2],
       [8, 4, 1],
       [7, 1, 1],
       [3, 8, 7],
       [0, 5, 2],
       [8, 7, 0],
       [8, 4, 4],
       [5, 1, 5]])

# Training and test data

In [750]:
train_len = int(data_amount * 0.75)

# We train with the following data
X_train = X[:train_len]
y_train = y[:train_len]

# We test / evaluate with the following data
X_test = X[train_len:]
y_test = y[train_len:]

## Information about the model

In [751]:
# We set the inital weights randomly
weights = np.random.rand(X.shape[1])

# The bias value is set to 1 initially
bias = np.array([1])

### Some more information

We know the regression equation:

$y_{pred}= w_1x_1 + w_2x_2 + \ldots + w_nx_n + b$

In [752]:
# What are the current results of the untrained model?
y_untrained = np.dot(weights, X_test.T) + bias
print('Outputs for our untrained model:', y_untrained)

# What are the results of the final model (that we want to achieve by updating the weights by the Stochastic gradient descent method)
y_final = np.dot(final_weights, X_test.T) + final_bias
print('Outputs for the final model:', y_final)

Outputs for our untrained model: [5.76536755 7.29454162 9.61269109 8.0398307 ]
Outputs for the final model: [1.79365064 3.89783151 5.66715158 4.76369809]


### Loss function

We want to use the mean squarred error to calculate the loss for the model outputs which is defined as follows:

$$MSE = \frac{1}{n}\sum_{i=1}^n (y_i-y_{i_{pred}})^2$$

In [753]:
mse = lambda y, y_pred: np.mean(np.sum((y-y_pred)**2))

In [754]:
# In our example the loss for our untrained model is:
loss_untrained = mse(y_test, y_untrained)
print('The loss of the untrained model is:', loss_untrained)

# Loss for the final model
loss_final = mse(y_test, y_final)
print('The loss of the final model is:', loss_final)

The loss of the untrained model is: 52.508733106896
The loss of the final model is: 0.007930060869604577


## Your stochastic gradient descent implementation to optimize the weights of your model

In [755]:
# Summary on what we know so far:

# We know the loss function: Variable 'mse' (Mean squared error)
# We know the initial weights that we want to optimize: variable 'weights'
# We know the initial bias value: variable 'bias'

In [756]:
# Use the training data to optimize the weights of the linear regression model

# use these variables for your sgd implementation
learning_rate = 0.005
iterations = 1000

# YOUR CODE FOR THE STOCHASTIC GRADIENT DESCENT IMPLEMENTATION

## Stochastic

In [757]:
class MLR:
    learning_rate = 0.005
    X_train, Y_train = None, None
    
    def __init__(self, learning_rate: float) -> None:
        self.learning_rate = learning_rate
        return None
    
    def fit(self, X_train: np.ndarray, y_train: np.ndarray, epochs: int):
        self.X_train, self.Y_train = X_train, y_train
        return self.SGD(epochs=epochs)
    
    def SGD(self, epochs: int) -> np.ndarray:
        n_samples, n_features = self.X_train.shape
        coeffs = np.random.rand(n_features)
        bias = np.random.rand()

        # perform operation for all epochs
        for i in range(epochs):
            y_pred = np.dot(self.X_train, coeffs) + bias
            # error = np.log(((y_pred - self.Y_train)**2)) / n_samples
            error = y_pred - self.Y_train

            # update weights
            dw = -(2/n_samples) * np.dot(X_train.T, error)
            # update bias
            db = -(2/n_samples) * np.sum(error)

            coeffs += self.learning_rate * dw
            bias += self.learning_rate * db
        
        return coeffs, bias

In [758]:
model = MLR(learning_rate=learning_rate)
coeffs, bias = model.fit(X_train=np.array(X_train), y_train=np.array(y_train), epochs=iterations)
coeffs, bias

(array([0.37405222, 0.10586693, 0.53279039]), 0.182546900807173)

In [759]:
final_weights, final_bias

(array([0.3667879 , 0.10907548, 0.52413663]), 0.2)

## Compare the results with the Test data

In [760]:
y_pred = np.dot(coeffs, X_test.T) + bias
y_actual = np.dot(final_weights, X_test.T) + final_bias

print(f"MSE: {mse(y_pred, y_test)}")

MSE: 0.004947134196757129
