# Stochastic Gradient Descent - Linear Regression

## Data for the linear regression model

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Data points
data_amount = 15
max_num = 10
X = np.random.randint(max_num, size=(data_amount, 3))

# We generate them by "knowing" the output weights for this example (this is not the case for real data!)
final_weights = np.random.rand(X.shape[1])
final_weights = final_weights / np.sum(final_weights)

final_bias = 0.2

# Corresponding labels
random_noise = np.random.rand(X.shape[0]) / 7.5 # ranges from 0-1. We divide that by 7.5 to not get to much noise in here
y = np.dot(final_weights, X.T) + final_bias + random_noise

#print('data set X\n', X)
#print('labels y\n', y)

In [3]:
# plt.plot(X,y)
X

array([[1, 3, 1],
       [0, 2, 4],
       [9, 9, 8],
       [3, 9, 2],
       [9, 8, 5],
       [7, 1, 5],
       [4, 6, 9],
       [7, 6, 5],
       [6, 1, 8],
       [5, 4, 4],
       [5, 8, 1],
       [6, 3, 5],
       [0, 0, 0],
       [0, 8, 8],
       [0, 6, 1]])

# Training and test data

In [4]:
train_len = int(data_amount * 0.75)

# We train with the following data
X_train = X[:train_len]
y_train = y[:train_len]

# We test / evaluate with the following data
X_test = X[train_len:]
y_test = y[train_len:]

## Information about the model

In [5]:
# We set the inital weights randomly
weights = np.random.rand(X.shape[1])

# The bias value is set to 1 initially
bias = np.array([1])

### Some more information

We know the regression equation:

$y_{pred}= w_1x_1 + w_2x_2 + \ldots + w_nx_n + b$

In [6]:
# What are the current results of the untrained model?
y_untrained = np.dot(weights, X_test.T) + bias
print('Outputs for our untrained model:', y_untrained)

# What are the results of the final model (that we want to achieve by updating the weights by the Stochastic gradient descent method)
y_final = np.dot(final_weights, X_test.T) + final_bias
print('Outputs for the final model:', y_final)

Outputs for our untrained model: [2.62215169 1.         3.23270443 2.37664631]
Outputs for the final model: [4.25316544 0.2        7.97185783 3.60986305]


### Loss function

We want to use the mean squarred error to calculate the loss for the model outputs which is defined as follows:

$$MSE = \frac{1}{n}\sum_{i=1}^n (y_i-y_{i_{pred}})^2$$

In [7]:
mse = lambda y, y_pred: np.mean(np.sum((y-y_pred)**2))

In [8]:
# In our example the loss for our untrained model is:
loss_untrained = mse(y_test, y_untrained)
print('The loss of the untrained model is:', loss_untrained)

# Loss for the final model
loss_final = mse(y_test, y_final)
print('The loss of the final model is:', loss_final)

The loss of the untrained model is: 28.21446811141517
The loss of the final model is: 0.022926779224283968


## Your stochastic gradient descent implementation to optimize the weights of your model

In [9]:
# Summary on what we know so far:

# We know the loss function: Variable 'mse' (Mean squared error)
# We know the initial weights that we want to optimize: variable 'weights'
# We know the initial bias value: variable 'bias'

In [10]:
# Use the training data to optimize the weights of the linear regression model

# use these variables for your sgd implementation
learning_rate = 0.005
iterations = 1000

# YOUR CODE FOR THE STOCHASTIC GRADIENT DESCENT IMPLEMENTATION

In [35]:
class MLR:
    learning_rate = 0.005
    X_train, Y_train = None, None
    
    def __init__(self, learning_rate: int) -> None:
        self.learning_rate = learning_rate
        return None
    
    def fit(self, X_train: np.ndarray, y_train: np.ndarray):
        self.X_train, self.Y_train = X_train, y_train
        self.SGD()
        return self
    
    def SGD(self) -> np.ndarray:
        coeffs = np.random.rand(1, X_train.shape[1])
        print(coeffs)
        n = len(self.Y_train) # number of samples
        # X_train.shape[1] coefficients are weights
        # X_train.shape[1]'th coefficient is the bias
        error = np.sum(self.Y_train - np.dot(self.X_train, coeffs.T))
        for coeff_index in range(X_train.shape[1]):
            derivative = (-2/n) * error * X_train
            print(coeffs[coeff_index], self.learning_rate * derivative)
            coeffs[coeff_index] -= (self.learning_rate * derivative)
        print(coeffs)
            

In [36]:
model = MLR(learning_rate=learning_rate)
model = model.fit(X_train=np.array(X_train), y_train=np.array(y_train).reshape(-1, 1))

[[0.53341767 0.84593934 0.82440845]]
[0.53341767 0.84593934 0.82440845] [[0.05767118 0.17301355 0.05767118]
 [0.         0.11534236 0.23068473]
 [0.51904064 0.51904064 0.46136946]
 [0.17301355 0.51904064 0.11534236]
 [0.51904064 0.46136946 0.28835591]
 [0.40369827 0.05767118 0.28835591]
 [0.23068473 0.34602709 0.51904064]
 [0.40369827 0.34602709 0.28835591]
 [0.34602709 0.05767118 0.46136946]
 [0.28835591 0.23068473 0.23068473]
 [0.28835591 0.46136946 0.05767118]]


ValueError: non-broadcastable output operand with shape (3,) doesn't match the broadcast shape (11,3)

## Compare the results with the Test data