# Linear Regression Gradient descent

## Step 0 - import all libraries

In [None]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

plt.rcParams["figure.figsize"] = (12, 9)  # default size (width, height in inches) for every figure created below

## Step 1 - read data from csv. 
We use the pandas built-in function `read_csv` <br>
The data must also be standardized (zero mean, unit standard deviation) so that gradient descent stays numerically stable

`data=(data-data.mean())/data.std() # standardization`

In [45]:
def read_real_dataset(dims_3 = False, path = './dataset/home_data.csv'):
    """Load the house-price CSV and return a standardized design matrix and target.

    Parameters
    ----------
    dims_3 : bool
        When True, 'sqft_above' is used as a second feature (x2) in addition
        to 'sqft_living' (x1).
    path : str
        Location of the CSV file. It must contain the columns 'price' and
        'sqft_living' (plus 'sqft_above' when ``dims_3`` is True).

    Returns
    -------
    x : numpy.ndarray
        Array of shape (n_samples, 1 + n_features). Column 0 is all ones
        (the x_0 term that multiplies the bias weight), the remaining
        columns are the standardized features.
    y : numpy.ndarray
        Array of shape (n_samples,) holding the standardized 'price' column.
    """
    target = 'price'            # y
    features = ['sqft_living']  # x1
    if dims_3:
        features.append('sqft_above')  # x2

    # Select by name so the column order is fixed regardless of file layout.
    data = pd.read_csv(path)[[target] + features]

    # Standardization: every column gets zero mean and unit standard deviation.
    data = (data - data.mean()) / data.std()

    y = data[target].to_numpy()
    bias_column = np.ones(len(data))
    x = np.column_stack((bias_column, data[features].to_numpy()))
    return x, y

## Step 2 - define the predict function, the cost function and a function that computes the gradient of the cost function

In [46]:
def predict(x, weights):
    """Return the model output: the matrix product of ``x`` and ``weights``."""
    y_hat = np.matmul(x, weights)
    return y_hat

def cost_func(y, y_hat):
    """Return the mean squared error between predictions ``y_hat`` and targets ``y``."""
    residuals = y_hat - y
    return np.square(residuals).mean()

def count_gradient(x, weights, y):
    """Return the descent direction of the mean-squared-error cost.

    Entry ``i`` of the result is ``mean((y - y_hat) * x[:, i])``, i.e. minus
    one half of the partial derivative of ``mean((y_hat - y) ** 2)`` with
    respect to ``weights[i]``. Because the sign is already flipped, the
    caller moves downhill by *adding* ``lr * count_gradient(...)`` to the
    weights.

    Parameters
    ----------
    x : numpy.ndarray
        Design matrix with one column per weight.
    weights : numpy.ndarray
        Current weight vector.
    y : numpy.ndarray
        Target values, one per row of ``x``.

    Returns
    -------
    numpy.ndarray
        One value per weight.
    """
    # The predictions do not depend on which weight is being differentiated,
    # so they are computed once rather than once per weight.
    residuals = y - predict(x, weights)

    # residuals @ x evaluates dot(residuals, x[:, i]) for every column i at once.
    return np.matmul(residuals, x) / len(x)

## Step 3 - Gradient descent algorithm

Now we can define the algorithm <br>
NOTE: `epoch_results = []` is not part of the algorithm itself. It is only used to visualize every epoch

In [47]:
def gradient_descent_algorithm(x, weights, y, epochs, lr, display_cost = True):
    """Run ``epochs`` weight updates and return the final weights plus a per-epoch log.

    Each iteration predicts with the current weights, records a snapshot,
    optionally prints the current cost, and then adds ``lr`` times the
    result of ``count_gradient`` to the weights.

    Returns
    -------
    weights
        The weights after the last update.
    list of dict
        One entry per epoch with the keys 'y_hat' (predictions), 'w' (the
        weights that produced them) and 'e' (the epoch index). It is kept
        only so the epochs can be plotted afterwards.
    """
    history = []  # bookkeeping for the plots, not part of the optimisation

    for epoch_idx in range(epochs):
        predictions = predict(x, weights)

        # Snapshot is taken before the update, so 'w' matches 'y_hat'.
        snapshot = {'y_hat': predictions, 'w': weights, 'e': epoch_idx}
        history.append(snapshot)

        if display_cost:
            print(cost_func(y, predictions))

        step = lr * count_gradient(x, weights, y)
        weights = np.add(weights, step)

    return weights, history

## Step 4 - Prepare visualization

In [48]:
def draw_function(x, y, plot = plt, as_line = False, color = 'blue'):
    """Draw ``y`` against column 1 of ``x`` on ``plot``.

    ``plot`` is anything exposing ``plot`` and ``scatter`` (the pyplot
    module by default). With ``as_line`` a line is drawn, otherwise a
    scatter of very small points.
    """
    feature = x[:, 1]  # column 0 is the constant bias column

    if not as_line:
        plot.scatter(feature, y, color = color, s=0.001)
        return

    plot.plot(feature, y, color = color)

def visualize_gd_results(epoch_results, lr, x, y):
    """Plot the data and the fitted line of every saved epoch on a grid of subplots.

    Parameters
    ----------
    epoch_results : list of dict
        One entry per epoch with the keys 'y_hat', 'w' and 'e'.
    lr : float
        Learning rate; only shown in the figure title.
    x, y : numpy.ndarray
        Design matrix and targets; column 1 of ``x`` is used as the
        horizontal axis.

    Nothing is drawn when ``epoch_results`` is empty.
    """
    saved_epochs_n = len(epoch_results)
    if saved_epochs_n == 0:
        return

    # About four plots per row. At least one row, and enough columns
    # (ceiling division) that every epoch gets a cell even when the count
    # does not divide evenly.
    n_rows = max(1, saved_epochs_n // 4)
    n_cols = -(-saved_epochs_n // n_rows)

    # squeeze=False keeps the axes in a 2-D array even for a 1x1 grid.
    figure, axes_grid = plt.subplots(n_rows, n_cols, squeeze=False)
    figure.suptitle(f'Linear regression with Gradient Descent (lr = {lr})')
    axes = axes_grid.ravel()

    for plot, result in zip(axes, epoch_results):
        draw_function(x, y, plot)
        draw_function(x, result['y_hat'], plot, True, 'red')

        counted_legend = f'w_1 = {result["w"][1]:.3f}\nw_0 = {result["w"][0]:.4f}'
        data_legend = 'Data'

        # Pass the artists explicitly so each label is tied to the right
        # element instead of depending on the implicit artist order.
        fitted_line = plot.lines[-1]
        data_points = plot.collections[-1]
        plot.legend([fitted_line, data_points], [counted_legend, data_legend])
        plot.set_title(label = f"epoch = {result['e']}")

    # Hide the grid cells that have no epoch to show.
    for unused in axes[saved_epochs_n:]:
        unused.set_visible(False)


## Step 5 - put it all together

Drawing the plot of all epochs may take some time

In [49]:
# Load the standardized data; x already carries the leading column of ones.
x, y = read_real_dataset()

# Start from random weights ~ N(0, 1), one per column of x.
weights = np.random.normal(loc=0, scale=1, size=x.shape[1])

epochs = 8
learning_rate = 0.7

weights, epoch_results = gradient_descent_algorithm(
    x, weights, y, epochs, learning_rate
)
y_hat = predict(x, weights)

print(f"w_1 (k) - {weights[1]:.3f}")
print(f"w_0 (b) - {weights[0]:.3f}")

# One subplot per epoch.
visualize_gd_results(epoch_results, learning_rate, x, y)
plt.show()

# Final fit: data as points, fitted model as a red line.
draw_function(x, y, as_line=False)
draw_function(x, y_hat, as_line=True, color='red')

plt.show()

6.810013512970907
1.0744346251161194
0.558185943034731
0.5117193683464364
0.5075369991459109
0.5071605519374556
0.5071266686297975
0.5071236188567482
w_1 (k) - 0.702
w_0 (b) - 0.000


KeyboardInterrupt: 