In [1]:
# libraries
# ==================================
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
import random
# ==================================

**Read 'Advertising' dataset**

In [2]:
df = pd.read_csv('./datasets/Advertising.csv')

df.drop(['Unnamed: 0'], axis=1, inplace=True)
df.head()

Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


**Instance X and target Y (both normalized)**

In [3]:
X = df.drop(['sales'], axis=1) # features
Y = df['sales'] # target

# standardize the data
Y = np.array((Y-Y.mean())/Y.std())
X = X.apply(lambda x:(x-x.mean())/x.std(),axis=0)

**Initialize bias and weights to random values**

In [4]:
def initialize(dim):
  b = random.random()
  weights = np.random.rand(dim) 
  return b, weights

b,weights=initialize(3)
print(f'Bias: {b}, Weights: {weights}')

Bias: 0.055419667084502966, Weights: [0.74237888 0.9057786  0.49943938]


**Predict Y by a linear combination of X and weights + bias aggregation**

In [5]:
def predict_Y(b, weights, X):
  return b + np.dot(X, weights) # y = b + w_1*x_1 + w_2*x_2 + ...

Y_hat=predict_Y(b, weights, X)
Y_hat[0:10]

array([ 2.54668336,  0.48064392,  1.20220425,  1.83518237,  0.22549212,
        1.44244547, -0.29884259, -0.83488461, -3.11061663, -0.96357452])

**Calculate the error by MSE between predicted Y and target Y**

In [6]:
def compute_cost(Y, Y_hat) -> float:
    ''' Performs Mean Squared Error (MSE) cost function between the actual values and the predicted values. '''
    Y_resid = Y - Y_hat
    return np.sum(np.square(Y_resid)) / len(Y_resid) # MSE = sum((y - y_hat)^2) / n

Y_hat = predict_Y(b, weights, X)
cost = compute_cost(Y, Y_hat)

print(f'Cost: {cost}')


Cost: 0.6237334982826248


**Perform gradient descent by updating weights and bias**

In [7]:
def update_parameters(x, y, y_hat, b_0, w_0, learning_rate):
  db = (np.sum(y_hat - y) * 2) / len(y)
  dw = (np.dot((y_hat - y), x) * 2) / len(y)

  print(f'\nbias update for step: {db * learning_rate}, \nweights update for step: {dw * learning_rate}')
  next_b = b_0 - learning_rate * db # b0 - eta * nabla_b
  next_w = w_0 - learning_rate * dw # w0 - eta * nabla_w
  return next_b,next_w

print(f'Before update\n Bias: {b},\nWeights: {weights}')

Y_hat = predict_Y(b,weights,X)
b,weights = update_parameters(X,Y,Y_hat,b,weights,0.01)

print("--------------------------")

print(f'After update\n Bias: {b},\nWeights: {weights}')

Before update
 Bias: 0.055419667084502966,
Weights: [0.74237888 0.9057786  0.49943938]

bias update for step: 0.0011083933416900591, 
weights update for step: [0.00075801 0.01088725 0.01261529]
--------------------------
After update
 Bias: 0.05431127374281291,
Weights: [0.74162087 0.89489134 0.48682409]


In [8]:
def run_gradient_descent(X, Y, learning_rate, num_iterations):
  
  b, weights = initialize(X.shape[1])
  iter_num = 0
  gd_iterations_df = pd.DataFrame(columns=['iteration', 'cost']) # to store the cost at each iteration
  result_idx = 0

  for _ in range(num_iterations):
    Y_hat = predict_Y(b, weights, X)
    this_cost = compute_cost(Y, Y_hat)
    prev_b = b
    prev_weights = weights
    b,weights = update_parameters(X, Y, Y_hat, prev_b, prev_weights, learning_rate)
    
    if(iter_num%10==0):
      gd_iterations_df.loc[result_idx] = [iter_num,this_cost]
      result_idx=result_idx+1

    iter_num +=1
  
  print(f'Final Bias: {b}, Weights: {weights}')
  return gd_iterations_df, b, weights

gd_iterations_df, b, weights = run_gradient_descent(X,Y,learning_rate=0.01,num_iterations=200)


bias update for step: 0.013082760263392189, 
weights update for step: [-0.00666851  0.00384404  0.0069121 ]

bias update for step: 0.012821105058124345, 
weights update for step: [-0.00654779  0.00372611  0.00675498]

bias update for step: 0.012564682956961858, 
weights update for step: [-0.00642917  0.00361151  0.00660168]

bias update for step: 0.01231338929782262, 
weights update for step: [-0.00631261  0.00350013  0.00645211]

bias update for step: 0.012067121511866166, 
weights update for step: [-0.00619808  0.0033919   0.00630616]

bias update for step: 0.011825779081628844, 
weights update for step: [-0.00608555  0.00328672  0.00616375]

bias update for step: 0.011589263499996267, 
weights update for step: [-0.00597498  0.00318452  0.00602479]

bias update for step: 0.01135747822999634, 
weights update for step: [-0.00586634  0.00308521  0.0058892 ]

bias update for step: 0.011130328665396413, 
weights update for step: [-0.00575961  0.00298871  0.00575687]

bias update for step

In [9]:
gd_iterations_df.head()

Unnamed: 0,iteration,cost
0,0.0,0.783617
1,10.0,0.553886
2,20.0,0.402384
3,30.0,0.302195
4,40.0,0.235767
