In [21]:
import pandas as pd
import numpy as np


In [22]:
# Cost Function MSE
def cost_function(Y,b,w,X):
	m = len(Y)
	sse = 0 # sum of squares error

	for i in range(0,m):
		y_hata=b + w * X[i]
		y=Y[i]

		sse += (y_hata - y)**2

	mse = sse/m
	return mse

In [23]:
# Update Weights
def update_weights(Y, b, w, X, learning_rate):
	m = len(Y)

	b_deriv_sum=0
	w_deriv_sum=0

	for i in range(0,m):
		y_hata=b + w * X[i]
		y=Y[i]
		b_deriv_sum += (y_hata-y) 
		w_deriv_sum += (y_hata-y) * X[i]

	new_b= b - (learning_rate * 1 / m * b_deriv_sum)
	new_w = w - (learning_rate * 1 / m * w_deriv_sum)

	return new_b, new_w

In [29]:
# Train
def train(Y, initial_b, initial_w, X, learning_rate, num_iters):

	print("Starting gradient descent at b = {0}, w = {1}, error = {2}".format(initial_b, initial_w, cost_function(Y, initial_b, initial_w, X)))
	
	b=initial_b
	w=initial_w

	cost_history=[]

	for i in range(num_iters):
		b,w=update_weights(Y,b,w,X,learning_rate)
		mse=cost_function(Y,b,w,X)
		cost_history.append(mse)

		if i%100==0:
			print("Iteration {0}: b = {1}, w = {2}, mse = {3}".format(i, b, w,mse))
	
	print(f"After  {num_iters} iterations b = {b}, w = {w}, mse = {mse}")

	return cost_history,b,w


In [25]:
df=pd.read_csv('datasets/advertising.csv')

In [26]:
X=df['Radio']
Y=df['Sales']

In [27]:
# hyperparameters

learning_rate=0.001
initial_b=0
initial_w=0
num_iters=1000

In [30]:
train(Y,initial_b,initial_w,X,learning_rate,num_iters)

Starting gradient descent at b = 0, w = 0, error = 256.71195
Iteration 0: b = 0.015130500000000005, w = 0.37928700000000015, mse = 78.21421362954206
Iteration 100: b = 0.3657836639693018, w = 0.4876566645879548, mse = 65.01572675191156
Iteration 200: b = 0.7029391521854101, w = 0.47733956755254325, mse = 62.74029183504324
Iteration 300: b = 1.0305180259037614, w = 0.4673155188716665, mse = 60.59228468898995
Iteration 400: b = 1.3487923007030542, w = 0.4575761947563942, mse = 58.56456916731474
Iteration 500: b = 1.6580262657911427, w = 0.4481135078479316, mse = 56.65040875849236
Iteration 600: b = 1.9584767034660184, w = 0.4389196005020228, mse = 54.84344420575474
Iteration 700: b = 2.2503931023431933, w = 0.4299868382641046, mse = 53.13767238025805
Iteration 800: b = 2.5340178645265286, w = 0.4213078035297942, mse = 51.52742633738474
Iteration 900: b = 2.8095865068945893, w = 0.41287528938544316, mse = 50.00735648992184
After  1000 iterations b = 3.0746884529612175, w = 0.4047630604660

([78.21421362954206,
  68.01628103529549,
  67.41161822619242,
  67.35379480708053,
  67.32715523437726,
  67.30230662114793,
  67.2775735358361,
  67.2528604694312,
  67.2281619700851,
  67.20347771930166,
  67.17880769119546,
  67.1541518765648,
  67.12951026716313,
  67.1048828548031,
  67.08026963130499,
  67.05567058849394,
  67.03108571819992,
  67.00651501225755,
  66.98195846250623,
  66.95741606078988,
  66.9328877989573,
  66.90837366886196,
  66.8838736623619,
  66.85938777132006,
  66.83491598760382,
  66.81045830308541,
  66.78601470964173,
  66.76158519915438,
  66.73716976350947,
  66.71276839459799,
  66.6883810843155,
  66.66400782456222,
  66.63964860724316,
  66.61530342426775,
  66.59097226755034,
  66.56665512900982,
  66.5423520005697,
  66.51806287415819,
  66.49378774170813,
  66.46952659515712,
  66.44527942644726,
  66.42104622752531,
  66.39682699034266,
  66.37262170685554,
  66.3484303690245,
  66.32425296881493,
  66.30008949819683,
  66.27593994914474,
  

In [17]:
cost_history,b,w,=train(Y,initial_b,initial_w,X,learning_rate,num_iters)

Starting gradient descent at b = 0, w = 0, error = 256.71195
Iteration 0: b = 0.015130500000000005, w = 0.37928700000000015, error = 78.21421362954206
Iteration 100: b = 0.3657836639693018, w = 0.4876566645879548, error = 65.01572675191156
Iteration 200: b = 0.7029391521854101, w = 0.47733956755254325, error = 62.74029183504324
Iteration 300: b = 1.0305180259037614, w = 0.4673155188716665, error = 60.59228468898995
Iteration 400: b = 1.3487923007030542, w = 0.4575761947563942, error = 58.56456916731474
Iteration 500: b = 1.6580262657911427, w = 0.4481135078479316, error = 56.65040875849236
Iteration 600: b = 1.9584767034660184, w = 0.4389196005020228, error = 54.84344420575474
Iteration 700: b = 2.2503931023431933, w = 0.4299868382641046, error = 53.13767238025805
Iteration 800: b = 2.5340178645265286, w = 0.4213078035297942, error = 51.52742633738474
Iteration 900: b = 2.8095865068945893, w = 0.41287528938544316, error = 50.00735648992184
After  1000 iterations b = 3.0746884529612175,