Implement Gradient Descent 


In [7]:
def compute_cost(x, y, w, b):

    m = x.shape[0]
    cost_sum = 0
    for i in range(m):
        f_wb = w * x[i] + b
        cost = (f_wb - y[i]) ** 2
        cost_sum = cost_sum + cost
    total_cost = (1 / (2 * m)) * cost_sum
    return total_cost

In [8]:
import numpy as np
# first iteration 
# numpy array containing input features
x = np.array([1, 2, 3, 4, 5])
# numpy array containing target values
y = np.array([2, 4, 6, 8, 10])

# w is the weight (or slope) that controls how much the feature  x affects the prediction.
w = 50


# The higher line represents a positive bias shifting the predictions up, while the lower line shows a negative bias shifting the predictions down.
b = 5
alpha = 0.1

# returns the shape of x 
m = x.shape[0]

f_wb = w * x + b
print(f_wb)


temp_w = w - alpha * (1 / m) * np.sum((f_wb - y) * x)

temp_b = b - alpha * (1 / m) * np.sum(f_wb - y)


print("Cost before update: ", compute_cost(x, y, w, b))

print("Cost after update: ", compute_cost(x, y, temp_w, temp_b))

print("Updated w: ", temp_w)

print("Updated b: ", temp_b)

[ 55 105 155 205 255]
Cost before update:  13404.5
Cost after update:  454.41000000000133
Updated w:  -4.300000000000011
Updated b:  -9.900000000000002


In [9]:
# Performing gradient descent with many iterations
for iteration in range(m):
    current_w = temp_w
    current_b = temp_b
    predictions = current_w * x + current_b

    # Updating weights and bias
    gradient_w = (1 / m) * np.sum((predictions - y) * x)
    gradient_b = (1 / m) * np.sum(predictions - y)
    
    temp_w = current_w - alpha * gradient_w
    temp_b = current_b - alpha * gradient_b

    # Displaying cost for current iteration
    current_cost = compute_cost(x, y, temp_w, temp_b)
    print("Cost after update: ", current_cost)

    # Early stopping if the cost reaches zero
    if current_cost == 0:
        break

# Displaying final values
print("Final weight (w): ", temp_w)
print("Final bias (b): ", temp_b)
print("Final cost: ", compute_cost(x, y, temp_w, temp_b))
print("Iterations completed: ", iteration)


Cost after update:  20.10420000000007
Cost after update:  5.381316000000002
Cost after update:  4.729939919999999
Cost after update:  4.555585281599999
Cost after update:  4.4023352281919985
Final weight (w):  3.9281599999999997
Final bias (b):  -6.954551999999999
Final cost:  4.4023352281919985
Iterations completed:  4


In [10]:
import numpy as np

# Setting cost threshold and initialize minimum cost
cost_limit = 4
lowest_cost = float('inf')

# Performing iterative gradient descent
for iteration in range(m):
    current_w = temp_w
    current_b = temp_b

    # Computing predictions and cost function gradients
    predictions = current_w * x + current_b
    grad_w = (1 / m) * np.sum((predictions - y) * x)
    grad_b = (1 / m) * np.sum(predictions - y)

    # Updating weights and bias
    temp_w = current_w - alpha * grad_w
    temp_b = current_b - alpha * grad_b

    # Calculating current cost
    cost_now = compute_cost(x, y, temp_w, temp_b)
    print("Cost after update: ", cost_now)

    # Tracking optimal weights if cost improves
    if cost_now < lowest_cost:
        lowest_cost = cost_now
        best_w = temp_w
        best_b = temp_b

    # Checking for early exit conditions
    if cost_now == 0 or cost_now > cost_limit:
        break

# Displaying results after completion
print("Final weight (w): ", temp_w)
print("Final bias (b): ", temp_b)
print("Final cost: ", compute_cost(x, y, temp_w, temp_b))
print("Iterations completed: ", iteration)
print("Lowest cost encountered: ", lowest_cost)
print("Best weight (w): ", best_w)
print("Best bias (b): ", best_b)


Cost after update:  4.2547342650681585
Final weight (w):  3.8935495999999996
Final bias (b):  -6.837544799999999
Final cost:  4.2547342650681585
Iterations completed:  0
Lowest cost encountered:  4.2547342650681585
Best weight (w):  3.8935495999999996
Best bias (b):  -6.837544799999999
