## 1) Implement Linear Regression and calculate sum of residual error on the following Datasets. x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] , y = [1, 3, 2, 5, 7, 8, 8, 9, 10, 12]
## a) Compute the regression coefficients using analytic formulation and calculate Sum Squared Error (SSE) and R 2 value.

In [23]:
import numpy as np
import pandas as pd

In [24]:
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

In [25]:
def linear_regression_analytic(x, y):
    n = len(x)
    x_mean = np.mean(x)
    y_mean = np.mean(y)
    xy_mean = np.mean(x*y)
    x_squared_mean = np.mean(x**2)
    b1 = (xy_mean - x_mean*y_mean) / (x_squared_mean - x_mean**2)
    b0 = y_mean - b1*x_mean
    y_pred = b0 + b1*x
    sse = np.sum((y - y_pred)**2)
    ss_total = np.sum((y - y_mean)**2)
    r_squared = 1 - (sse / ss_total)
    return b0, b1, sse, r_squared
b0_analytic, b1_analytic, sse_analytic, r_squared_analytic = linear_regression_analytic(x, y)
print("Analytic Solution:")
print("b0:", b0_analytic)
print("b1:", b1_analytic)
print("SSE:", sse_analytic)
print("Rsquare:", r_squared_analytic)

Analytic Solution:
b0: 1.2363636363636372
b1: 1.1696969696969695
SSE: 5.624242424242422
Rsquare: 0.952538038613988


In [26]:
def sum_residual_errors(x, y, b0, b1):
    y_pred = b0 + b1 * x
    residual_errors = y - y_pred
    sum_residual = np.sum(residual_errors)
    return sum_residual
b0_analytic, b1_analytic, _, _ = linear_regression_analytic(x, y)
sum_residual_analytic = sum_residual_errors(x, y, b0_analytic, b1_analytic)
print("Sum of Residual Errors:", sum_residual_analytic)

Sum of Residual Errors: 2.220446049250313e-15


## b) Implement gradient descent (both Full-batch and Stochastic with stopping criteria) on Least Mean Square loss formulation to compute the coefficients of regression matrix and compare the results using performance measures such as R 2 SSE etc.

In [21]:
def full_batch_gradient_descent(x, y, learning_rate=0.01, epochs=1000, tolerance=1e-6):
    n = len(x)
    b0 = 0
    b1 = 0
    prev_loss = float('inf')

    for epoch in range(epochs):
        y_pred = b0 + b1*x

        gradient_b0 = (-2/n) * np.sum(y - y_pred)
        gradient_b1 = (-2/n) * np.sum((y - y_pred) * x)

        b0 -= learning_rate * gradient_b0
        b1 -= learning_rate * gradient_b1

        loss = np.sum((y - y_pred)**2)

        if abs(prev_loss - loss) < tolerance:
            break

        prev_loss = loss

    return b0, b1, loss

b0_full, b1_full, sse_full = full_batch_gradient_descent(x, y)
print("\nFull-batch Gradient Descent:")
print("b0:", b0_full)
print("b1:", b1_full)
print("SSE:", sse_full)


Full-batch Gradient Descent:
b0: 1.230898466943318
b1: 1.170568526128318
SSE: 5.624329890820989


In [22]:
def stochastic_gradient_descent(x, y, learning_rate=0.01, epochs=1000, tolerance=1e-6):
    n = len(x)
    b0 = 0
    b1 = 0
    prev_loss = float('inf')

    for epoch in range(epochs):

        indices = np.random.permutation(n)
        x_shuffled = x[indices]
        y_shuffled = y[indices]

        for i in range(n):
            y_pred = b0 + b1*x_shuffled[i]

            gradient_b0 = -2 * (y_shuffled[i] - y_pred)
            gradient_b1 = -2 * (y_shuffled[i] - y_pred) * x_shuffled[i]

            b0 -= learning_rate * gradient_b0
            b1 -= learning_rate * gradient_b1

        y_pred = b0 + b1*x
        loss = np.sum((y - y_pred)**2)

        if abs(prev_loss - loss) < tolerance:
            break

        prev_loss = loss

    return b0, b1, loss


b0_stochastic, b1_stochastic, sse_stochastic = stochastic_gradient_descent(x, y)
print("\nStochastic Gradient Descent:")
print("b0", b0_stochastic)
print("b1:", b1_stochastic)
print("SSE:", sse_stochastic)


Stochastic Gradient Descent:
b0 1.2773326537867213
b1: 1.21029502331422
SSE: 6.260458198957624


## 2) Download Boston Housing Rate Dataset. Analyse the input attributes and find out the attribute that best follow the linear relationship with the output price. Implement both the analytic formulation and gradient descent (Full-batch, stochastic) on LMS loss formulation to compute the coefficients of regression matrix and compare the results.

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

housing_data = pd.read_csv("housing (3).csv")

selected_attribute = 'median_income'
X = housing_data[selected_attribute].values.reshape(-1, 1)
y = housing_data['median_house_value'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train_with_intercept = np.c_[np.ones(X_train.shape[0]), X_train]
X_test_with_intercept = np.c_[np.ones(X_test.shape[0]), X_test]

theta_analytic = np.linalg.inv(X_train_with_intercept.T.dot(X_train_with_intercept)).dot(X_train_with_intercept.T).dot(y_train)
print("Coefficients using Analytic Formulation:", theta_analytic)

def full_batch_gradient_descent(X, y, learning_rate, num_iterations):
    theta = np.zeros(X.shape[1])
    for _ in range(num_iterations):
        y_pred = X.dot(theta)
        theta -= (1/len(y)) * learning_rate * X.T.dot(y_pred - y)
    return theta

learning_rate = 0.01
num_iterations = 1000
theta_full_batch = full_batch_gradient_descent(X_train_with_intercept, y_train, learning_rate, num_iterations)
print("Coefficients using Full-batch Gradient Descent:", theta_full_batch)

def stochastic_gradient_descent(X, y, learning_rate, num_iterations):
    theta = np.zeros(X.shape[1])
    m = len(y)  # Number of training examples

    # Shuffle the training data once before training
    shuffle_index = np.random.permutation(len(y))
    X_shuffled = X[shuffle_index]
    y_shuffled = y[shuffle_index]

    # SGD loop
    for _ in range(num_iterations):
        for xi, yi in zip(X_shuffled, y_shuffled):
            y_pred = np.dot(xi, theta)
            gradient = xi * (y_pred - yi)
            theta -= learning_rate * gradient

    return theta

theta_stochastic = stochastic_gradient_descent(X_train_with_intercept, y_train, learning_rate, num_iterations)
print("Coefficients using Stochastic Gradient Descent:", theta_stochastic)

Coefficients using Analytic Formulation: [44459.72916908 41933.84939381]
Coefficients using Full-batch Gradient Descent: [39148.47787113 43047.96802282]
Coefficients using Stochastic Gradient Descent: [39555.8163694  43224.03546802]


In [6]:
# Predictions
y_pred_analytic = X_test_with_intercept.dot(theta_analytic)
y_pred_full_batch = X_test_with_intercept.dot(theta_full_batch)
y_pred_stochastic = X_test_with_intercept.dot(theta_stochastic)

# Calculate SSE
SSE_analytic = np.sum((y_test - y_pred_analytic) ** 2)
SSE_full_batch = np.sum((y_test - y_pred_full_batch) ** 2)
SSE_stochastic = np.sum((y_test - y_pred_stochastic) ** 2)

# Calculate total sum of squares (SST)
mean_y = np.mean(y_test)
SST = np.sum((y_test - mean_y) ** 2)

# Calculate R-squared
R_squared_analytic = 1 - (SSE_analytic / SST)
R_squared_full_batch = 1 - (SSE_full_batch / SST)
R_squared_stochastic = 1 - (SSE_stochastic / SST)

print("SSE and R-squared value:")
print("Analytic Formulation: SSE =", SSE_analytic, ", R-squared =", R_squared_analytic)
print("Full-batch Gradient Descent: SSE =", SSE_full_batch, ", R-squared =", R_squared_full_batch)
print("Stochastic Gradient Descent: SSE =", SSE_stochastic, ", R-squared =", R_squared_stochastic)

SSE and R-squared value:
Analytic Formulation: SSE = 29272299281848.188 , R-squared = 0.45885918903846656
Full-batch Gradient Descent: SSE = 29321631561932.883 , R-squared = 0.4579472104543938
Stochastic Gradient Descent: SSE = 29323223635892.875 , R-squared = 0.4579177786634189
