In [23]:
import numpy as np
import pandas as pd
import math

In [37]:
# Data
population = np.array([10000, 15000, 20000, 9000])
years_in_business = np.array([5, 6, 6, 5])
profit = np.array([10000, 12000, 13000, 12000]) 

In [32]:

def simple_linear_regression(X, Y):

    theta_1 = np.sum((X - np.mean(X)) * (Y - np.mean(Y))) / np.sum((X - np.mean(X)) ** 2)
    theta_0 = np.mean(Y) - theta_1 * np.mean(X)

    return theta_1, theta_0


def gradient_descent(X, Y, learning_rate=0.01, epochs=1000):

    theta_1, theta_0 = 0, 0
    n = len(X)
    for _ in range(epochs):
        Y_pred = theta_1 * X + theta_0
        D_m = (-2/n) * np.sum(X * (Y - Y_pred))
        D_b = (-2/n) * np.sum(Y - Y_pred)
        theta_1 -= learning_rate * D_m
        theta_0 -= learning_rate * D_b
    return theta_1, theta_0



In [None]:
# Mean Squared Error (MSE) calculation
def mean_squared_error(y_true, y_pred):
    mse = np.mean((y_true - y_pred) ** 2)
    return mse

# R-squared (R²) calculation
def r_squared(y_true, y_pred):
    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
    ss_residual = np.sum((y_true - y_pred) ** 2)
    r2 = 1 - (ss_residual / ss_total)
    return r2

In [33]:


m_simple, b_simple = simple_linear_regression(years_in_business, profit)

profit_pred_simple = m_simple * years_in_business + b_simple

print(f"Predicted X (Simple Linear Regression): {profit_pred_simple}")

m_simple_gd, b_simple_gd = gradient_descent(years_in_business, profit)

profit_pred_simple_gd = m_simple_gd * years_in_business + b_simple_gd


print(f"Predicted X (Simple Linear Regression GD): {profit_pred_simple_gd}")

Predicted X (Simple Linear Regression): [11000. 12500. 12500. 11000.]
Predicted X (Simple Linear Regression GD): [10738.1531512  12719.47937813 12719.47937813 10738.1531512 ]


In [34]:
# For normal case
mse_simple = mean_squared_error(profit, profit_pred_simple)
r2_simple = r_squared(profit, profit_pred_simple)
print(f"Simple Linear Regression MSE: {mse_simple}")
print(f"Simple Linear Regression R²: {r2_simple}\n")

# For Gradient Descent Case
mse_simple_gd = mean_squared_error(profit, profit_pred_simple_gd)
r2_simple_gd = r_squared(profit, profit_pred_simple_gd)
print(f"Simple Linear Regression (GD) MSE: {mse_simple_gd}")
print(f"Simple Linear Regression (GD) R²: {r2_simple_gd}\n")

Simple Linear Regression MSE: 625000.0
Simple Linear Regression R²: 0.4736842105263158

Simple Linear Regression (GD) MSE: 683367.4848264173
Simple Linear Regression (GD) R²: 0.42453264435670124



In [39]:
def multi_linear_regression(X1, X2, Y):
    X1_mean = np.mean(X1)
    X2_mean = np.mean(X2)
    Y_mean = np.mean(Y)
    m1 = np.sum((X1 - X1_mean) * (Y - Y_mean)) / np.sum((X1 - X1_mean) ** 2)
    m2 = np.sum((X2 - X2_mean) * (Y - Y_mean)) / np.sum((X2 - X2_mean) ** 2)
    b = Y_mean - m1 * X1_mean - m2 * X2_mean

    return m1, m2, b

# Fitting the Multi-Linear Regression model
m1_multi, m2_multi, b_multi = multi_linear_regression(population, years_in_business, profit)

# Predicting, including the missing value
profit_pred_multi = m1_multi * population + m2_multi * years_in_business + b_multi

# Predicting the missing value (X)
print(f"Predicted X (Multi-Linear Regression): {profit_pred_multi}")



Predicted X (Multi-Linear Regression): [10386.36363636 12762.98701299 13639.61038961 10211.03896104]


In [36]:
# For normal case
mse_multi = mean_squared_error(profit, profit_pred_multi)
r2_multi = r_squared(profit, profit_pred_multi)
print(f"Multi-Linear Regression MSE: {mse_multi}")
print(f"Multi-Linear Regression R²: {r2_multi}\n")



Multi-Linear Regression MSE: 1085227.2727272734
Multi-Linear Regression R²: 0.08612440191387505



In [22]:
print("Model Comparison and Interpretation:")
print(f"Simple Linear Regression MSE: {mse_simple}, R²: {r2_simple}")
print(f"Multi-Linear Regression MSE: {mse_multi}, R²: {r2_multi}")

if mse_multi < mse_simple:
    print("Multi-Linear Regression performs better with lower MSE.")
else:
    print("Simple Linear Regression performs better with lower MSE.")

if r2_multi > r2_simple:
    print("Multi-Linear Regression has a better fit with a higher R².")
else:
    print("Simple Linear Regression has a better fit with a higher R².")

Model Comparison and Interpretation:
Simple Linear Regression MSE: 625000.0, R²: 0.4736842105263158
Multi-Linear Regression MSE: 1085227.2727272734, R²: 0.08612440191387505
Simple Linear Regression performs better with lower MSE.
Simple Linear Regression has a better fit with a higher R².
