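Linear regression is one of the most fundamental and widely used algorithms in supervised machine learning and statistics. In its simplest form it models the relationship between an independent variable (x) and a dependent variable (y) by fitting a straight line (called the regression line), ŷ = wx + b, to the data, where the slope w and the intercept b are chosen to minimize the sum of squared differences between the observed y values and the line's predictions.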

In [2]:
# Toy training data: parallel lists of inputs (x) and observed targets (y)
x, y = [1, 2, 3], [2, 3, 5]

In [3]:
# Step 1: Calculate means
# The arithmetic means of x and y feed both the slope formula (as the centre
# the deviations are measured from) and the intercept formula.
x_mean = sum(x) / len(x)
y_mean = sum(y) / len(y)

In [4]:
x_mean, y_mean

(2.0, 3.3333333333333335)

In [5]:
# Step 2: Calculate the slope (w)
# Deviations of every observation from its mean, computed once and reused.
x_dev = [xi - x_mean for xi in x]
y_dev = [yi - y_mean for yi in y]

# Slope = sum of deviation products / sum of squared x deviations.
numerator = sum(dx * dy for dx, dy in zip(x_dev, y_dev))
denominator = sum(dx ** 2 for dx in x_dev)
w = numerator / denominator

In [6]:
w

1.5

In [7]:
# Step 3: Calculate the intercept (b)
# Rearranging y_mean = w * x_mean + b: this choice of b makes the fitted line
# pass through the point (x_mean, y_mean).
b = y_mean - w * x_mean

In [8]:
# Step 4: Define the prediction function
def predict(x_val, weight=None, bias=None):
    """Return the regression line's prediction ``weight * x_val + bias``.

    Args:
        x_val: Input value at which to evaluate the line.
        weight: Slope of the line. When ``None`` (the default), the
            module-level ``w`` is read at call time.
        bias: Intercept of the line. When ``None`` (the default), the
            module-level ``b`` is read at call time.

    Returns:
        The predicted value for ``x_val``.
    """
    # The globals w and b are rebound later in this notebook, so the default
    # result depends on which cells have run; pass weight/bias explicitly to
    # evaluate a specific line regardless of notebook state.
    slope = w if weight is None else weight
    intercept = b if bias is None else bias
    return slope * x_val + intercept

In [9]:
# Output the equation of the line
# The intercept's sign is rendered as the operator, so a negative b prints
# "... - 0.33" rather than "... + -0.33".
print(f"Regression Line: ŷ = {w:.2f}x {'+' if b >= 0 else '-'} {abs(b):.2f}")

Regression Line: ŷ = 1.50x + 0.33


In [10]:
# Step 5: Make a prediction for x = 4
# x = 4 is not one of the sample inputs (1, 2, 3), so this evaluates the
# fitted line beyond the range of the data it was computed from.
x_test = 4
y_pred = predict(x_test)
print(f"Prediction for x = {x_test}: ŷ = {y_pred:.2f}")

Prediction for x = 4: ŷ = 6.33


 # Manual Least Squares Implementation

In [11]:
# Number of data points (x and y are parallel lists, so len(x) is the
# number of observations)
n = len(x)

In [12]:
n

3

In [13]:
# Compute the four sums used by the closed-form least-squares formulas:
# sum of x, sum of y, sum of x*y products, and sum of squared x values.
# Elements are iterated directly (paired with zip) instead of through an
# index; x and y are expected to have the same length.
sum_x = sum(x)
sum_y = sum(y)
sum_xy = sum(xi * yi for xi, yi in zip(x, y))
sum_x_squared = sum(xi ** 2 for xi in x)

In [14]:
sum_x, sum_y, sum_xy, sum_x_squared

(6, 10, 23, 14)

In [15]:
# Closed-form least-squares solution expressed through the raw sums.
# Slope: (n*Σxy - Σx*Σy) / (n*Σx² - (Σx)²)
slope_num = n * sum_xy - sum_x * sum_y
slope_den = n * sum_x_squared - sum_x ** 2
w = slope_num / slope_den

# Intercept: (Σy - w*Σx) / n
b = (sum_y - w * sum_x) / n

In [16]:
# Evaluate the line at x = 4 using the w and b obtained from the raw sums.
# Note: this rebinds y_pred, which was also assigned in the earlier
# prediction cell.
x_value = 4
y_pred = w * x_value + b

In [17]:
y_pred

6.333333333333333