In [2]:
import csv

def linear_regression(x, y):
    """Fit a simple least-squares line ``y = m * x + b``.

    Args:
        x: Sequence of numeric feature values.
        y: Sequence of numeric target values, same length as ``x``.

    Returns:
        A ``(m, b)`` tuple: the slope and the intercept of the fitted line.

    Raises:
        ValueError: If ``x`` and ``y`` differ in length, if fewer than two
            points are given, or if every ``x`` value is identical (the
            slope is undefined for a vertical line).
    """
    n = len(x)
    if n != len(y):
        # zip() would silently truncate to the shorter input while n still
        # counted every x, producing a wrong fit without any error.
        raise ValueError(
            f"x and y must have the same length (got {n} and {len(y)})"
        )
    if n < 2:
        raise ValueError("at least two data points are required to fit a line")

    mean_x = sum(x) / n
    mean_y = sum(y) / n

    # Mean-centred sums are algebraically equal to the raw-sum form
    # (n*sum_xy - sum_x*sum_y) / (n*sum_x2 - sum_x**2) but avoid subtracting
    # two huge, nearly equal floats when x is large relative to its spread.
    s_xy = sum((x_i - mean_x) * (y_i - mean_y) for x_i, y_i in zip(x, y))
    s_xx = sum((x_i - mean_x) ** 2 for x_i in x)

    if min(x) == max(x) or s_xx == 0:
        raise ValueError("all x values are identical; the slope is undefined")

    # Slope (m) and Intercept (b)
    m = s_xy / s_xx
    b = mean_y - m * mean_x
    return m, b

def predict(x, m, b):
    """Evaluate the line ``m * x_i + b`` at every value in ``x``.

    Args:
        x: Iterable of numeric inputs.
        m: Slope of the line.
        b: Intercept of the line.

    Returns:
        A new list with one predicted value per input, in input order.
    """
    fitted = []
    for value in x:
        fitted.append(m * value + b)
    return fitted

def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between two series.

    Args:
        y_true: Sequence of observed values.
        y_pred: Iterable of predicted values, one per observed value.

    Returns:
        The average of ``(y_t - y_p) ** 2`` over all pairs.

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    # Materialise so the length can be checked even for a generator input.
    y_pred = list(y_pred)
    n = len(y_true)
    if n != len(y_pred):
        # zip() would drop the unmatched tail while the divisor still used
        # len(y_true), yielding a silently wrong error value.
        raise ValueError(
            f"y_true and y_pred must have the same length (got {n} and {len(y_pred)})"
        )
    if n == 0:
        raise ValueError("mean squared error is undefined for empty input")
    return sum((y_t - y_p) ** 2 for y_t, y_p in zip(y_true, y_pred)) / n

# Load real dataset from CSV
def load_dataset(filepath):
    """Read the first two columns of a CSV file as lists of floats.

    The first non-blank row is treated as a header, and skipped, only when
    its first two fields cannot be parsed as numbers; a file without a
    header therefore keeps its first data row. Rows that are empty or
    contain only whitespace are ignored.

    Note: a header whose first two fields happen to parse as floats
    (e.g. ``"2020,2021"``) cannot be told apart from data and is loaded
    as a data row.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A ``(x, y)`` tuple of lists: ``x`` holds the first column and ``y``
        the second column, both converted to ``float``.

    Raises:
        ValueError: If a row after the header has a non-numeric value in
            one of the first two columns.
        IndexError: If a row after the header has fewer than two columns.
        OSError: If the file cannot be opened.
    """
    x, y = [], []
    # newline='' lets the csv module handle line endings itself, as the
    # csv documentation recommends.
    with open(filepath, 'r', newline='') as file:
        reader = csv.reader(file)
        header_checked = False
        for row in reader:
            if not any(field.strip() for field in row):
                continue  # blank line
            try:
                x_value = float(row[0])  # first column: feature
                y_value = float(row[1])  # second column: target
            except (ValueError, IndexError):
                if not header_checked:
                    # First non-blank row is not numeric: it is the header.
                    header_checked = True
                    continue
                raise
            header_checked = True
            x.append(x_value)
            y.append(y_value)
    return x, y

# Input file, resolved relative to the notebook's working directory.
dataset_path = "homeprices (1).csv"

# Read the two numeric columns: x is the feature, y is the target.
x, y = load_dataset(dataset_path)

# Fit the line and report its parameters.
m, b = linear_regression(x, y)
print(f"Slope (m): {m}", f"Intercept (b): {b}", sep="\n")

# Evaluate the fitted line on the same inputs it was trained on.
predictions = predict(x, m, b)
print(f"Predictions: {predictions}")

# In-sample error: measured against the training targets, not held-out data.
mse = mean_squared_error(y, predictions)
print(f"Mean Squared Error: {mse}")


Slope (m): 118.29495955196018
Intercept (b): 257056.62725575606
Predictions: [564623.5220908525, 611941.5059116366, 635600.4978220286, 682918.4816428127, 730236.4654635967, 564623.5220908525, 588282.5140012446, 647429.9937772246, 682918.4816428127, 564623.5220908525, 600112.0099564407, 623771.0018668326, 682918.4816428127]
Mean Squared Error: 403506294.57661194
