# Linear Regression from Scratch
This notebook fits a simple (one-feature) linear regression by computing the slope and intercept directly with NumPy and pandas, without using scikit-learn.

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset; the path is resolved relative to the notebook's working directory.
df = pd.read_csv('Salary_dataset.csv')

# Feature and target columns used by the cells that follow.
X = df['YearsExperience']
y = df['Salary']

# Keep the preview as the cell's last expression so Jupyter renders it as a
# rich table instead of the plain-text dump that print() produces.
df.head()


## Step 1: Compute Mean Values

In [None]:

# Averages of the feature and the target; both feed the slope/intercept formulas.
x_mean, y_mean = X.mean(), y.mean()

print(f'Mean of X: {x_mean:.2f}, Mean of y: {y_mean:.2f}')


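## Step 2: Compute Coefficients m (slope) and b (intercept)

The least-squares estimates for a single feature are

$$m = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2}, \qquad b = \bar{y} - m\,\bar{x}$$

where $\bar{x}$ and $\bar{y}$ are the means computed in Step 1.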

In [None]:

# Least-squares fit for one feature:
#   m = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean)^2)
#   b = y_mean - m * x_mean
x_dev = X - x_mean
y_dev = y - y_mean

m = (x_dev * y_dev).sum() / (x_dev ** 2).sum()
b = y_mean - m * x_mean

print(f"Slope (m): {m:.4f}")
print(f"Intercept (b): {b:.4f}")


## Step 3: Define Prediction Function and Generate Predictions

In [None]:

def predict(x, slope=None, intercept=None):
    """Evaluate the line ``slope * x + intercept``.

    Args:
        x: A number, or any object that supports ``*`` and ``+`` with numbers
            (for example a NumPy array or a pandas Series).
        slope: Slope of the line. When omitted, the notebook-level ``m`` is used.
        intercept: Intercept of the line. When omitted, the notebook-level ``b``
            is used.

    Returns:
        The result of ``slope * x + intercept`` for the given ``x``.
    """
    # Compare against None (not truthiness) so a legitimate 0 coefficient is
    # honoured; fall back to the fitted globals so predict(x) keeps working.
    if slope is None:
        slope = m
    if intercept is None:
        intercept = b
    return slope * x + intercept

# Score every row with the fitted line and store the result next to the actual salaries.
predicted_salary = predict(df['YearsExperience'])
df['Predicted_Salary'] = predicted_salary
df.head()


## Step 4: Visualization

In [None]:

# Draw the observations and the fitted line on one explicitly created Axes.
fig, ax = plt.subplots()
ax.scatter(X, y, color='blue', label='Actual Data')
ax.plot(X, df['Predicted_Salary'], color='red', label='Predicted Line')
ax.set_xlabel('Years of Experience')
ax.set_ylabel('Salary')
ax.set_title('Linear Regression (From Scratch)')
ax.legend()
plt.show()


## Step 5: Evaluate Model Performance

In [None]:

# Mean squared error: the average of the squared residuals (actual minus predicted).
residuals = y - df['Predicted_Salary']
mse = (residuals ** 2).mean()
print(f"Mean Squared Error (Manual): {mse:.4f}")


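### ✅ Task: Compare your computed `m` and `b` values with scikit-learn’s `LinearRegression` model.
Fit `LinearRegression` on the same data and compare its `coef_` and `intercept_` with your `m` and `b`. If your implementation is correct, the values should agree up to floating-point rounding.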