# Residual Analysis and Linear Regression Assumptions

This notebook covers diagnostic plots and assumption checking for linear regression.

In [None]:
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Project modules live in ../src; extend the path before importing them.
sys.path.append('../src')

from linear_regression import LinearRegression
from visualization import plot_residuals, plot_prediction_vs_actual
from utils import train_test_split

## Linear Regression Assumptions

1. **Linearity**: The relationship between the features X and the target y is linear
2. **Independence**: Observations (and therefore their errors) are independent of one another
3. **Homoscedasticity**: Residuals have constant variance across the range of fitted values
4. **Normality**: Residuals are approximately normally distributed
5. **No multicollinearity**: Features are not highly correlated with each other

In [None]:
# Load the real estate data, hold out a test split, fit the model, and report
# its score on the held-out data. Tunable inputs are named up front so the
# cell contains no magic values.
DATA_PATH = '../data/real_estate_prices.csv'
FEATURE_COLUMNS = ['size_sqft', 'bedrooms', 'age_years']
TARGET_COLUMN = 'price'
TEST_SIZE = 0.2
RANDOM_STATE = 42

# pandas (pd) comes from the notebook's import cell at the top.
df = pd.read_csv(DATA_PATH)

# Fail early with a readable message if the CSV does not have the columns
# this cell indexes below (same exception type pandas itself would raise).
missing_columns = [
    column
    for column in FEATURE_COLUMNS + [TARGET_COLUMN]
    if column not in df.columns
]
if missing_columns:
    raise KeyError(f"{DATA_PATH} is missing expected columns: {missing_columns}")

X = df[FEATURE_COLUMNS].values
y = df[TARGET_COLUMN].values

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Fit model
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)  # reused by the residual-analysis cell below

# The label previously printed garbled characters instead of the superscript
# two. \u00b2 is written as an escape so it survives any file re-encoding.
print(f"R\u00b2 Score: {model.score(X_test, y_test):.3f}")

In [None]:
# Residual analysis
# Passes the held-out targets (y_test) and the model's predictions for the
# same rows (y_pred), both produced by the fitting cell, to the project's
# plot_residuals helper from the visualization module.
# NOTE(review): presumably this draws residual diagnostic plot(s); the helper's
# exact output is not visible from this notebook, so confirm in visualization.
plot_residuals(y_test, y_pred)
plt.show()  # render whatever figure(s) the helper created