In [5]:
# 🏠 House Price Prediction using Linear Regression

# 1. 📥 Importing Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import pickle


In [6]:
# 2. 📊 Load Dataset
# NOTE: the recorded run of this cell stopped with an SSL certificate
# verification URLError raised while opening a URL, so `df` was never created
# and the summary cell that ran afterwards hit a NameError. Re-run from here
# once the fetch succeeds.
california = fetch_california_housing(as_frame=True)

# The returned container also supports key access; "frame" is the same object
# as the `.frame` attribute.
df = california["frame"]
df.head(n=5)



URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1028)>

In [2]:
# 3. 📈 Basic Info and Summary
# Structural overview first; `info()` prints its report rather than returning it.
df.info()

# Descriptive statistics, shown as this cell's rich output.
summary_stats = df.describe()
summary_stats

NameError: name 'df' is not defined

In [None]:
# 4. 🔍 Null values check
# Per-column count of missing entries (`isna` is the canonical name of `isnull`).
df.isna().sum()


In [None]:
# 5. 📉 Correlation Heatmap
# Explicit figure/axes handles instead of the implicit pyplot "current axes".
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(df.corr(), annot=True, cmap='coolwarm', ax=heatmap_ax)
heatmap_ax.set_title("Feature Correlation Heatmap")
plt.show()


In [None]:
# 6. 📊 Distribution of Target
# Histogram with a KDE overlay of the target column.
target_values = df["MedHouseVal"]
hist_ax = sns.histplot(target_values, kde=True)
hist_ax.set_title("Distribution of Target Variable (House Value)")
plt.show()


In [None]:
# 7. 🧹 Feature Selection and Splitting
# Target is the "MedHouseVal" column; every other column is a feature.
target_column = "MedHouseVal"
y = df[target_column]
X = df.drop(columns=target_column)

# 80/20 train/test split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# 8. 🤖 Model Training
# `fit` returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

# Keep the fitted estimator as the cell's displayed output.
model


In [None]:
# 9. 📈 Model Evaluation
# Predict on the held-out split, then score the predictions against the truth.
y_pred = model.predict(X_test)

r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

# One print call, one metric per line (MSE first, then R²).
print(
    f"Mean Squared Error: {mse:.3f}",
    f"R² Score: {r2:.3f}",
    sep="\n",
)


In [None]:
# 10. 🔍 Actual vs Predicted Plot
# Scatter of held-out targets (x) against the model's predictions (y).
scatter_fig, scatter_ax = plt.subplots()
scatter_ax.scatter(y_test, y_pred, alpha=0.5)
scatter_ax.set_xlabel("Actual Prices")
scatter_ax.set_ylabel("Predicted Prices")
scatter_ax.set_title("Actual vs Predicted House Prices")
plt.show()


In [None]:
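# 11. 💾 Save the Model (disabled — uncomment the two lines below to pickle the trained model to "linear_regression_model.pkl")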
# with open("linear_regression_model.pkl", "wb") as f:
#     pickle.dump(model, f)
