# 🏠 House Price Modeling Notebook
This notebook performs exploratory data analysis (EDA) on a house-listing dataset and then fits a linear regression model that predicts `price` from `sqft`, `bedrooms`, and `bathrooms`.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Apply seaborn's white-grid theme to every figure drawn below
sns.set_theme(style='whitegrid')

# Read the raw listings; the relative path is resolved against the
# kernel's working directory
raw_csv_path = '../data/raw/house_data.csv'
df = pd.read_csv(raw_csv_path)

# Preview the first rows as the cell output
df.head()

In [None]:
# Structural summary of the loaded frame: dimensions, per-column null
# counts, then summary statistics as the cell's rich output
missing_per_column = df.isnull().sum()

print('Dataset shape:', df.shape)
print('\nMissing values:\n', missing_per_column)

df.describe()

In [None]:
# Visualizations: four exploratory views of `price` on a single 2x2 figure
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Row-major unpacking: top-left, top-right, bottom-left, bottom-right
ax_dist, ax_sqft, ax_loc, ax_cond = axes.flat

# Price distribution (histogram with a KDE overlay)
sns.histplot(df['price'], bins=20, kde=True, ax=ax_dist)
ax_dist.set_title('Distribution of House Prices')

# Price vs. Square Footage, coloured by the `bedrooms` column
sns.scatterplot(data=df, x='sqft', y='price', hue='bedrooms', ax=ax_sqft)
ax_sqft.set_title('Price vs. Square Footage')

# Average price by location, sorted ascending so the bars are ranked
avg_price_location = df.groupby('location')['price'].mean().sort_values()
sns.barplot(x=avg_price_location.values, y=avg_price_location.index, ax=ax_loc)
ax_loc.set_title('Average Price by Location')
# The x values are passed as a bare array, so seaborn has no name to label
# the axis with; set both labels explicitly so the panel stands alone.
ax_loc.set_xlabel('Average price')
ax_loc.set_ylabel('Location')

# Price by condition
sns.boxplot(data=df, x='condition', y='price', ax=ax_cond)
ax_cond.set_title('Price by House Condition')

plt.tight_layout()
plt.show()

In [None]:
# Linear Regression Modeling
# Fit ordinary least squares to predict `price` from three feature columns.
feature_cols = ['sqft', 'bedrooms', 'bathrooms']
target_col = 'price'

# LinearRegression raises on NaN input, so keep only rows where every
# modeling column is present. `df` itself is left untouched, and the number
# of discarded rows is reported so the filtering is never silent.
model_df = df[feature_cols + [target_col]].dropna()
n_dropped = len(df) - len(model_df)
if n_dropped:
    print(f'Dropped {n_dropped} rows with missing feature/target values')

X = model_df[feature_cols]
y = model_df[target_col]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across re-runs
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

# Predictions on the held-out rows (consumed by the evaluation cell)
y_pred = model.predict(X_test)

print('Intercept:', model.intercept_)
# Pair each coefficient with the feature column it belongs to
print('Coefficients:', list(zip(X.columns, model.coef_)))

In [None]:
# Evaluation on the held-out test split
mse = mean_squared_error(y_test, y_pred)
# MSE is in squared price units; its square root is back on the price scale,
# so only the RMSE can be reported as a currency amount.
rmse = mse ** 0.5
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse:,.2f} (₹²)')
print(f'Root Mean Squared Error: ₹{rmse:,.2f}')
print(f'R² Score: {r2:.2f}')