# House Price Prediction Model Development

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

In [None]:
# Load the training CSV that sits one directory above the working directory
# and show its first five rows as a quick sanity check.
df = pd.read_csv(filepath_or_buffer='../train.csv')
print(df.head(n=5))

In [None]:
# Predictor columns fed to the model; the target column is 'SalePrice'.
features = [
    'OverallQual',
    'GrLivArea',
    'TotalBsmtSF',
    'GarageCars',
    'FullBath',
    'YearBuilt',
]
y = df['SalePrice']
X = df[features]
# Preview the first rows of the feature matrix, then of the target.
print(X.head(), y.head(), sep='\n')

In [None]:
X = X.fillna(X.mean())
print("Missing values in X:", X.isnull().sum().sum())

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)

In [None]:
# Build and fit the random forest in one expression (fit() returns the
# estimator itself); the seed is fixed so repeated runs give the same model.
model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
print("Model trained.")

In [None]:
# Predict sale prices for the held-out rows and show the first five values.
y_pred = model.predict(X=X_test)
print("Sample predictions:", y_pred[0:5])

In [None]:
# Score the hold-out predictions with standard regression metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
# Root of the MSE, so the error is expressed in the same units as the target.
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)
print(f'MAE: {mae:.2f}')
print(f'MSE: {mse:.2f}')
print(f'RMSE: {rmse:.2f}')
# \u00b2 is the superscript two; written as an escape so the "R²" label is not
# corrupted when the file is re-encoded.
print(f'R\u00b2: {r2:.2f}')

In [None]:
# Persist the fitted model to the working directory so it can be reloaded
# later with joblib.load.
joblib.dump(value=model, filename='house_price_model.pkl')
print("Model saved to house_price_model.pkl")