In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

In [2]:
# 1. Load the dataset
# Reads 'train.csv' (a path relative to the kernel's working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='train.csv')

In [3]:
# 2. Data Preprocessing
# `target` is the column to predict; `features` are the input columns
# selected from the loaded frame to predict it.
target = 'SalePrice'
features = [
    'OverallQual',
    'GrLivArea',
    'TotalBsmtSF',
    'GarageCars',
    'FullBath',
    'YearBuilt',
]

In [4]:
# Split the loaded frame into the feature matrix (X) and the target column (y).
X, y = df[features], df[target]

In [6]:
# a. Handling missing values
# Every NaN in the feature matrix is replaced with 0.
# NOTE(review): 0 is not a natural value for a column such as 'YearBuilt';
# confirm whether the selected columns actually contain NaNs.
X = X.fillna(value=0)

In [7]:
# 3. Train-Test Split
# test_size=0.2 holds out 20% of the rows for evaluation; the fixed
# random_state makes the shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [8]:
# 4. Implement Algorithm: Random Forest Regressor
# 100 trees; random_state is pinned so repeated runs build the same forest.
model = RandomForestRegressor(n_estimators=100, random_state=42)

# 5. Train the model
model.fit(X_train, y_train)

# 6. Evaluate the model
# Predictions are made on X_test and scored against y_test.
y_pred = model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

# Plain string literal: there is nothing to interpolate, so no f-prefix is needed.
print("Model Performance:")
print(f"MAE: ${mae:,.2f}")
print(f"RMSE: ${rmse:,.2f}")
print(f"R² Score: {r2:.4f}")

# 7. Save the model
# Writes the fitted estimator to 'house_price_model.pkl' in the working directory.
# NOTE(review): joblib files are pickle-based; only load this file back from a trusted source.
joblib.dump(model, 'house_price_model.pkl')
print("Model saved successfully as 'house_price_model.pkl'")

Model Performance:
MAE: $19,227.63
RMSE: $28,971.42
R² Score: 0.8906
Model saved successfully as 'house_price_model.pkl'
