In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib

# --- Configuration -----------------------------------------------------------
RANDOM_STATE = 42  # shared by the split and the forest so reruns give the same metrics
TARGET_COLUMN = 'median_house_value'
MODEL_PATH = 'house_price_model.pkl'

# --- Load and prepare the data -----------------------------------------------
df = pd.read_csv('housing.csv')

# Fill missing values in numeric columns with that column's median; non-numeric
# columns are not touched by this step.
# NOTE(review): the medians are computed on the full frame, before the
# train/test split below, so held-out rows influence the fill values.
# Consider deriving them from the training split only.
df = df.fillna(df.select_dtypes(include=[np.number]).median())

# One-hot encode the remaining non-numeric columns (first level of each dropped).
df = pd.get_dummies(df, drop_first=True)

X = df.drop(columns=TARGET_COLUMN)
y = df[TARGET_COLUMN]

# --- Split, train, predict ---------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Seed the forest: without random_state the bootstrap samples and feature
# subsets differ on every run, so the reported metrics could not be reproduced.
model = RandomForestRegressor(random_state=RANDOM_STATE)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# --- Evaluate on the held-out split ------------------------------------------
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)  # computed once, reused for RMSE
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("MAE:", mae)
print("MSE:", mse)
print("RMSE:", rmse)

print("R² Score:", r2)

# --- Persist and download the fitted model -----------------------------------
joblib.dump(model, MODEL_PATH)

# Colab-only helper: triggers a browser download of the saved file.
from google.colab import files
files.download(MODEL_PATH)


MAE: 31557.402807655042
MSE: 2383732405.1528254
RMSE: 48823.482108026925
R² Score: 0.8180924852672514


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [5]:
import pickle

# Serialize the object bound to `model` with the standard-library pickle
# format. The destination is the same path that joblib.dump wrote to in the
# training cell, so that earlier file is overwritten here.
with open('house_price_model.pkl', mode='wb') as model_file:
    pickle.Pickler(model_file).dump(model)


In [6]:
from google.colab import files

# Hand the serialized model file to the browser as a download (Colab only).
artifact_name = 'house_price_model.pkl'
files.download(artifact_name)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>