Problem : Predicting the prices of houses based on their characteristics

Method  : Probability-based learning

Model   : Bayesian regression

In [10]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the dataset
housing_data = pd.read_csv('https://raw.githubusercontent.com/Yamin88/datasets/main/california_housing.csv')

# Encode categorical variable using one-hot encoding
housing_data = pd.get_dummies(housing_data, columns=['ocean_proximity'])

# Impute missing values using median value of each column
housing_data = housing_data.fillna(housing_data.median())

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(housing_data.drop(columns=['median_house_value']), 
                                                    housing_data['median_house_value'], 
                                                    test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Feature Selection
corr_matrix = housing_data.corr()
corr_matrix['median_house_value'].sort_values(ascending=False)

# The most important features are median_income, total_rooms, housing_median_age, and latitude.

# Train the Bayesian regression model
bayesian_reg = BayesianRidge()
bayesian_reg.fit(X_train_scaled, y_train)

# Make predictions on the testing set
y_pred = bayesian_reg.predict(X_test_scaled)

# Evaluate the performance of the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print('Mean Squared Error: {:.2f}'.format(mse))
print('Mean Absolute Error: {:.2f}'.format(mae))
print('R^2 Score: {:.2f}'.format(r2))


Mean Squared Error: 4906991239.61
Mean Absolute Error: 50661.80
R^2 Score: 0.63
