In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
# Load the raw dataset from a CSV file located next to the notebook.
dataset_path = 'Housing.csv'
df = pd.read_csv(filepath_or_buffer=dataset_path)

In [3]:
# Names of every object-dtype column in df; these are the columns that get
# ordinal-encoded further down.
categorical_cols = list(df.select_dtypes(include='object').columns)
print(categorical_cols)

['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']


In [4]:
# Replace each categorical column with the numeric codes produced by
# OrdinalEncoder (fitted on, and applied to, the full frame).
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(df[categorical_cols])

# Carry df's index over to the encoded frame: pd.concat(axis=1) aligns rows on
# the index, so a fresh 0..n-1 index would misalign rows (and introduce NaNs)
# whenever df does not have the default RangeIndex.
encoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols, columns=categorical_cols, index=df.index)
numerical_df = df.drop(columns=categorical_cols)

# Non-categorical columns first, encoded categoricals after. Later cells slice
# the target out of column 0, so this column order matters.
encoded_df = pd.concat([numerical_df, encoded_categorical_df], axis=1)

In [5]:
# Standardize every column of the encoded frame with a single StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/validation
# split, so validation rows contribute to the scaling statistics. It is also
# applied to column 0, which is later used as the target, so the reported
# MAE/MSE are in standardized units rather than the target's original units.
# Consider fitting on the training split only - TODO confirm intent.
normalizer = StandardScaler()
dataset_arr = normalizer.fit(encoded_df).transform(encoded_df)

In [6]:
# Column 0 of the standardized array is used as the regression target;
# all remaining columns are the features.
y = dataset_arr[:, 0]
X = dataset_arr[:, 1:]

In [7]:
# Hold out a fraction of the rows for validation. The split is shuffled with a
# fixed seed; the same random_state is reused by the model cells below.
test_size, random_state, is_shuffle = 0.3, 1, True
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=test_size, random_state=random_state, shuffle=is_shuffle)

In [8]:
# Random forest with otherwise default settings, seeded with the shared
# random_state and fitted on the training split.
regressor = RandomForestRegressor(random_state=random_state)
regressor.fit(X=X_train, y=y_train)

In [9]:
# Score whichever estimator is currently bound to `regressor` on the
# validation split and report MAE and MSE.
y_pred = regressor.predict(X_val)
mae, mse = (
    metric(y_val, y_pred)
    for metric in (mean_absolute_error, mean_squared_error)
)

print(
    'Evaluation results on validation set: ',
    f'Mean Absolute Error: {mae}',
    f'Mean Squared Error: {mse}',
    sep='\n',
)

Evaluation results on validation set: 
Mean Absolute Error: 0.46093873321571177
Mean Squared Error: 0.37944418523089524


In [14]:
# AdaBoost with otherwise default settings, seeded with the shared
# random_state and fitted on the training split. Rebinds `regressor`.
regressor = AdaBoostRegressor(random_state=random_state)
regressor.fit(X=X_train, y=y_train)

In [15]:
# Score whichever estimator is currently bound to `regressor` on the
# validation split and report MAE and MSE.
y_pred = regressor.predict(X_val)
mae, mse = (
    metric(y_val, y_pred)
    for metric in (mean_absolute_error, mean_squared_error)
)

print(
    'Evaluation results on validation set: ',
    f'Mean Absolute Error: {mae}',
    f'Mean Squared Error: {mse}',
    sep='\n',
)

Evaluation results on validation set: 
Mean Absolute Error: 0.567680019897059
Mean Squared Error: 0.5739244030038942


In [16]:
# Gradient boosting with otherwise default settings, seeded with the shared
# random_state and fitted on the training split. Rebinds `regressor`.
regressor = GradientBoostingRegressor(random_state=random_state)
regressor.fit(X=X_train, y=y_train)

In [17]:
# Score whichever estimator is currently bound to `regressor` on the
# validation split and report MAE and MSE.
y_pred = regressor.predict(X_val)
mae, mse = (
    metric(y_val, y_pred)
    for metric in (mean_absolute_error, mean_squared_error)
)

print(
    'Evaluation results on validation set: ',
    f'Mean Absolute Error: {mae}',
    f'Mean Squared Error: {mse}',
    sep='\n',
)

Evaluation results on validation set: 
Mean Absolute Error: 0.4516626127750995
Mean Squared Error: 0.39610445936979427
