# Import thư viện:

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Read dataset:

In [2]:
# Location of the housing CSV, relative to the notebook's working directory.
data_path = './Housing.csv'
# Load the whole file into a DataFrame; the following cells read it as `df`.
df = pd.read_csv(filepath_or_buffer=data_path)

# Xử lý dữ liệu categorical:

In [3]:
# Collect the names of the object-dtype columns; these are the ones that get
# encoded in the next cell.
object_typed_df = df.select_dtypes(include='object')
categorical_cols = object_typed_df.columns.tolist()
print(categorical_cols)

['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']


In [4]:
# Replace the categorical columns with their ordinal-encoded counterparts.
# The fitted encoder stays available as `ordinal_encoder`.
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(
    df[categorical_cols]
)
# Give the encoded frame df's own index. Without it the frame gets a fresh
# 0..n-1 index, and the column-wise concat below (which aligns rows by index
# label) would pair rows incorrectly and introduce NaNs whenever df's index
# is not the default one.
encoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols,
    columns=categorical_cols,
    index=df.index
)
# Everything that is not categorical is carried over unchanged.
numerical_df = df.drop(categorical_cols, axis=1)
# Non-categorical columns first, encoded categorical columns after them.
encoded_df = pd.concat(
    [numerical_df, encoded_categorical_df], axis=1
)

# Chuẩn hóa bộ dữ liệu:

In [5]:
# Standardize every column of the encoded table (the label column included).
# NOTE(review): the scaler is fitted on all rows before the train/validation
# split further down, so validation rows contribute to the scaling
# statistics. Fitting on the training rows only would avoid that leakage -
# confirm whether this is intentional.
normalizer = StandardScaler()
normalizer.fit(encoded_df)
data_arr = normalizer.transform(encoded_df)

# Tách dữ liệu X (feature), y (label): 

In [6]:
# Column 0 is taken as the label vector; every remaining column is a feature.
# NOTE(review): this relies on the target being the first column of
# encoded_df - confirm against the CSV's column order.
y = data_arr[:, 0]
X = data_arr[:, 1:]

# Chia tập dữ liệu train, val:

In [7]:
# Hold out 30% of the rows for validation. The rows are shuffled with a fixed
# seed; the same `random_state` is reused by the model cells below.
test_size, random_state, is_shuffle = 0.3, 1, True
split_kwargs = {
    'test_size': test_size,
    'random_state': random_state,
    'shuffle': is_shuffle,
}
X_train, X_val, y_train, y_val = train_test_split(X, y, **split_kwargs)

# Huấn luyện mô hình:

In [8]:
# Random forest with library-default hyperparameters; only the seed is set.
regressor_random_forest = RandomForestRegressor(random_state=random_state)
# Kept as the cell's last expression so the fitted estimator is displayed.
regressor_random_forest.fit(X=X_train, y=y_train)

In [11]:
# AdaBoost with library-default hyperparameters; only the seed is set.
regressor_ada_boost = AdaBoostRegressor(random_state=random_state)
# Kept as the cell's last expression so the fitted estimator is displayed.
regressor_ada_boost.fit(X=X_train, y=y_train)

In [12]:
# Gradient boosting with library-default hyperparameters; only the seed is set.
regressor_gradient_boosting = GradientBoostingRegressor(random_state=random_state)
# Kept as the cell's last expression so the fitted estimator is displayed.
regressor_gradient_boosting.fit(X=X_train, y=y_train)

# Đánh giá mô hình:

In [13]:
# Predict the validation rows with each fitted model, in the same order as
# the names on the left-hand side.
y_pred_rf, y_pred_ada, y_pred_gradient = (
    fitted_model.predict(X_val)
    for fitted_model in (
        regressor_random_forest,
        regressor_ada_boost,
        regressor_gradient_boosting,
    )
)

In [14]:
# Score the random-forest predictions against the validation labels.
# y_val comes from the standardized array, so both errors are expressed in
# the scaled label's units, not the label's original units.
mae_rf, mse_rf = (
    metric(y_val, y_pred_rf)
    for metric in (mean_absolute_error, mean_squared_error)
)

# One print call with a newline separator emits the same three lines.
print(
    'Evaluation results on validation set:',
    f'Mean Absolute Error: {mae_rf}',
    f'Mean Squared Error: {mse_rf}',
    sep='\n',
)

Evaluation results on validation set:
Mean Absolute Error: 0.46093873321571177
Mean Squared Error: 0.37944418523089524


In [15]:
# Score the AdaBoost predictions against the validation labels.
# y_val comes from the standardized array, so both errors are expressed in
# the scaled label's units, not the label's original units.
mae_ada, mse_ada = (
    metric(y_val, y_pred_ada)
    for metric in (mean_absolute_error, mean_squared_error)
)

# One print call with a newline separator emits the same three lines.
print(
    'Evaluation results on validation set:',
    f'Mean Absolute Error: {mae_ada}',
    f'Mean Squared Error: {mse_ada}',
    sep='\n',
)

Evaluation results on validation set:
Mean Absolute Error: 0.567680019897059
Mean Squared Error: 0.5739244030038942


In [16]:
# Score the gradient-boosting predictions against the validation labels.
# y_val comes from the standardized array, so both errors are expressed in
# the scaled label's units, not the label's original units.
mae_gradient, mse_gradient = (
    metric(y_val, y_pred_gradient)
    for metric in (mean_absolute_error, mean_squared_error)
)

# One print call with a newline separator emits the same three lines.
print(
    'Evaluation results on validation set:',
    f'Mean Absolute Error: {mae_gradient}',
    f'Mean Squared Error: {mse_gradient}',
    sep='\n',
)

Evaluation results on validation set:
Mean Absolute Error: 0.4516626127750995
Mean Squared Error: 0.39610445936979427
