In [2]:
# Import thư viện cần thiết
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Tải và đọc bộ dữ liệu
dataset_path = '/content/Housing.csv'
df = pd.read_csv(dataset_path)

# Xử lý dữ liệu categorical
categorical_cols = df.select_dtypes(include=['object']).columns.to_list()
print(categorical_cols)

# Chuyển đổi dữ liệu categorical thành dạng số
encoder = OrdinalEncoder()
encoded_categorical_df = encoder.fit_transform(df[categorical_cols])
numerical_df = df.drop(columns=categorical_cols)
encoded_df = pd.concat([numerical_df, pd.DataFrame(encoded_categorical_df, columns=categorical_cols)], axis=1)

# Chuẩn hóa bộ dữ liệu
normalizer = StandardScaler()
dataset_arr = normalizer.fit_transform(encoded_df)

# Tách dữ liệu X, y
X, y = dataset_arr[:, 1:], dataset_arr[:, 0]

# Chia tập dữ liệu train, val
test_size = 0.3
random_state = 1
is_shuffle = True
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=is_shuffle)

# Huấn luyện mô hình Random Forest
regressor_rf = RandomForestRegressor(random_state=random_state)
regressor_rf.fit(X_train, y_train)

# Huấn luyện mô hình AdaBoost
from sklearn.ensemble import AdaBoostRegressor
regressor_ab = AdaBoostRegressor(random_state=random_state)
regressor_ab.fit(X_train, y_train)

# Huấn luyện mô hình Gradient Boosting
from sklearn.ensemble import GradientBoostingRegressor
regressor_gb = GradientBoostingRegressor(random_state=random_state)
regressor_gb.fit(X_train, y_train)

# Đánh giá mô hình
y_pred_rf = regressor_rf.predict(X_val)
y_pred_ab = regressor_ab.predict(X_val)
y_pred_gb = regressor_gb.predict(X_val)

# Tính toán MAE và MSE cho Random Forest
mae_rf = mean_absolute_error(y_val, y_pred_rf)
mse_rf = mean_squared_error(y_val, y_pred_rf)
print('Random Forest Evaluation results on validation set:')
print(f'Mean Absolute Error: {mae_rf}')
print(f'Mean Squared Error: {mse_rf}')

# Tính toán MAE và MSE cho AdaBoost
mae_ab = mean_absolute_error(y_val, y_pred_ab)
mse_ab = mean_squared_error(y_val, y_pred_ab)
print('AdaBoost Evaluation results on validation set:')
print(f'Mean Absolute Error: {mae_ab}')
print(f'Mean Squared Error: {mse_ab}')

# Tính toán MAE và MSE cho Gradient Boosting
mae_gb = mean_absolute_error(y_val, y_pred_gb)
mse_gb = mean_squared_error(y_val, y_pred_gb)
print('Gradient Boosting Evaluation results on validation set:')
print(f'Mean Absolute Error: {mae_gb}')
print(f'Mean Squared Error: {mse_gb}')

['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']
Random Forest Evaluation results on validation set:
Mean Absolute Error: 0.46093873321571177
Mean Squared Error: 0.37944418523089524
AdaBoost Evaluation results on validation set:
Mean Absolute Error: 0.567680019897059
Mean Squared Error: 0.5739244030038942
Gradient Boosting Evaluation results on validation set:
Mean Absolute Error: 0.4516626127750995
Mean Squared Error: 0.39610445936979427
