<a href="https://colab.research.google.com/github/conglapgit45/Ensemble_Learning/blob/main/Ensemble_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [5]:
# Download the dataset from Google Drive by file ID using the gdown CLI
# (the log below shows it is stored as Housing.csv).
# NOTE(review): newer gdown releases reportedly accept the bare file ID and
# flag `--id` as deprecated — confirm against the installed version.
!gdown --id 1qeJqFtRdjjHqExbWJcgKy0yJbczTTAE3

Downloading...
From: https://drive.google.com/uc?id=1qeJqFtRdjjHqExbWJcgKy0yJbczTTAE3
To: /content/Housing.csv
100% 30.0k/30.0k [00:00<00:00, 53.5MB/s]


In [6]:
# Read data

# The download cell stores Housing.csv in the current working directory
# (/content on Colab), so a relative path loads the same file there while
# keeping the notebook runnable outside Colab as well.
dataset_path = 'Housing.csv'
df = pd.read_csv(dataset_path)
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [7]:
# Handle categorical fields

# Gather the names of all object-dtype (text) columns as a plain list.
categorical_cols = list(df.select_dtypes(include='object').columns)
print(categorical_cols)

['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']


In [8]:
# Convert categorical to numerical

# Fit the encoder on the text columns and replace every category with a
# numeric code (shown as 0.0, 1.0, ... in the preview).
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(df[categorical_cols])
encoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols,
    columns=categorical_cols,
    # Reuse the source row labels: pd.concat(axis=1) aligns on the index, so a
    # fresh 0..n-1 index would misalign rows (and introduce NaNs) whenever df
    # does not carry the default RangeIndex.
    index=df.index,
)

# Numeric columns are kept untouched and placed before the encoded ones.
numerical_df = df.drop(columns=categorical_cols)
encoded_df = pd.concat([numerical_df, encoded_categorical_df], axis=1)
encoded_df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking,mainroad,guestroom,basement,hotwaterheating,airconditioning,prefarea,furnishingstatus
0,13300000,7420,4,2,3,2,1.0,0.0,0.0,0.0,1.0,1.0,0.0
1,12250000,8960,4,4,4,3,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2,12250000,9960,3,2,2,2,1.0,0.0,1.0,0.0,0.0,1.0,1.0
3,12215000,7500,4,2,2,3,1.0,0.0,1.0,0.0,1.0,1.0,0.0
4,11410000,7420,4,1,2,2,1.0,1.0,1.0,0.0,1.0,0.0,0.0


In [9]:
# Standardize data

# The scaler is fitted on every column of encoded_df (the price column
# included) and on all rows, i.e. before the train/validation split that
# happens further down in the notebook.
normalizer = StandardScaler().fit(encoded_df)
dataset_arr = normalizer.transform(encoded_df)
dataset_arr

In [11]:
# Split data to features and label

# dataset_arr has the same column order as encoded_df, so look the target up
# by name instead of assuming it sits at position 0. The split then stays
# correct if the column order of encoded_df ever changes.
target_col = 'price'
target_idx = encoded_df.columns.get_loc(target_col)
feature_idx = [i for i in range(dataset_arr.shape[1]) if i != target_idx]

# X keeps every non-target column in its original order; y is the target.
X, y = dataset_arr[:, feature_idx], dataset_arr[:, target_idx]

In [12]:
# Split data to training and validation

# 30% of the rows are held out for validation; rows are shuffled with a
# fixed seed, and the same seed is reused when building the models.
test_size, random_state, is_shuffle = 0.3, 1, True

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=test_size, random_state=random_state, shuffle=is_shuffle
)

In [15]:
# Fit the three ensemble regressors on the training split, all sharing the
# same seed. fit() returns the estimator itself, so construction and
# training are chained.

# Random forest
regressor_rf = RandomForestRegressor(random_state=random_state).fit(X_train, y_train)

# AdaBoost
regressor_ab = AdaBoostRegressor(random_state=random_state).fit(X_train, y_train)

# Gradient Boosting
regressor_gb = GradientBoostingRegressor(random_state=random_state).fit(X_train, y_train)

# Keep the fitted Gradient Boosting estimator as the cell's displayed value.
regressor_gb

In [18]:
# Validate the models using MAE and MSE

# Predictions on the validation split, one array per model.
y_pred_rf = regressor_rf.predict(X_val)
y_pred_ab = regressor_ab.predict(X_val)
y_pred_gb = regressor_gb.predict(X_val)

# Both metrics per model; y_val comes from the standardized array, so the
# errors are expressed in standardized units.
mae_rf, mse_rf = mean_absolute_error(y_val, y_pred_rf), mean_squared_error(y_val, y_pred_rf)
mae_ab, mse_ab = mean_absolute_error(y_val, y_pred_ab), mean_squared_error(y_val, y_pred_ab)
mae_gb, mse_gb = mean_absolute_error(y_val, y_pred_gb), mean_squared_error(y_val, y_pred_gb)

# One report per model: heading, MAE, MSE, then a blank separator line.
for heading, mae, mse in (
    ('Evaluation results on validation set of Random forest:', mae_rf, mse_rf),
    ('Evaluation results on validation set of AdaBoost:', mae_ab, mse_ab),
    ('Evaluation results on validation set of Gradient Boosting:', mae_gb, mse_gb),
):
    print(heading)
    print(f'Mean Absolute Error: {mae}')
    print(f'Mean Squared Error: {mse}')
    print('')

Evaluation results on validation set of Random forest:
Mean Absolute Error: 0.46093873321571177
Mean Squared Error: 0.37944418523089524

Evaluation results on validation set of AdaBoost:
Mean Absolute Error: 0.567680019897059
Mean Squared Error: 0.5739244030038942

Evaluation results on validation set of Gradient Boosting:
Mean Absolute Error: 0.4516626127750995
Mean Squared Error: 0.39610445936979427

