## Import Library

In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import math

# Boston Housing CSV, fetched over HTTP (this cell needs network access).
url = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
df = pd.read_csv(filepath_or_buffer=url)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


## Pisahkan Fitur dan Target

In [9]:
# Step 3: separate the features from the target
# NOTE(review): the head() preview earlier in this notebook lists an "Unnamed: 0"
# column; if that is a real column of df (not just the rendered index), it ends
# up in X as a feature -- confirm.
y = df['medv']                 # target: the 'medv' column
X = df.drop(columns='medv')    # features: every remaining column

# Hold out 20% of the rows for testing; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Model - Bagging (Random Forest)

In [10]:
# Step 4: build the bagging model (Random Forest)
# fit() returns the fitted estimator, so construction and training are chained.
bagging_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
y_pred_bagging = bagging_model.predict(X_test)

## Model - Boosting (Gradient Boosting)

In [11]:
# Step 5: build the boosting model (Gradient Boosting)
# fit() returns the fitted estimator, so construction and training are chained.
boosting_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)
y_pred_boosting = boosting_model.predict(X_test)

## Evaluasi Model

In [12]:
# Step 6: model evaluation
def evaluate(y_true, y_pred):
    """Score predictions against the true target values.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values to compare with ``y_true``.

    Returns:
        A ``(mse, rmse, r2)`` tuple: the mean squared error, its square
        root, and the R-squared score.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return squared_error, math.sqrt(squared_error), r2_score(y_true, y_pred)

# Score each model's test-set predictions; each call yields (mse, rmse, r2).
(mse_bag, rmse_bag, r2_bag) = evaluate(y_true=y_test, y_pred=y_pred_bagging)
(mse_boost, rmse_boost, r2_boost) = evaluate(y_true=y_test, y_pred=y_pred_boosting)

## Hasil

In [13]:
# Step 7: show the results
# One print per model; sep="\n" puts each metric on its own line, and the
# trailing "\n" in the first report leaves a blank line between the two.
print(
    "🔹 Bagging (Random Forest)",
    f"MSE       : {mse_bag:.2f}",
    f"RMSE      : {rmse_bag:.2f}",
    f"R-Squared : {r2_bag:.2f}\n",
    sep="\n",
)

print(
    "🔹 Boosting (Gradient Boosting)",
    f"MSE       : {mse_boost:.2f}",
    f"RMSE      : {rmse_boost:.2f}",
    f"R-Squared : {r2_boost:.2f}",
    sep="\n",
)

🔹 Bagging (Random Forest)
MSE       : 7.90
RMSE      : 2.81
R-Squared : 0.89

🔹 Boosting (Gradient Boosting)
MSE       : 6.21
RMSE      : 2.49
R-Squared : 0.92


# **Penjelasan Matematika**
# 1. Mean Squared Error (MSE)

$$MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y_i})^2$$

- Rata-rata dari kuadrat selisih antara nilai aktual $y_i$ dan nilai prediksi $\hat{y_i}$ (semakin kecil semakin baik).

# 2. Root Mean Squared Error (RMSE)

$$RMSE = \sqrt{MSE} = \sqrt{\frac{1}{n} \sum_{i=1}^{n}(y_i - \hat{y_i})^2}$$

- Satuan RMSE sama dengan satuan target, sehingga RMSE lebih mudah diinterpretasikan daripada MSE.

# 3. R-Squared ($R^2$)

$$R^2 = 1 - \frac{\sum_{i=1}^{n}(y_i - \hat{y_i})^2}{\sum_{i=1}^{n}(y_i - \bar{y})^2}$$

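- Proporsi variasi target yang dapat dijelaskan oleh model. Nilai maksimalnya 1; nilainya bisa negatif jika model lebih buruk daripada selalu memprediksi rata-rata $\bar{y}$.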
