## Import dan Load Dataset

In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Location of the raw Infrared.csv file hosted on GitHub.
url = "https://raw.githubusercontent.com/farrelrassya/teachingMLDL/main/02.%20Deep%20Learning/Dataset/Infrared.csv"
# Download and parse the CSV into the DataFrame that every later cell works on.
df = pd.read_csv(filepath_or_buffer=url)

## Encode Data Kategorikal (jika ada)

In [28]:
# Replace every object-dtype column with the integer codes produced by a
# LabelEncoder. A new encoder is fitted per column and is not kept afterwards.
object_columns = df.select_dtypes(include=['object']).columns
for col in object_columns:
    column_encoder = LabelEncoder()
    df[col] = column_encoder.fit_transform(df[col])

## Menangani Missing Values

In [29]:
# Fill each missing value with the mean of its own column (modifies df in place).
# NOTE: the means are taken over the whole frame, i.e. before the train/test
# split performed further down, so test rows contribute to the imputed values.
column_means = df.mean(numeric_only=True)
df.fillna(column_means, inplace=True)

## Pisahkan Fitur dan Target

In [30]:
# The last column of the frame is the value to predict; every other column
# is used as an input feature.
target_column = df.columns[-1]
y = df[target_column]
X = df.drop(columns=target_column)

## Split Data Latih dan Uji

In [31]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Model Bagging: Random Forest

In [32]:
# Fit a 100-tree random forest on the training split (fit() returns the
# estimator itself), then predict the held-out test rows.
bagging_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
y_pred_bag = bagging_model.predict(X_test)

## Model Boosting: Gradient Boosting

In [33]:
# Fit a gradient-boosting regressor (100 stages, learning rate 0.1) on the
# training split (fit() returns the estimator itself), then predict the
# held-out test rows.
boosting_model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)
y_pred_boost = boosting_model.predict(X_test)

## Evaluasi Model

In [34]:
def evaluate_model(y_true, y_pred, model_name):
    """Print MSE, RMSE and R² for one set of predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values to compare against ``y_true``.
        model_name: Label inserted into the header line of the report.

    Returns:
        None. The three metrics are only printed, each with four decimals.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    determination = r2_score(y_true, y_pred)
    root_error = np.sqrt(squared_error)

    # The header starts with a newline so consecutive reports are separated
    # by a blank line.
    report_lines = [
        f"\nEvaluasi Model {model_name}",
        f"MSE  : {squared_error:.4f}",
        f"RMSE : {root_error:.4f}",
        f"R²   : {determination:.4f}",
    ]
    print("\n".join(report_lines))

# Report both models against the same held-out targets, bagging first.
for model_predictions, model_label in (
    (y_pred_bag, "Random Forest (Bagging)"),
    (y_pred_boost, "Gradient Boosting (Boosting)"),
):
    evaluate_model(y_test, model_predictions, model_label)


Evaluasi Model Random Forest (Bagging)
MSE  : 0.0570
RMSE : 0.2388
R²   : 0.7291

Evaluasi Model Gradient Boosting (Boosting)
MSE  : 0.0494
RMSE : 0.2222
R²   : 0.7654


# **Penjelasan Matematika**

# 1. MSE (Mean Squared Error)

$$MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2$$

Mengukur rata-rata kuadrat selisih antara nilai prediksi dan nilai asli. Semakin kecil nilainya, semakin baik modelnya.

# 2. RMSE (Root Mean Squared Error)

$$RMSE = \sqrt{MSE}$$

Akar kuadrat dari MSE; satuannya sama dengan satuan target asli (misalnya derajat suhu), sehingga lebih mudah ditafsirkan.

# 3. R² (R-Squared)

$$R^2 = 1 - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sum_{i=1}^{n} (y_i - \bar{y})^2}$$

Menunjukkan seberapa baik model menjelaskan variasi data. Semakin dekat ke 1, semakin baik.
