### D.1 Bangging untuk regresi

In [259]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split #membagi dataset menjadi data latih dan data uji.
from sklearn.ensemble import BaggingRegressor # untuk bagging
from sklearn.linear_model import LinearRegression #memprediksi nilai kontinu berdasarkan hubungan linier antara variabel input dan output.
from sklearn.metrics import mean_squared_error #menghitung Mean Squared Error (MSE), yaitu ukuran untuk menilai seberapa baik model melakukan prediksi (semakin rendah MSE, semakin baik).

In [260]:
#Import dataset
X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]) #Fitur 1 (Fitur, Independen)

# Reshape X agar menjadi array 2D
# Mengubah data dari 1D ( matrix baris) menjadi 2D (matrix kolom)
X = X.reshape(-1, 1)
Y = np.array([2.5, 3.7, 4.8, 5.9, 6.2, 7.4, 8.5, 9.6, 10.7, 11.8, 12.9, 14.0]) # Fitur 2 (Target, Dependen)
print(X)

[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]]


In [261]:
# Split data untuk regresi
# Menggunakan fungsi train_test_split dari scikit-learn secara acak
# 25 data uji dan 75 data latih
#random_state = seed
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

In [262]:
print (X_train)
print (Y_train)

[[ 9]
 [ 6]
 [ 3]
 [ 2]
 [12]
 [ 5]
 [ 8]
 [ 4]
 [ 7]]
[10.7  7.4  4.8  3.7 14.   6.2  9.6  5.9  8.5]


In [263]:
# Bagging Regressor dengan Linear Regression sebagai base estimator
# nilai estimator = banyaknya himpunan (m)
be = LinearRegression()
model = BaggingRegressor (be, n_estimators=10, random_state=42)
model.fit(X_train, Y_train)

In [264]:
# Prediksi dan evaluasi
Y_pred = model.predict(X_test)
mse = mean_squared_error(Y_test, Y_pred)
rmse = np.sqrt(mse)
mae = np.mean(np.abs(Y_pred - Y_test))
r2_score = 1 - (np.sum((Y_pred - Y_test)**2) / np.sum((Y_test - np.mean(Y_test))**2))
print("MSE for Bagging Regressor (Linear Regression):", mse)
print("RMSE for Bagging Regressor (Linear Regression):", rmse)
print("MAE for Bagging Regressor (Linear Regression):", mae)
print("R2 Score for Bagging Regressor (Linear Regression):", r2_score)

MSE for Bagging Regressor (Linear Regression): 0.10325684374812753
RMSE for Bagging Regressor (Linear Regression): 0.3213360293339786
MAE for Bagging Regressor (Linear Regression): 0.30692349127096447
R2 Score for Bagging Regressor (Linear Regression): 0.9952552251928257


In [265]:
#Menampilkan data pada tiap model
print("Data tiap model bootsrap:")
for i, samples in enumerate(model.estimators_samples_, start=1):
  print(f"Data model {i} = {X_train [samples].flatten()}")

Data tiap model bootsrap:
Data model 1 = [ 3  4  4  7  2  3  7 12  8]
Data model 2 = [4 2 4 2 5 2 9 2 7]
Data model 3 = [8 9 4 4 7 5 7 3 5]
Data model 4 = [ 9  5  4 12  6  2  8  3  5]
Data model 5 = [8 8 2 2 7 2 4 7 8]
Data model 6 = [5 3 6 3 4 2 6 2 8]
Data model 7 = [2 3 6 5 3 2 6 5 5]
Data model 8 = [ 8  3  7  3  5  9 12  2  5]
Data model 9 = [ 7 12 12  2  3  2  8  7  2]
Data model 10 = [7 6 7 9 4 8 7 6 6]


In [266]:
#Menampilkan hasil evaluasi tiap model
# Loop untuk menampilkan akurasi setiap estimator/model
print("Akurasi tiap model:")
for i, estimator in enumerate (model.estimators_, start=1):
  # Prediksi menggunakan estimator ke-i
  Y_pred = estimator.predict(X_test)
  mse = mean_squared_error(Y_test, Y_pred)
  print(f"model {i} = {mse:.4f}")

Akurasi tiap model:
model 1 = 0.0294
model 2 = 0.0875
model 3 = 0.1207
model 4 = 0.0126
model 5 = 0.1079
model 6 = 0.2826
model 7 = 0.9931
model 8 = 0.0150
model 9 = 0.0070
model 10 = 0.0997


### D. 2Bagging untuk klasifikasi

In [267]:
#Import library
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [268]:
#Import dataset
X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
# Reshape X agar menjadi array 2D
X = X.reshape(-1, 1)
Y = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1])

In [269]:
# Split data untuk data uji latih dan data uji
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=42)

In [270]:
print (X_train)
print (Y_train)

[[ 9]
 [ 6]
 [ 3]
 [ 2]
 [12]
 [ 5]
 [ 8]
 [ 4]
 [ 7]]
[0 1 0 1 1 0 1 1 0]


In [271]:
# Bagging Classifier dengan Naive Bayes sebagai base estimator
be_c = GaussianNB()
model = BaggingClassifier(be_c, n_estimators=15, random_state=42)
model.fit(X_train, Y_train)

In [272]:
# Prediksi dan evaluasi
Y_pred = model.predict(X_test)
accuracy = accuracy_score (Y_test, Y_pred)
f1_score = accuracy_score (Y_test, Y_pred)
recall_score = accuracy_score (Y_test, Y_pred)
precision_score = accuracy_score (Y_test, Y_pred)
print("Accuracy for Bagging Classifier (Naive Bayes):", accuracy)
print("F1 Score for Bagging Classifier (Naive Bayes):", f1_score)
print("Recall Score for Bagging Classifier (Naive Bayes):", recall_score)
print("Precision Score for Bagging Classifier (Naive Bayes):", precision_score)

Accuracy for Bagging Classifier (Naive Bayes): 0.3333333333333333
F1 Score for Bagging Classifier (Naive Bayes): 0.3333333333333333
Recall Score for Bagging Classifier (Naive Bayes): 0.3333333333333333
Precision Score for Bagging Classifier (Naive Bayes): 0.3333333333333333


In [273]:
# Menampilkan data pada setiap model
print("Data tiap model bootsrap:")
for i, samples in enumerate (model.estimators_samples_, start=1):
  print(f"Data model {i} = {X_train [samples].flatten()}")

Data tiap model bootsrap:
Data model 1 = [ 3  4  4  7  2  3  7 12  8]
Data model 2 = [4 2 4 2 5 2 9 2 7]
Data model 3 = [8 9 4 4 7 5 7 3 5]
Data model 4 = [ 9  5  4 12  6  2  8  3  5]
Data model 5 = [8 8 2 2 7 2 4 7 8]
Data model 6 = [5 3 6 3 4 2 6 2 8]
Data model 7 = [2 3 6 5 3 2 6 5 5]
Data model 8 = [ 8  3  7  3  5  9 12  2  5]
Data model 9 = [ 7 12 12  2  3  2  8  7  2]
Data model 10 = [7 6 7 9 4 8 7 6 6]
Data model 11 = [ 2  3  7 12  8  8  2 12  3]
Data model 12 = [12  4  7  8  4  6  4  3  7]
Data model 13 = [ 2 12  7  8  6  5  3 12  2]
Data model 14 = [ 9  8  4 12  9  5  5  8  9]
Data model 15 = [ 9  7  3  9  3  8  9  2 12]


In [274]:
# Loop untuk menampilkan akurasi setiap estimator
print("Akurasi tiap model:")
for i, estimator in enumerate (model.estimators_, start=1):
  # Prediksi menggunakan estimator ke-i
  Y_pred = estimator.predict(X_test)
  acc = accuracy_score (Y_test, Y_pred)
  print(f"model {i} = {acc:.4f}")

Akurasi tiap model:
model 1 = 0.3333
model 2 = 0.3333
model 3 = 0.3333
model 4 = 0.3333
model 5 = 0.3333
model 6 = 0.3333
model 7 = 0.3333
model 8 = 0.6667
model 9 = 0.3333
model 10 = 0.0000
model 11 = 0.3333
model 12 = 0.3333
model 13 = 0.3333
model 14 = 0.3333
model 15 = 0.6667


### D.3 Implementasi Bagging pada dataset berdimensi tinggi

In [275]:
#Import library
import pandas as pd
from sklearn.ensemble import BaggingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [276]:
#Import dataset
data = pd.read_csv('/content/df_final_features.csv')
data.head(5)

Unnamed: 0,Name,Sex,Age,Height,Weight,Team,Year,Season,Host_City,Host_Country,...,GDP_Per_Capita_Constant_LCU_Value,Cereal_yield_kg_per_hectare_Value,Military_expenditure_current_LCU_Value,Tax_revenue_current_LCU_Value,Expense_current_LCU_Value,Central_government_debt_total_current_LCU_Value,Representing_Host,Avg_Temp,Medal,Medal_Binary
0,A Dijiang,M,24.0,180.0,80.0,China,1992,Summer,Barcelona,Spain,...,6875.676999,4362.3,68492870000.0,1605180000000.0,7161170000000.0,4396980000000.0,0,6.95,0,0
1,A Lamusi,M,23.0,170.0,60.0,China,2012,Summer,London,United Kingdom,...,41274.12736,5825.2,993500000000.0,5524090000000.0,7161170000000.0,39741300000000.0,0,6.95,0,0
2,Christine Jacoba Aaftink,F,21.0,185.0,82.0,Netherlands,1988,Winter,Calgary,Canada,...,24946.56591,6194.0,6035300000.0,53110440000.0,112510000000.0,144152000000.0,0,9.25,0,0
3,Christine Jacoba Aaftink,F,21.0,185.0,82.0,Netherlands,1988,Winter,Calgary,Canada,...,24946.56591,6194.0,6035300000.0,53110440000.0,112510000000.0,144152000000.0,0,9.25,0,0
4,Christine Jacoba Aaftink,F,25.0,185.0,82.0,Netherlands,1992,Winter,Albertville,France,...,27485.5034,7459.2,6307500000.0,68461820000.0,133842000000.0,162458000000.0,0,9.25,0,0


In [277]:
# Persiapan data
from sklearn.impute import SimpleImputer # Import SimpleImputer

#defined X and Y
X = data[['Age', 'Height', 'Weight']] #data
Y = data['Sex'] # Target

#Split data
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=42)
#Create an imputer to fill NaN values with the mean of each column
imputer = SimpleImputer (strategy='mean') # or strategy='median'

# Fit the imputer on the training data and transform both training and testing data
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

In [280]:
print (X_train)
print (y_train)

#tampilkan jumlah data latih x (X_train)
print(X_train.shape)

[[ 18.        189.         79.       ]
 [ 21.        161.         63.       ]
 [ 23.        166.         70.       ]
 ...
 [ 26.        186.         70.       ]
 [ 36.        181.         82.       ]
 [ 22.        177.6685237  73.0266582]]
101804    M
71686     F
53571     M
197794    F
129378    F
         ..
119879    F
103694    M
131932    M
146867    M
121958    M
Name: Sex, Length: 151962, dtype: object
(151962, 3)


In [279]:
# Melatih dan evaluasi model tanpa bagging

# model Decision Tree tanpa bagging
model = DecisionTreeClassifier()

# Melatih model
model.fit(X_train, y_train)

# Membuat prediksi pada data uji
y_pred = model.predict(X_test)

# Menghitung akurasi
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi DecisionTree tanpa bagging: {accuracy:.4f}")

Akurasi DecisionTree tanpa bagging: 0.8041


In [258]:
#Melatih dan evaluasi model dengan bagging

be_c = DecisionTreeClassifier()
#Membuat model Bagging dengan Naive Bayes sebagai estimator dasar
bagging_clf = BaggingClassifier(be_c, n_estimators=100, random_state=42)

#Melatih model
bagging_clf.fit(X_train, y_train)

#Membuat prediksi pada data uji
y_pred = bagging_clf.predict(X_test)

#Menghitung akurasi
accuracy = accuracy_score(y_test, y_pred)
print(f"Akurasi Bagging dengan DecisionTree: {accuracy:.4f}")

Akurasi Bagging dengan DecisionTree: 0.8088
