In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold, LeaveOneOut
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

dataset = pd.read_csv('milk.csv')

# a. Hold-out Method (70%-30%)
train_data, test_data, train_label, test_label = train_test_split(dataset.iloc[:, :-1], dataset.iloc[:, -1], test_size=0.3, random_state=100)

# b. K-Fold Cross Validation (k=10)
kf = KFold(n_splits=10, shuffle=True, random_state=0)

# c. Leave-One-Out (LOO)
loo = LeaveOneOut()

print("Hold-out Method (70%-30%)")
print("Train data:", train_data)
print("Train label:", train_data)
print("Test data:", test_data)
print("Test label:", test_label)

print("\nK-Fold Cross Validation (k=10):\n", kf)

print("\nLeave-One-Out (LOO):\n", loo)

Hold-out Method (70%-30%)
Train data:       pH  Temprature  Taste  Odor  Fat   Turbidity  Colour
255  6.8          40      1     0     1          0     245
662  4.7          38      1     0     1          0     255
899  6.6          43      0     0     1          0     250
380  6.6          38      1     0     1          0     255
954  6.6          37      1     0     1          0     255
..   ...         ...    ...   ...   ...        ...     ...
802  6.6          45      0     1     1          1     250
53   8.1          66      1     0     1          1     255
350  6.5          38      1     0     0          0     255
79   6.8          34      0     0     0          1     240
792  3.0          40      1     1     1          1     255

[741 rows x 7 columns]
Train label:       pH  Temprature  Taste  Odor  Fat   Turbidity  Colour
255  6.8          40      1     0     1          0     245
662  4.7          38      1     0     1          0     255
899  6.6          43      0     0     1 

In [2]:
from sklearn.naive_bayes import GaussianNB

# Inisialisasi model Naive Bayes
nb_model = GaussianNB()

# a. Hold-out Method (70%-30%)
nb_model.fit(train_data, train_label)
holdout_pred = nb_model.predict(test_data)
holdout_accuracy = accuracy_score(test_label, holdout_pred)
print("Hold-out Method Accuracy:", holdout_accuracy)

# b. K-Fold Cross Validation (k=10)
kfold_accuracy = []
for train_index, test_index in kf.split(dataset):
    kf_train_data, kf_test_data = dataset.iloc[train_index, :-1], dataset.iloc[test_index, :-1]
    kf_train_label, kf_test_label = dataset.iloc[train_index, -1], dataset.iloc[test_index, -1]
    nb_model.fit(kf_train_data, kf_train_label)
    kf_pred = nb_model.predict(kf_test_data)
    kf_accuracy = accuracy_score(kf_test_label, kf_pred)
    kfold_accuracy.append(kf_accuracy)
print("K-Fold Cross Validation Accuracy:", np.mean(kfold_accuracy))

# c. Leave-One-Out (LOO)
loo_accuracy = []
for train_index, test_index in loo.split(dataset):
    loo_train_data, loo_test_data = dataset.iloc[train_index, :-1], dataset.iloc[test_index, :-1]
    loo_train_label, loo_test_label = dataset.iloc[train_index, -1], dataset.iloc[test_index, -1]
    nb_model.fit(loo_train_data, loo_train_label)
    loo_pred = nb_model.predict(loo_test_data)
    loo_accuracy.append(accuracy_score(loo_test_label, loo_pred))
print("Leave-One-Out Accuracy:", np.mean(loo_accuracy))

Hold-out Method Accuracy: 0.9182389937106918
K-Fold Cross Validation Accuracy: 0.9188319856244384
Leave-One-Out Accuracy: 0.931067044381492


In [3]:
# Inisialisasi MinMaxScaler
scaler = MinMaxScaler()

# Normalisasi data training
train_data_normalized = scaler.fit_transform(train_data)
test_data_normalized = scaler.transform(test_data)

In [8]:
# 5. Bandingkan nilai akurasi klasifikasi dengan Naïve Bayes pada salah satu metode validasi jika data training & data test dilakukan normalisasi & tidak dinormalisasi
nb_model.fit(train_data_normalized, train_label)
pred_normalized = nb_model.predict(test_data_normalized)
accuracy_holdout_normalized = accuracy_score(test_label, pred_normalized)
print(
    "Akurasi Hold-out Method (70%-30%) dengan Normalisasi:", accuracy_holdout_normalized
)

print(
    "Perbandingan Akurasi Hold-out Method (70%-30%) Tanpa Normalisasi dan dengan Normalisasi:",
    holdout_accuracy,
    "vs",
    accuracy_holdout_normalized,
)

Akurasi Hold-out Method (70%-30%) dengan Normalisasi: 0.9182389937106918
Perbandingan Akurasi Hold-out Method (70%-30%) Tanpa Normalisasi dan dengan Normalisasi: 0.9182389937106918 vs 0.9182389937106918
