In [1]:
import numpy as np
import pandas as pd
from collections import defaultdict

In [3]:
# Load dataset
df = pd.read_csv('carEvaluation.csv')

# Mapping kategori ke angka
mappings = {}
for column in df.columns:
    unique_vals = df[column].unique()
    mappings[column] = {val: i for i, val in enumerate(unique_vals)}
    df[column] = df[column].map(mappings[column])

# Pisahkan fitur dan label
X = df.iloc[:, :-1].values 
y = df.iloc[:, -1].values

# Split data (80% train, 20% test)
split_idx = int(0.8 * len(df))
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]


In [4]:
# Naïve Bayes Classifier
class NaiveBayes:
    def __init__(self):
        self.class_probs = {}  # Probabilitas setiap kelas
        self.feature_probs = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))  # Probabilitas fitur
    
    def fit(self, X, y):
        n_samples, n_features = X.shape
        unique_classes, class_counts = np.unique(y, return_counts=True)
        
        # Hitung probabilitas prior P(C)
        for cls, count in zip(unique_classes, class_counts):
            self.class_probs[cls] = count / n_samples
        
        # Hitung probabilitas kondisional P(X|C)
        for cls in unique_classes:
            X_cls = X[y == cls]  # Ambil hanya data dengan kelas tertentu
            for feature_idx in range(n_features):
                feature_vals, feature_counts = np.unique(X_cls[:, feature_idx], return_counts=True)
                total_count = len(X_cls)
                for val, count in zip(feature_vals, feature_counts):
                    self.feature_probs[cls][feature_idx][val] = count / total_count

    def predict(self, X):
        predictions = []
        for x in X:
            class_scores = {}
            for cls in self.class_probs:
                class_scores[cls] = np.log(self.class_probs[cls])  # Gunakan log untuk menghindari underflow
                for feature_idx, feature_val in enumerate(x):
                    if feature_val in self.feature_probs[cls][feature_idx]:
                        class_scores[cls] += np.log(self.feature_probs[cls][feature_idx][feature_val])
                    else:
                        class_scores[cls] += np.log(1e-6)  # Probabilitas kecil jika tidak ditemukan
            
            predictions.append(max(class_scores, key=class_scores.get))  # Pilih kelas dengan skor tertinggi
        return np.array(predictions)

In [6]:
# Training model
nb_model = NaiveBayes()
nb_model.fit(X_train, y_train)

# Prediksi data uji
y_pred = nb_model.predict(X_test)

# Evaluasi akurasi
accuracy = (y_pred == y_test).mean() * 100
print("Akurasi Naive Bayes:", accuracy, "%")

Akurasi Naive Bayes: 67.63005780346822 %
