In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


class NaiveBayesClassifier:
    """Categorical Naive Bayes classifier based on relative frequencies.

    After ``train``:
      * ``classes`` holds the distinct target labels,
      * ``class_priors`` maps each label to its share of the training rows,
      * ``feature_probabilities`` maps label -> feature name -> observed
        value -> relative frequency of that value within the label's rows.

    No smoothing is applied; every feature value is treated as a discrete
    category and matched by equality.
    """

    def __init__(self):
        # All three attributes are populated by ``train``.
        self.classes = None
        self.class_priors = None
        self.feature_probabilities = None

    def train(self, X, y):
        """Estimate class priors and per-class feature value frequencies.

        Args:
            X: DataFrame of features; every column is used as a feature.
            y: Target labels aligned row-by-row with ``X``.
        """
        self.classes = np.unique(y)
        self.class_priors = {}
        self.feature_probabilities = {}

        total_samples = len(y)
        for c in self.classes:
            # Rows of X that belong to the current class.
            class_samples = X[y == c]

            # Prior probability: share of training rows with this class.
            self.class_priors[c] = len(class_samples) / total_samples

            # Conditional probability of each observed value, per feature.
            self.feature_probabilities[c] = {}
            for feature in X.columns:
                feature_values = class_samples[feature]
                unique_values, counts = np.unique(
                    feature_values, return_counts=True)
                self.feature_probabilities[c][feature] = dict(
                    zip(unique_values, counts / len(feature_values)))

    def predict(self, X_test):
        """Predict a class label for every row of ``X_test``.

        Args:
            X_test: DataFrame whose columns are the features seen in
                ``train``.

        Returns:
            A list with one predicted label per row, in row order.
        """
        predictions = []

        for _, row in X_test.iterrows():
            class_scores = {}

            # Score of a class = prior * product of the known conditional
            # probabilities of the row's feature values.
            for c in self.classes:
                score = self.class_priors[c]
                # ``Series.items()`` replaces ``Series.iteritems()``, which
                # was removed in pandas 2.0.
                for feature, value in row.items():
                    probability = self.feature_probabilities[c][feature].get(
                        value)
                    # A value never observed for this class during training
                    # is skipped, i.e. it leaves the score unchanged.
                    if probability is not None:
                        score *= probability
                class_scores[c] = score

            # Pick the class with the highest score (first one wins ties).
            predicted_class = max(class_scores, key=class_scores.get)
            predictions.append(predicted_class)

        return predictions


# Load the training data from data.csv
data = pd.read_csv('data.csv')

# Split into features and the target column ('transportasi')
dataY = data['transportasi']
dataX = data.drop('transportasi', axis=1)

# Build the Naive Bayes model and fit it on the training data
model = NaiveBayesClassifier()
model.train(dataX, dataY)

# Load the new, unlabeled records
data_new = pd.read_csv('data_baru.csv')

# Predict the transportation type for each new record
predictions = model.predict(data_new)

# Printed label -> source column, in the order they are reported
report_fields = (
    ("Jenis Kelamin :", 'jenis_kelamin'),
    ("Umur Karyawan :", 'umur_karyawan'),
    ("Gaji :", 'gaji'),
    ("Status :", 'status'),
)

# Print every new record together with its predicted transportation type
for position, predicted in enumerate(predictions):
    print("Data baru ke-", position + 1, ":")
    for label, column in report_fields:
        # Label-based lookup on the row label, same as data_new[column][i]
        print(label, data_new.at[position, column])
    print("Transportasi :", predicted)
    print()


Data baru ke- 1 :
Jenis Kelamin: perempuan
Umur Karyawan: 27
Gaji: 12000000
Status: single
Transportasi: kendaraan_umum

Data baru ke- 2 :
Jenis Kelamin: laki-laki
Umur Karyawan: 35
Gaji: 14000000
Status: menikah
Transportasi: kendaraan_pribadi

