In [2]:
import csv
from collections import Counter
from typing import List

def load_data(filename: str) -> List[dict]:
    """Read a CSV file and return its rows as dictionaries keyed by header.

    Args:
        filename: Path of the CSV file. Its first line supplies the field
            names used as dictionary keys.

    Returns:
        One dictionary per data row, in file order. A file with no data
        rows yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires; without it, newlines embedded in quoted
    # fields are rewritten before the parser ever sees them.
    with open(filename, 'r', newline='') as file:
        return list(csv.DictReader(file))

def tokenize(row: dict) -> List[str]:
    """Turn a data row into a flat list of lower-cased word tokens.

    The 'Transportasi' and 'No.' columns are excluded. Every other string
    value is lower-cased and split on whitespace, and the pieces are
    appended in column order.

    Args:
        row: Mapping of column name to cell value.

    Returns:
        The tokens drawn from the row; an empty list if nothing qualifies.
    """
    tokens = []
    for key, value in row.items():
        if key in ('Transportasi', 'No.'):
            continue
        # csv.DictReader fills short rows with None and stores surplus
        # fields as a list under the key None; neither can be lower-cased,
        # so skip anything that is not a string instead of crashing.
        if not isinstance(value, str):
            continue
        tokens.extend(value.lower().split())
    return tokens

def train_naive_bayes(data: List[dict]) -> dict:
    """Collect per-class row counts and token lists from labelled rows.

    Rows whose 'Transportasi' value is '?' are treated as unlabelled and
    ignored. The two known classes are always present in the result, even
    with zero rows; any other label found in the data gets its own entry
    rather than raising KeyError.

    Args:
        data: Rows as dictionaries; each must contain a 'Transportasi' key.

    Returns:
        A dictionary with two entries:
        'counts' maps each class label to its number of training rows, and
        'tokens' maps each class label to the list of all tokens (with
        repeats) taken from that class's rows.

    Raises:
        KeyError: If a row has no 'Transportasi' key.
    """
    counts = {'Kendaraan pribadi': 0, 'Kendaraan umum': 0}
    tokens = {'Kendaraan pribadi': [], 'Kendaraan umum': []}

    for row in data:
        label = row['Transportasi']
        if label == '?':
            continue

        # Register labels on first sight so the trainer is not limited to
        # the two classes seeded above.
        counts[label] = counts.get(label, 0) + 1
        tokens.setdefault(label, []).extend(tokenize(row))

    return {'counts': counts, 'tokens': tokens}

def calculate_probabilities(model: dict, row: dict) -> dict:
    """Score a row against every class with add-one smoothed Naive Bayes.

    For each class the score is the class prior multiplied by the smoothed
    likelihood of every token in the row. The scores are not normalised,
    so they do not sum to one; only their relative order is meaningful.

    Args:
        model: Dictionary with 'counts' (class label -> number of training
            rows) and 'tokens' (class label -> list of training tokens).
        row: The row to score; it is tokenised with ``tokenize``.

    Returns:
        A dictionary mapping each class label in ``model['counts']`` to its
        unnormalised score.

    Raises:
        ZeroDivisionError: If the model has classes but their counts sum
            to zero, so no prior can be computed.
    """
    counts = model['counts']
    tokens = model['tokens']
    total_count = sum(counts.values())

    # Neither the row's tokens nor the vocabulary depends on the class, so
    # compute them once rather than on every loop iteration.
    row_tokens = tokenize(row)
    vocabulary = {token for class_tokens in tokens.values() for token in class_tokens}
    # "or 1" keeps the smoothed denominator non-zero when training
    # produced no tokens at all.
    vocabulary_size = len(vocabulary) or 1

    probabilities = {}
    for transportasi, transportasi_count in counts.items():
        transportasi_tokens = tokens[transportasi]
        # A Counter gives O(1) frequency lookups; list.count rescanned the
        # whole token list for every token in the row.
        token_counts = Counter(transportasi_tokens)
        denominator = len(transportasi_tokens) + vocabulary_size

        probability = transportasi_count / total_count
        for token in row_tokens:
            # Add-one (Laplace) smoothing: a token never seen with this
            # class lowers the score instead of forcing it to zero, which
            # previously made every class tie at 0 for unfamiliar rows.
            probability *= (token_counts[token] + 1) / denominator

        probabilities[transportasi] = probability

    return probabilities

def predict_transportasi(data: List[dict], model: dict) -> List[str]:
    """Predict the most likely class label for each row.

    Args:
        data: Rows to classify, as dictionaries.
        model: Trained model passed unchanged to ``calculate_probabilities``.

    Returns:
        The highest-scoring label for each row, in input order. When
        several labels tie, the first one in the score dictionary wins.
    """
    def best_label(row: dict) -> str:
        # Score every class, then keep the label with the largest score.
        scores = calculate_probabilities(model, row)
        return max(scores, key=scores.get)

    return [best_label(row) for row in data]

# Load the training rows from the CSV file.
data = load_data('tp2.csv')

# Train the Naive Bayes model on those rows.
model = train_naive_bayes(data)

# New, unlabelled rows to classify.
new_data = [
    {
        'No.': '11',
        'Jenis Kelamin': 'Perempuan',
        'Umur Karyawan': '27',
        'Gaji': '12,000,000',
        'Status': 'Single',
    },
    {
        'No.': '12',
        'Jenis Kelamin': 'Laki-Laki',
        'Umur Karyawan': '35',
        'Gaji': '14,000,000',
        'Status': 'Menikah',
    },
]

# Predict the transport type for each new row.
predictions = predict_transportasi(new_data, model)

# Print the predictions, numbering the rows from 1.
for number, prediction in enumerate(predictions, start=1):
    print(f"Data {number}: Jenis transportasi yang diprediksi: {prediction}")


Data 1: Jenis transportasi yang diprediksi: Kendaraan pribadi
Data 2: Jenis transportasi yang diprediksi: Kendaraan umum
