In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the taxi trip pricing dataset from the current working directory.
try:
    df = pd.read_csv("taxi_trip_pricing.csv")
except FileNotFoundError:
    print("Error: File 'taxi_trip_pricing.csv' tidak ditemukan. Pastikan nama file sudah benar.")
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the `site` module for interactive shells and is not meant
    # for use in programs. A non-zero status also marks this as a failure.
    raise SystemExit(1)
else:
    # Only reached when the CSV was read successfully.
    print("Dataset Tarif Taxi berhasil dimuat.")

# Data preparation and simple cleaning.
features = [
    "Trip_Distance_km",
    "Time_of_Day",
    "Day_of_Week",
    "Passenger_Count",
    "Traffic_Conditions",
    "Weather",
]
data = df[features + ["Trip_Price"]].copy()

# Integer codes for each categorical feature. Series.map() turns any value
# that is missing from its dictionary into NaN.
category_codes = {
    "Time_of_Day": {"Morning": 0, "Afternoon": 1, "Evening": 2, "Night": 3},
    "Day_of_Week": {"Weekday": 0, "Weekend": 1},
    "Traffic_Conditions": {"Low": 0, "Medium": 1, "High": 2},
    "Weather": {"Clear": 0, "Rain": 1, "Snow": 2},
}
for column, codes in category_codes.items():
    data[column] = data[column].map(codes)

# Bin the numeric price into three labelled classes. pd.cut uses right-closed
# intervals by default, so the bins are (0, 50], (50, 100] and (100, inf];
# a price outside those ranges becomes NaN.
price_bins = [0, 50, 100, float('inf')]
price_labels = ['Murah', 'Sedang', 'Mahal']
data['Price_Category'] = pd.cut(data['Trip_Price'], bins=price_bins, labels=price_labels)

# Drop incomplete rows only AFTER encoding and binning, so that rows with an
# unrecognised category or an out-of-range price are removed together with
# rows that were missing values in the raw file, instead of leaking NaN into
# the scaler and the classifier.
data = data.dropna()

# Mapping columns that still contained NaN yields floats; restore integer codes.
encoded_columns = list(category_codes)
data[encoded_columns] = data[encoded_columns].astype("int64")

target = "Price_Category"

# Separate the target labels (y) from the feature matrix (X).
y = data[target]
X = data[features]

# Hold out 30% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a k-nearest-neighbours classifier on the scaled training data.
k = 5  # number of nearest neighbours consulted for each prediction
model = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)

# Classify the held-out test rows.
y_pred = model.predict(X_test)

# Report the overall accuracy on the test split.
accuracy = accuracy_score(y_test, y_pred)
print("Akurasi:", accuracy)

# Per-class precision, recall and F1.
print("\nLaporan Klasifikasi:")
print(classification_report(y_test, y_pred))

# Confusion matrix. No explicit `labels` are passed, so scikit-learn orders
# the rows and columns by sorted label value rather than by price level.
print("\nMatriks Konfusi:")
print(confusion_matrix(y_test, y_pred))

# Predict the price category for one new, hand-specified trip.
# Categorical fields are given as the integer codes used during training.
new_trip = pd.DataFrame([{
    "Trip_Distance_km": 20,
    "Time_of_Day": 1,         # Afternoon
    "Day_of_Week": 1,         # Weekend
    "Passenger_Count": 2,
    "Traffic_Conditions": 1,  # Medium
    "Weather": 0,             # Clear
}])

# Apply the scaler fitted on the training data before asking the model.
new_trip_scaled = scaler.transform(new_trip)
predicted_category = model.predict(new_trip_scaled)
print(f"\nPrediksi kategori harga untuk perjalanan baru: {predicted_category[0]}")

Dataset Tarif Taxi berhasil dimuat.
Akurasi: 0.6076555023923444

Laporan Klasifikasi:
              precision    recall  f1-score   support

       Mahal       0.75      0.46      0.57        13
       Murah       0.63      0.68      0.65       105
      Sedang       0.56      0.55      0.56        91

    accuracy                           0.61       209
   macro avg       0.65      0.56      0.59       209
weighted avg       0.61      0.61      0.61       209


Matriks Konfusi:
[[ 6  1  6]
 [ 1 71 33]
 [ 1 40 50]]

Prediksi kategori harga untuk perjalanan baru: Murah
