In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# 1. Load the dataset
try:
    df = pd.read_csv("taxi_trip_pricing.csv")
except FileNotFoundError as err:
    # Raise instead of calling exit(): exit() is a helper meant for the
    # interactive shell, and in a notebook it can shut down the kernel rather
    # than simply stopping this cell with a visible error.
    raise FileNotFoundError(
        "Error: File 'taxi_trip_pricing.csv' tidak ditemukan. Pastikan nama file sudah benar."
    ) from err
else:
    # Only reached when the CSV was read successfully.
    print("Dataset Tarif Taxi berhasil dimuat.")

# 2. Data preparation and (simple) cleaning
features = ["Trip_Distance_km", "Time_of_Day", "Day_of_Week", "Passenger_Count", "Traffic_Conditions", "Weather"]
target = "Trip_Price"

# Integer codes used to turn each categorical feature into a numeric column.
category_codes = {
    "Time_of_Day": {"Morning": 0, "Afternoon": 1, "Evening": 2, "Night": 3},
    "Day_of_Week": {"Weekday": 0, "Weekend": 1},
    "Traffic_Conditions": {"Low": 0, "Medium": 1, "High": 2},
    "Weather": {"Clear": 0, "Rain": 1, "Snow": 2},
}

# Keep only the modelling columns and drop rows with any missing value.
# Building a new frame (instead of inplace=True) keeps this step re-runnable.
data = df[[target] + features].dropna().copy()

for column, codes in category_codes.items():
    # Series.map turns labels that are absent from the dict into NaN, which
    # would only surface later as a confusing failure when fitting the model.
    # Fail here with a message that names the offending labels instead.
    unknown_labels = set(data[column].unique()) - set(codes)
    if unknown_labels:
        raise ValueError(
            f"Column '{column}' contains labels without a numeric code: "
            f"{sorted(map(str, unknown_labels))}"
        )
    data[column] = data[column].map(codes)

# 3. Split the data into training and test sets
X, y = data[features], data[target]

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Build the linear regression model
# fit() returns the fitted estimator, so construction and training are chained.
model = LinearRegression().fit(X_train, y_train)

# 5. Predict trip prices on the test set
y_pred = model.predict(X_test)

# 6. Evaluate the model (example: using R-squared)
# score() is evaluated on the held-out test features and targets.
r_squared = model.score(X_test, y_test)
print("R-squared:", r_squared)

# 7. Predict the price of a new trip
# Categorical values are given as the integer codes assigned in step 2:
# Time_of_Day 1 = "Afternoon", Day_of_Week 1 = "Weekend",
# Traffic_Conditions 1 = "Medium", Weather 0 = "Clear".
new_trip_record = {
    "Trip_Distance_km": 20,
    "Time_of_Day": 1,
    "Day_of_Week": 1,
    "Passenger_Count": 2,
    "Traffic_Conditions": 1,
    "Weather": 0,
}
# A single-record list yields a one-row frame whose columns follow the dict's
# key order, which matches the feature order the model was trained on.
new_trip = pd.DataFrame([new_trip_record])

predicted_price = model.predict(new_trip)
print(f"\nPrediksi harga untuk perjalanan baru: ${predicted_price[0]:.2f}")

Dataset Tarif Taxi berhasil dimuat.
R-squared: 0.5842982741370999

Prediksi harga untuk perjalanan baru: $42.30
