In [None]:
import joblib
import numpy as np
import pandas as pd
import tkinter as tk
from tkinter import messagebox, font
from sklearn.preprocessing import StandardScaler

# Placeholder rows used only to give the scaler something to fit on.
# NOTE(review): preprocess_input applies log1p to trip_distance, trip_duration
# and tip_amount before calling this scaler, whereas the fit below sees raw
# values. Presumably the model was trained with its own fitted scaler - confirm
# and load that one instead if it exists.
sample_data = dict(
    trip_distance=[0.1, 10.0, 20.0, 30.0, 50.0],
    trip_duration=[5.0, 15.0, 30.0, 60.0, 120.0],
    speed_mph=[5.0, 15.0, 25.0, 35.0, 45.0],
    tip_amount=[0.0, 5.0, 10.0, 15.0, 20.0],
)
sample_df = pd.DataFrame(sample_data)

# StandardScaler.fit returns the fitted estimator, so build and fit in one step.
scaler = StandardScaler().fit(sample_df)

# One-hot levels for each categorical feature. A value that is not listed for
# its feature produces 0 in every column of that feature.
categorical_mappings = {
    'pickup_time_of_day': ['evening', 'morning', 'night'],
    'pickup_day_type': ['weekend'],
    'pickup_season': ['spring', 'summer', 'winter'],
    'is_holiday': ['1'],
    'PUcategory': ['City Center', 'Other', 'Suburbs'],
    'DOcategory': ['City Center', 'Other', 'Suburbs'],
}

# Column names, in the order handed to the model: one "<feature>_<level>"
# column per categorical level above, followed by the numerical features.
expected_feature_names = [
    f"{feature}_{level}"
    for feature, levels in categorical_mappings.items()
    for level in levels
] + ['trip_distance', 'trip_duration', 'speed_mph', 'tip_amount']

# Function to validate user inputs
def validate_inputs(trip_distance, trip_duration, speed_mph, tip_amount):
    """Check that the four numeric trip inputs fall inside their allowed ranges.

    Distance, duration and speed must be strictly positive; the tip may be
    zero. The checks run in parameter order and stop at the first failure.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is the empty string when
        every value is acceptable, otherwise it describes the first failure.
    """
    # (value, whether zero is accepted, inclusive upper bound, failure message)
    rules = (
        (trip_distance, False, 100, "Trip distance must be between 0 and 100 miles."),
        (trip_duration, False, 300, "Trip duration must be between 0 and 300 minutes."),
        (speed_mph, False, 100, "Speed must be between 0 and 100 mph."),
        (tip_amount, True, 100, "Tip amount must be between 0 and 100 dollars."),
    )
    for value, zero_allowed, upper, failure in rules:
        above_floor = value >= 0 if zero_allowed else value > 0
        if not (above_floor and value <= upper):
            return False, failure
    return True, ""

# Function to apply log transformations
def log_transform(df, columns):
    """Return a copy of ``df`` with ``log1p`` applied to the given columns.

    The input frame is left untouched; previously the columns were overwritten
    in place, which silently changed the caller's data as well.

    Args:
        df: DataFrame containing every column named in ``columns``.
        columns: Iterable of column names to transform.

    Returns:
        A new DataFrame in which each listed column holds ``log(1 + x)`` of
        its original values and all other columns are unchanged.
    """
    transformed = df.copy()
    for col in columns:
        transformed[col] = np.log1p(transformed[col])
    return transformed

# Function to scale numerical features
def scale_features(df, numerical_features, scaler):
    """Replace the numerical columns of ``df`` with their scaled values.

    Args:
        df: DataFrame holding at least the columns in ``numerical_features``.
        numerical_features: List of column names passed to the scaler.
        scaler: Object whose ``transform`` method maps those columns to an
            array with one column per feature.

    Returns:
        A new DataFrame with a fresh 0..n-1 index: the untouched columns
        first, followed by the scaled columns in ``numerical_features`` order.
    """
    scaled_values = scaler.transform(df[numerical_features])
    # Drop the row labels so both halves share the same default index and
    # line up row-for-row when concatenated.
    untouched = df.drop(columns=numerical_features).reset_index(drop=True)
    scaled_part = pd.DataFrame(scaled_values, columns=numerical_features)
    return pd.concat([untouched, scaled_part], axis=1)

# Function to ensure the input data matches the expected feature names and order
def ensure_feature_names_and_order(df, expected_feature_names):
    """Return ``df`` restricted to, and ordered by, ``expected_feature_names``.

    Columns missing from ``df`` are created and filled with 0; columns not in
    ``expected_feature_names`` are dropped. The input frame is not modified
    (the previous implementation added the missing columns to it in place).

    Args:
        df: DataFrame to align.
        expected_feature_names: Column names in the required output order.

    Returns:
        A new DataFrame whose columns are exactly ``expected_feature_names``.
    """
    aligned = df.reindex(columns=expected_feature_names, fill_value=0)
    print("DataFrame columns after ordering:", aligned.columns.tolist())  # Debug statement
    return aligned

# Function to transform categorical features
def transform_categorical_features(input_data, mappings):
    """One-hot encode the categorical values found in ``input_data``.

    Args:
        input_data: Mapping from feature name to the selected category.
        mappings: Mapping from feature name to the list of categories that
            each get their own indicator column.

    Returns:
        A dict keyed ``"<feature>_<category>"`` holding 1 where the input's
        value for that feature equals the category and 0 otherwise. A feature
        absent from ``input_data`` yields 0 in all of its columns.
    """
    return {
        f"{feature}_{category}": 1 if input_data.get(feature) == category else 0
        for feature, categories in mappings.items()
        for category in categories
    }

# Numerical columns that are passed through the scaler, in scaler column order
numerical_features = [
    'trip_distance',
    'trip_duration',
    'speed_mph',
    'tip_amount',
]

# Load the trained model
# NOTE: joblib.load unpickles the file, and unpickling can run arbitrary code.
# Only load model files that come from a trusted source.
print("Loading model...")
try:
    model = joblib.load('xgboost_model_final.pkl')
    print("Model loaded successfully.")

    # Print the model's expected feature names
    model_feature_names = model.get_booster().feature_names
    print("Model's expected feature names:", model_feature_names)  # Debug statement

except Exception as e:
    print(f"Error loading model: {e}")
    # Stop with a non-zero status: the GUI is useless without a model, and the
    # interactive-only exit() helper would have reported success (status 0).
    raise SystemExit(1)

# Function to preprocess a single row of data
def preprocess_input(input_data):
    """Turn one trip's raw inputs into the single-row frame fed to the model.

    Steps, in order: build a one-row DataFrame, apply the log transform to
    distance, duration and tip, scale the numerical columns with the
    module-level scaler, add the one-hot categorical columns, then keep only
    the expected feature columns in the expected order.

    Args:
        input_data: Dict with the numerical values and the raw categorical
            selections for a single trip.

    Returns:
        A one-row DataFrame whose columns are ``expected_feature_names``.
    """
    frame = pd.DataFrame([input_data])
    frame = log_transform(frame, ['trip_distance', 'trip_duration', 'tip_amount'])
    frame = scale_features(frame, numerical_features, scaler)

    # The encoding is computed from the raw dict, not from the frame, so it is
    # unaffected by the numerical transformations above.
    one_hot_columns = transform_categorical_features(input_data, categorical_mappings)
    frame = ensure_feature_names_and_order(
        frame.assign(**one_hot_columns),
        expected_feature_names,
    )
    print("Preprocessed DataFrame:", frame)  # Debug statement
    return frame

# Function to predict fare
def predict_fare():
    """Read the form, estimate the fare with the loaded model and show it.

    Used as the command of the "Estimate Fare" button. Every outcome is
    reported through a message box: non-numeric or out-of-range inputs produce
    an "Input Error" dialog, any failure during preprocessing or prediction
    produces an "Error" dialog, and success shows the estimated fare.
    """
    # Parse the free-text fields first so a typo gets a readable message
    # instead of Python's raw "could not convert string to float" text.
    try:
        numeric_inputs = {
            'trip_distance': float(entry_distance.get()),
            'trip_duration': float(entry_duration.get()),
            'speed_mph': float(entry_speed.get()),
            'tip_amount': float(entry_tip.get()),
        }
    except ValueError:
        messagebox.showerror(
            "Input Error",
            "Trip distance, trip duration, speed and tip amount must all be numbers.",
        )
        return

    try:
        # Validate inputs
        is_valid, message = validate_inputs(**numeric_inputs)
        if not is_valid:
            messagebox.showerror("Input Error", message)
            return

        # Create a dictionary from inputs (numerical values first, then the
        # categorical selections).
        input_data = {
            **numeric_inputs,
            'pickup_time_of_day': var_time_of_day.get(),
            'pickup_day_type': var_day_type.get(),
            'pickup_season': var_season.get(),
            # The holiday menu offers "yes"/"no", but the one-hot mapping only
            # recognises the level '1' (column is_holiday_1). Translate here,
            # otherwise the holiday flag would always be encoded as 0.
            'is_holiday': '1' if var_holiday.get() == 'yes' else '0',
            'PUcategory': var_PUcategory.get(),
            'DOcategory': var_DOcategory.get(),
        }

        # Preprocess the input
        print("Preprocessing the input data...")
        processed_input = preprocess_input(input_data)

        # Predict fare
        print("Predicting the fare...")
        fare = model.predict(processed_input)

        # Display the result
        messagebox.showinfo("Estimated Fare", f"Estimated Fare: ${fare[0]:.2f}")
    except Exception as e:
        # Top-level GUI boundary: surface the problem to the user rather than
        # letting the callback fail silently.
        messagebox.showerror("Error", str(e))

# Create the main window
_WINDOW_BG = "#2c3e50"  # background colour shared by the window and its labels
root = tk.Tk()
root.title("Taxi Fare Estimator")
root.configure(bg=_WINDOW_BG)

# Define font styles
label_font = font.Font(family="Helvetica", size=14, weight="bold")
entry_font = font.Font(family="Helvetica", size=12)
button_font = font.Font(family="Helvetica", size=14, weight="bold")


def _add_label(row, text):
    """Put a caption in column 0 of ``row``, anchored to the east side."""
    caption = tk.Label(root, text=text, font=label_font, fg="white", bg=_WINDOW_BG)
    caption.grid(row=row, column=0, pady=10, padx=10, sticky="e")


def _add_entry(row, text):
    """Add a captioned text entry on ``row`` and return the Entry widget."""
    _add_label(row, text)
    field = tk.Entry(root, font=entry_font)
    field.grid(row=row, column=1, pady=10, padx=10)
    return field


def _add_dropdown(row, text, options, default):
    """Add a captioned option menu on ``row`` and return its StringVar."""
    _add_label(row, text)
    selection = tk.StringVar(value=default)
    menu = tk.OptionMenu(root, selection, *options)
    menu.grid(row=row, column=1, pady=10, padx=10, sticky="w")
    return selection


# Free-text numeric inputs (rows 0-3)
entry_distance = _add_entry(0, "Trip Distance (miles)")
entry_duration = _add_entry(1, "Trip Duration (minutes)")
entry_speed = _add_entry(2, "Speed (mph)")
entry_tip = _add_entry(3, "Tip Amount ($)")

# Categorical inputs (rows 4-9)
# NOTE(review): every time-of-day and pickup/drop-off option below has its own
# one-hot column in categorical_mappings, so none of them encodes to all
# zeros, unlike "weekday" and "fall". Confirm whether a baseline category is
# missing from these three menus.
var_time_of_day = _add_dropdown(4, "Pickup Time of Day", ("morning", "evening", "night"), "morning")
var_day_type = _add_dropdown(5, "Pickup Day Type", ("weekday", "weekend"), "weekday")
var_season = _add_dropdown(6, "Pickup Season", ("spring", "summer", "fall", "winter"), "spring")
var_holiday = _add_dropdown(7, "Is Holiday", ("yes", "no"), "no")
var_PUcategory = _add_dropdown(8, "Pickup Category", ("City Center", "Suburbs", "Other"), "City Center")
var_DOcategory = _add_dropdown(9, "Dropoff Category", ("City Center", "Suburbs", "Other"), "City Center")

# Predict button with styling, spanning both columns
estimate_button = tk.Button(
    root,
    text="Estimate Fare",
    command=predict_fare,
    font=button_font,
    bg="#e74c3c",
    fg="white",
)
estimate_button.grid(row=10, column=0, columnspan=2, pady=20)

# Run the application
root.mainloop()
