['YearsFromRegistration', 'powerPS', 'kilometer', 'abtest_test', 'vehicleType_cabrio', 'vehicleType_coupe', 'vehicleType_limousine', 'vehicleType_others', 'vehicleType_small car', 'vehicleType_station wagon', 'vehicleType_suv', 'fuelType_diesel', 'fuelType_electro', 'fuelType_hybrid', 'fuelType_lpg', 'fuelType_other', 'fuelType_petrol', 'gearbox_manual', 'notRepairedDamage_yes', 'brand_encoded', 'monthOfRegistration_Q2', 'monthOfRegistration_Q3', 'monthOfRegistration_Q4'] ['YearsFromRegistration', 'powerPS', 'kilometer', 'vehicleType_suv', 'gearbox_manual', 'vehicleType_coupe', 'vehicleType_cabrio', 'fuelType_diesel', 'vehicleType_station wagon', 'abtest_test', 'vehicleType_small car', 'vehicleType_others', 'vehicleType_limousine', 'fuelType_lpg', 'fuelType_other', 'notRepairedDamage_yes', 'fuelType_electro', 'fuelType_hybrid', 'fuelType_petrol', 'brand_encoded', 'monthOfRegistration_Q2', 'monthOfRegistration_Q3', 'monthOfRegistration_Q4']

In [None]:
import tkinter as tk
from tkinter import ttk, scrolledtext
import pandas as pd
import pickle
from sklearn.exceptions import NotFittedError

def load_model():
    """Unpickle the XGBoost model stored in ``best_xgb_model.pkl``.

    Returns:
        The unpickled model object, or ``None`` when the file is missing,
        truncated, or not a valid pickle (a message is printed in both cases).
    """
    recoverable_errors = (FileNotFoundError, EOFError, pickle.UnpicklingError)
    try:
        with open('best_xgb_model.pkl', 'rb') as handle:
            loaded = pickle.load(handle)
            print("Model loaded successfully.")
    except recoverable_errors:
        print("Error loading XGBoost model.")
        return None
    return loaded

def load_scaler():
    """Unpickle and return the scaler stored in ``min_max_scaler.pkl``.

    Any error from opening or unpickling the file propagates to the caller.
    """
    with open('min_max_scaler.pkl', 'rb') as handle:
        return pickle.load(handle)

def load_label_encoder():
    """Unpickle and return the brand encoder stored in ``label_encoder_brand.pkl``.

    Any error from opening or unpickling the file propagates to the caller.
    """
    with open('label_encoder_brand.pkl', 'rb') as handle:
        return pickle.load(handle)

def check_model_fitted(model):
    """Raise ``NotFittedError`` unless ``model`` carries a non-``None`` ``_Booster``.

    A model that lacks the ``_Booster`` attribute entirely (including ``None``
    itself) is treated the same as one whose ``_Booster`` is ``None``.
    """
    booster = getattr(model, '_Booster', None)
    if booster is None:
        raise NotFittedError("The XGBoost model needs to be fitted or loaded before making predictions.")

# Raw columns that are never used as model inputs.
_UNUSED_COLUMNS = ('seller', 'offerType', 'model', 'name', 'dateCrawled',
                   'lastSeen', 'dateCreated', 'postalCode')

# Categorical inputs that are one-hot encoded.
_CATEGORICAL_COLUMNS = ('abtest', 'vehicleType', 'fuelType', 'gearbox',
                        'notRepairedDamage')

# Column order the trained model expects. Taken from the expected-feature list
# reported by XGBoost's "feature_names mismatch" error for this model.
# NOTE(review): confirm against model.get_booster().feature_names.
_TRAINING_FEATURE_ORDER = (
    'YearsFromRegistration', 'powerPS', 'kilometer', 'abtest_test',
    'vehicleType_cabrio', 'vehicleType_coupe', 'vehicleType_limousine',
    'vehicleType_others', 'vehicleType_small car',
    'vehicleType_station wagon', 'vehicleType_suv', 'fuelType_diesel',
    'fuelType_electro', 'fuelType_hybrid', 'fuelType_lpg', 'fuelType_other',
    'fuelType_petrol', 'gearbox_manual', 'notRepairedDamage_yes',
    'brand_encoded', 'monthOfRegistration_Q2', 'monthOfRegistration_Q3',
    'monthOfRegistration_Q4',
)

# Year that vehicle age is measured from.
# NOTE(review): presumably the year used when the model was trained; confirm.
_REFERENCE_YEAR = 2023


def perform_feature_engineering_and_scaling(df, label_encoder_brand, scaler):
    """Turn raw listing rows into the feature matrix the model expects.

    Args:
        df: DataFrame of raw listing fields. Must contain
            ``yearOfRegistration``, ``powerPS``, ``kilometer`` and
            ``monthOfRegistration``; ``brand``, the categorical columns and
            the unused metadata columns are optional.
        label_encoder_brand: Object whose ``transform`` maps brand names to
            integer codes.
        scaler: Object whose ``transform`` scales the four numeric columns
            ``YearsFromRegistration``, ``powerPS``, ``kilometer`` and
            ``brand_encoded`` (in that order).

    Returns:
        A DataFrame with exactly the training columns, in training order.
        One-hot columns for categories absent from ``df`` are filled with 0;
        one-hot columns the model was not trained on are dropped.

    Raises:
        KeyError: If a required numeric column is missing.
        Exception: Whatever ``label_encoder_brand.transform`` or
            ``scaler.transform`` raise (e.g. for an unseen brand).
    """
    # Tolerate callers that have already removed some of the unused columns.
    df = df.drop(columns=list(_UNUSED_COLUMNS), errors='ignore')

    # One-hot encode WITHOUT drop_first: with a single input row every
    # categorical column has exactly one category, and drop_first would
    # discard it, silently zeroing every dummy feature. The baseline
    # categories are removed later by reindexing to the training columns.
    categorical_present = [c for c in _CATEGORICAL_COLUMNS if c in df.columns]
    df = pd.get_dummies(df, columns=categorical_present, dtype=int)

    # Label encoding for the 'brand' column
    if 'brand' in df.columns:
        df['brand_encoded'] = label_encoder_brand.transform(df['brand'])
        df = df.drop(columns='brand')
    else:
        # No brand supplied: fall back to code 0 (arbitrary default).
        df['brand_encoded'] = 0

    # Replace the registration year with the vehicle's age in years.
    df['YearsFromRegistration'] = _REFERENCE_YEAR - df['yearOfRegistration']
    df = df.drop(columns='yearOfRegistration')

    # Bin the registration month into quarters; values outside 1-12 become
    # NaN and therefore produce all-zero quarter dummies.
    df['monthOfRegistration'] = pd.cut(
        df['monthOfRegistration'],
        bins=[0, 3, 6, 9, 12],
        labels=["Q1", "Q2", "Q3", "Q4"],
    )
    df = pd.get_dummies(df, columns=['monthOfRegistration'], dtype=int)

    # Scale the numeric columns
    columns_to_scale = ['YearsFromRegistration', 'powerPS', 'kilometer', 'brand_encoded']
    df[columns_to_scale] = scaler.transform(df[columns_to_scale])

    # Align with the training layout: adds missing dummies as 0, drops
    # unknown/baseline dummies, and fixes the column order so the model's
    # feature-name validation passes.
    return df.reindex(columns=list(_TRAINING_FEATURE_ORDER), fill_value=0)

def predict_price(model, input_data):
    """Return the first element of ``model.predict(input_data)``."""
    return model.predict(input_data)[0]

def create_gui(root):
    """Build the car-price prediction form inside ``root``.

    Loads the model, scaler and brand encoder, then lays out one labelled
    entry per raw listing field in a scrollable canvas, a "Predict Price"
    button, and a scrolled text area that shows the prediction (or the
    error message if prediction fails).

    Args:
        root: The Tk root (or toplevel) window to populate.

    Raises:
        NotFittedError: If the loaded model has no usable booster.
        OSError / pickle errors: Propagated from loading the scaler or the
            label encoder.
    """
    # Load model, scaler, and label encoder
    model = load_model()
    scaler = load_scaler()
    label_encoder_brand = load_label_encoder()

    # Fail before building any widgets if the model cannot predict.
    check_model_fitted(model)

    # Entry fields whose text must be converted to numbers.
    numeric_fields = ('yearOfRegistration', 'powerPS', 'kilometer', 'monthOfRegistration')

    def feature_engineering_and_scaling():
        """Read the entry widgets and return the model-ready feature frame."""
        raw_inputs = {feature: entry.get() for feature, entry in input_entries.items()}

        # Unparseable numeric text becomes NaN rather than raising.
        for field in numeric_fields:
            raw_inputs[field] = pd.to_numeric(raw_inputs[field], errors='coerce')

        input_data = pd.DataFrame(raw_inputs, index=[0])
        return perform_feature_engineering_and_scaling(input_data, label_encoder_brand, scaler)

    def predict_and_display_price():
        """Button callback: predict from the current inputs and show the result."""
        output_text.delete("1.0", tk.END)  # Clear previous text
        try:
            input_data = feature_engineering_and_scaling()
            predicted_price = predict_price(model, input_data)
        except Exception as exc:
            # Tk only prints callback exceptions to stderr, which a GUI user
            # never sees; surface the message in the window, then re-raise so
            # Tk still reports the full traceback.
            output_text.insert(tk.END, f"Prediction failed: {exc}\n")
            raise
        output_text.insert(tk.END, f"Predicted Price: {predicted_price:.2f} USD\n")

    root.title("Car Price Prediction App")

    # Create a scrolled text widget for displaying the output
    output_text = scrolledtext.ScrolledText(root, width=50, height=10)
    output_text.grid(row=0, column=2, rowspan=20, padx=10, pady=10, sticky="nsew")

    # Create and place input labels and entry widgets
    input_entries = {}
    input_features = ['dateCrawled', 'name', 'seller', 'offerType', 'abtest', 'vehicleType', 'yearOfRegistration',
                       'gearbox', 'powerPS', 'model', 'kilometer', 'monthOfRegistration', 'fuelType', 'brand',
                       'notRepairedDamage', 'dateCreated', 'postalCode', 'lastSeen']

    canvas = tk.Canvas(root)
    canvas.grid(row=0, column=0, padx=10, pady=10, sticky="nsew")

    # The form lives in a frame embedded in the canvas so it can be scrolled.
    frame = ttk.Frame(canvas)
    canvas.create_window((0, 0), window=frame, anchor="nw")

    for i, feature in enumerate(input_features):
        label = ttk.Label(frame, text=feature)
        label.grid(row=i, column=0, padx=10, pady=10, sticky=tk.W)

        entry = ttk.Entry(frame)
        entry.grid(row=i, column=1, padx=10, pady=10, sticky=tk.W)
        input_entries[feature] = entry

    # Create and place the Predict button
    predict_button = ttk.Button(frame, text="Predict Price", command=predict_and_display_price)
    predict_button.grid(row=len(input_features), column=0, columnspan=2, pady=20)

    # Adding a vertical scrollbar for input features frame
    vsb_input_features = ttk.Scrollbar(root, orient="vertical", command=canvas.yview)
    vsb_input_features.grid(row=0, column=1, rowspan=20, sticky="ns")
    canvas.configure(yscrollcommand=vsb_input_features.set)

    # Configure the grid to expand with the window
    for i in range(len(input_features)):
        root.rowconfigure(i, weight=1)

    # The scroll region must be computed after the widgets have been laid
    # out, hence the explicit idle-task flush first.
    canvas.update_idletasks()
    canvas.config(scrollregion=canvas.bbox("all"))

# Script entry point: create the Tk root window, populate it, and hand
# control to the Tk event loop (blocks until the window is closed).
if __name__ == "__main__":
    root = tk.Tk()
    create_gui(root)
    root.mainloop()


Model loaded successfully.


Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\Jagadeeshilpi\anaconda3\lib\tkinter\__init__.py", line 1892, in __call__
    return self.func(*args)
  File "C:\Users\JAGADE~1\AppData\Local\Temp/ipykernel_6344/1611203717.py", line 121, in predict_and_display_price
    predicted_price = predict_price(model, input_data)
  File "C:\Users\JAGADE~1\AppData\Local\Temp/ipykernel_6344/1611203717.py", line 85, in predict_price
    predicted_price = model.predict(input_data)
  File "C:\Users\Jagadeeshilpi\anaconda3\lib\site-packages\xgboost\sklearn.py", line 1164, in predict
    predts = self.get_booster().inplace_predict(
  File "C:\Users\Jagadeeshilpi\anaconda3\lib\site-packages\xgboost\core.py", line 2417, in inplace_predict
    self._validate_features(fns)
  File "C:\Users\Jagadeeshilpi\anaconda3\lib\site-packages\xgboost\core.py", line 2969, in _validate_features
    raise ValueError(msg.format(self.feature_names, feature_names))
ValueError: feature_names mi