In [3]:
%debug

import tkinter as tk
from tkinter import ttk, scrolledtext
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.exceptions import NotFittedError

# SECURITY NOTE: pickle.load can execute arbitrary code stored in the file.
# Only load artifacts that come from a trusted training run.

# Bind the name up front so that a failed load is reported by the fitted-check
# below (NotFittedError) instead of surfacing as a NameError.
model = None
try:
    with open('best_xgb_model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)
except (FileNotFoundError, EOFError, pickle.UnpicklingError):
    # Handle file not found or unpickling errors
    print("Error loading XGBoost model.")
else:
    print("Model loaded successfully.")
    print(type(model))  # Print model type for debugging

# Load MinMaxScaler
with open('min_max_scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Load LabelEncoder for 'brand'
with open('label_encoder_brand.pkl', 'rb') as label_encoder_file:
    label_encoder_brand = pickle.load(label_encoder_file)

# Check if the model is fitted (also catches the failed-load case, where
# `model` is still None and therefore has no `_Booster` attribute)
if not hasattr(model, '_Booster') or model._Booster is None:
    raise NotFittedError("The XGBoost model needs to be fitted or loaded before making predictions.")

# Function to perform feature engineering and scaling
def perform_feature_engineering_and_scaling(df):
    """Turn raw listing rows into the feature frame passed to the model.

    Steps, in order: drop unused columns, one-hot encode the categorical
    columns against a fixed list of dummy columns, label-encode ``brand`` with
    the module-level ``label_encoder_brand``, convert the registration year to
    an age, bin the registration month into quarter dummies, and scale the
    numeric columns with the module-level ``scaler``.

    Args:
        df: DataFrame with the raw input columns (one row per listing).

    Returns:
        A new DataFrame holding the engineered and scaled features.

    Raises:
        KeyError: if one of the required raw columns is missing from ``df``.
    """
    # Drop unnecessary columns
    df = df.drop(columns=['seller', 'offerType', 'model', 'name', 'dateCrawled',
                          'lastSeen', 'dateCreated', 'postalCode'])

    # Dummy columns expected by the model, kept as a list so the output column
    # order is deterministic.
    # NOTE(review): the final column order presumably has to match the order
    # used at training time -- confirm against the fitted model's feature names.
    dummy_columns_train = [
        'abtest_test',
        'vehicleType_cabrio', 'vehicleType_coupe', 'vehicleType_limousine',
        'vehicleType_others', 'vehicleType_small car',
        'vehicleType_station wagon', 'vehicleType_suv',
        'fuelType_diesel', 'fuelType_electro', 'fuelType_hybrid',
        'fuelType_lpg', 'fuelType_other', 'fuelType_petrol',
        'gearbox_manual',
        'notRepairedDamage_yes',
    ]
    categorical_columns = ['abtest', 'vehicleType', 'fuelType', 'gearbox', 'notRepairedDamage']

    # Build each indicator by comparing against its category directly.
    # pd.get_dummies(..., drop_first=True) cannot be used here: on a one-row
    # frame every categorical column has a single level, which drop_first
    # removes, so every input would be encoded as the baseline category.
    for dummy_column in dummy_columns_train:
        source_column, _, category = dummy_column.partition('_')
        df[dummy_column] = (df[source_column] == category).astype(int)

    # Only the raw categorical columns are removed; the numeric columns and
    # 'brand' must survive because the steps below still read them.
    df = df.drop(columns=categorical_columns)

    # Label encoding for the 'brand' column
    if 'brand' in df.columns:
        df['brand_encoded'] = label_encoder_brand.transform(df['brand'])
        df = df.drop(columns='brand')
    else:
        # No brand supplied: fall back to a placeholder code.
        # NOTE(review): 0 is an arbitrary default -- choose a suitable value.
        df['brand_encoded'] = 0

    # Calculate the age of the vehicle.
    # NOTE(review): 2023 is presumably the reference year used when the scaler
    # and model were fitted -- confirm before changing it.
    df['yearOfRegistration'] = 2023 - df['yearOfRegistration']
    df = df.rename(columns={'yearOfRegistration': 'YearsFromRegistration'})

    # Bin the registration month into quarters. The value may arrive as text
    # (e.g. from a GUI entry), so coerce it to a number first; anything that is
    # not in 1..12 becomes NaN and ends up with all quarter dummies set to 0.
    bins = [0, 3, 6, 9, 12]
    labels = ["Q1", "Q2", "Q3", "Q4"]
    month = pd.to_numeric(df['monthOfRegistration'], errors='coerce')
    df['monthOfRegistration'] = pd.cut(month, bins=bins, labels=labels)
    # pd.cut yields a categorical with all four labels, so get_dummies emits
    # Q1..Q4 even for one row and drop_first removes only the Q1 column.
    df = pd.get_dummies(df, columns=['monthOfRegistration'], drop_first=True)

    # Scale the specified columns
    columns_to_scale = ['YearsFromRegistration', 'powerPS', 'kilometer', 'brand_encoded']
    df[columns_to_scale] = scaler.transform(df[columns_to_scale])

    return df

# Function to predict price
def predict_price(features):
    """Predict the price for one listing described by ``features``.

    Args:
        features: mapping of raw input column name to value for a single
            listing; it is wrapped into a one-row DataFrame.

    Returns:
        The first element of the model's prediction for that row.
    """
    # One-row frame -> engineered/scaled feature frame.
    engineered = perform_feature_engineering_and_scaling(
        pd.DataFrame(features, index=[0])
    )
    print(engineered)
    # The module-level model returns one prediction per row; unwrap the only one.
    return model.predict(engineered)[0]

# Tkinter GUI
class CarPricePredictionApp:
    """Tkinter form that collects raw listing fields and shows a predicted price.

    Layout: one label/entry row per input feature in columns 0 and 1, a
    "Predict Price" button below them, and a scrolled text pane in column 2
    where the result is written.
    """

    def __init__(self, root):
        """Build all widgets on ``root`` (the Tk root window)."""
        self.root = root
        self.root.title("Car Price Prediction App")

        # Create a scrolled text widget for displaying the output
        self.output_text = scrolledtext.ScrolledText(root, width=50, height=10)
        self.output_text.grid(row=0, column=2, rowspan=20, padx=10, pady=10, sticky="nsew")

        # Create and place input labels and entry widgets
        self.input_entries = {}  # feature name -> ttk.Entry holding its value
        input_features = ['dateCrawled', 'name', 'seller', 'offerType', 'abtest', 'vehicleType', 'yearOfRegistration',
                          'gearbox', 'powerPS', 'model', 'kilometer', 'monthOfRegistration', 'fuelType', 'brand',
                          'notRepairedDamage', 'dateCreated', 'postalCode', 'lastSeen']

        # enumerate gives the grid row directly instead of searching the list
        # with .index() for every feature.
        for row, feature in enumerate(input_features):
            label = ttk.Label(root, text=feature)
            label.grid(row=row, column=0, padx=10, pady=10, sticky=tk.W)

            entry = ttk.Entry(root)
            entry.grid(row=row, column=1, padx=10, pady=10, sticky=tk.W)
            self.input_entries[feature] = entry

        # Create and place the Predict button
        predict_button = ttk.Button(root, text="Predict Price", command=self.predict_price)
        predict_button.grid(row=len(input_features), column=0, columnspan=2, pady=20)

    def predict_price(self):
        """Read the form, run the prediction, and display the result."""
        # Get user inputs (entry widgets always return strings)
        features = {feature: entry.get() for feature, entry in self.input_entries.items()}

        # Convert numeric inputs; unparseable text becomes NaN (errors='coerce').
        # monthOfRegistration is included because the feature-engineering step
        # bins it with pd.cut against numeric edges, which needs a number.
        for numeric_feature in ('yearOfRegistration', 'powerPS', 'kilometer', 'monthOfRegistration'):
            features[numeric_feature] = pd.to_numeric(features[numeric_feature], errors='coerce')

        # Perform prediction (module-level predict_price, not this method)
        predicted_price = predict_price(features)

        # Display the predicted price
        self.output_text.delete(1.0, tk.END)  # Clear previous text
        self.output_text.insert(tk.END, f"Predicted Price: {predicted_price:.2f} USD\n")


if __name__ == "__main__":
    # Create the Tk root window and build the prediction form on it.
    root = tk.Tk()
    app = CarPricePredictionApp(root)

    # Let grid column 2 / row 0 absorb extra space when the window is resized
    # (the output text pane is gridded at row 0, column 2).
    root.columnconfigure(2, weight=1)
    root.rowconfigure(0, weight=1)

    # Enter the Tk event loop.
    root.mainloop()


ERROR:root:No traceback has been produced, nothing to debug.


Model loaded successfully.
<class 'xgboost.sklearn.XGBRegressor'>


In [None]:
import tkinter as tk
from tkinter import ttk, scrolledtext
import pandas as pd
import pickle
from sklearn.exceptions import NotFittedError

def load_model():
    """Unpickle the model stored in ``best_xgb_model.pkl``.

    Returns:
        The unpickled object, or ``None`` when the file is missing, truncated,
        or not a valid pickle (a message is printed in either case).
    """
    try:
        with open('best_xgb_model.pkl', 'rb') as handle:
            loaded = pickle.load(handle)
            print("Model loaded successfully.")
    except (FileNotFoundError, EOFError, pickle.UnpicklingError):
        print("Error loading XGBoost model.")
        return None
    else:
        return loaded

def load_scaler():
    """Unpickle and return the object stored in ``min_max_scaler.pkl``.

    Errors from opening or unpickling the file propagate to the caller.
    """
    with open('min_max_scaler.pkl', 'rb') as handle:
        return pickle.load(handle)

def load_label_encoder():
    """Unpickle and return the object stored in ``label_encoder_brand.pkl``.

    Errors from opening or unpickling the file propagate to the caller.
    """
    with open('label_encoder_brand.pkl', 'rb') as handle:
        return pickle.load(handle)

def check_model_fitted(model):
    """Raise ``NotFittedError`` unless ``model`` carries a non-None ``_Booster``.

    A missing ``_Booster`` attribute and a ``_Booster`` set to ``None`` are
    treated the same way. Returns ``None`` when the check passes.
    """
    booster = getattr(model, '_Booster', None)
    if booster is None:
        raise NotFittedError("The XGBoost model needs to be fitted or loaded before making predictions.")

def perform_feature_engineering_and_scaling(df, label_encoder_brand, scaler):
    """Turn raw listing rows into the feature frame passed to the model.

    Steps, in order: drop unused columns, one-hot encode the categorical
    columns against a fixed list of dummy columns, label-encode ``brand``,
    convert the registration year to an age, bin the registration month into
    quarter dummies, and scale the numeric columns.

    Args:
        df: DataFrame with the raw input columns (one row per listing).
        label_encoder_brand: object whose ``transform`` maps the ``brand``
            column to numeric codes.
        scaler: object whose ``transform`` scales the four numeric columns
            (``YearsFromRegistration``, ``powerPS``, ``kilometer``,
            ``brand_encoded``), passed in that order.

    Returns:
        A new DataFrame holding the engineered and scaled features.

    Raises:
        KeyError: if one of the required raw columns is missing from ``df``.
    """
    # Drop unnecessary columns
    df = df.drop(columns=['seller', 'offerType', 'model', 'name', 'dateCrawled',
                          'lastSeen', 'dateCreated', 'postalCode'])

    # Dummy columns expected by the model, kept as a list so the output column
    # order is deterministic.
    # NOTE(review): the final column order presumably has to match the order
    # used at training time -- confirm against the fitted model's feature names.
    dummy_columns_train = [
        'abtest_test',
        'vehicleType_cabrio', 'vehicleType_coupe', 'vehicleType_limousine',
        'vehicleType_others', 'vehicleType_small car',
        'vehicleType_station wagon', 'vehicleType_suv',
        'fuelType_diesel', 'fuelType_electro', 'fuelType_hybrid',
        'fuelType_lpg', 'fuelType_other', 'fuelType_petrol',
        'gearbox_manual',
        'notRepairedDamage_yes',
    ]
    categorical_columns = ['abtest', 'vehicleType', 'fuelType', 'gearbox', 'notRepairedDamage']

    # Build each indicator by comparing against its category directly.
    # pd.get_dummies(..., drop_first=True) cannot be used here: on a one-row
    # frame every categorical column has a single level, which drop_first
    # removes, so every input would be encoded as the baseline category.
    for dummy_column in dummy_columns_train:
        source_column, _, category = dummy_column.partition('_')
        df[dummy_column] = (df[source_column] == category).astype(int)

    # Only the raw categorical columns are removed; the numeric columns and
    # 'brand' must survive because the steps below still read them.
    df = df.drop(columns=categorical_columns)

    # Label encoding for the 'brand' column
    if 'brand' in df.columns:
        df['brand_encoded'] = label_encoder_brand.transform(df['brand'])
        df = df.drop(columns='brand')
    else:
        # No brand supplied: fall back to a placeholder code.
        # NOTE(review): 0 is an arbitrary default -- choose a suitable value.
        df['brand_encoded'] = 0

    # Calculate the age of the vehicle.
    # NOTE(review): 2023 is presumably the reference year used when the scaler
    # and model were fitted -- confirm before changing it.
    df['yearOfRegistration'] = 2023 - df['yearOfRegistration']
    df = df.rename(columns={'yearOfRegistration': 'YearsFromRegistration'})

    # Bin the registration month into quarters. The value may arrive as text
    # (e.g. from a GUI entry), so coerce it to a number first; anything that is
    # not in 1..12 becomes NaN and ends up with all quarter dummies set to 0.
    bins = [0, 3, 6, 9, 12]
    labels = ["Q1", "Q2", "Q3", "Q4"]
    month = pd.to_numeric(df['monthOfRegistration'], errors='coerce')
    df['monthOfRegistration'] = pd.cut(month, bins=bins, labels=labels)
    # pd.cut yields a categorical with all four labels, so get_dummies emits
    # Q1..Q4 even for one row and drop_first removes only the Q1 column.
    df = pd.get_dummies(df, columns=['monthOfRegistration'], drop_first=True)

    # Scale the specified columns
    columns_to_scale = ['YearsFromRegistration', 'powerPS', 'kilometer', 'brand_encoded']
    df[columns_to_scale] = scaler.transform(df[columns_to_scale])

    return df

def predict_price(model, input_data):
    """Return the first prediction ``model.predict`` gives for ``input_data``."""
    return model.predict(input_data)[0]

def create_gui(root):
    """Build the car-price prediction form on ``root`` (the Tk root window).

    Layout: one label/entry row per input feature in columns 0 and 1, a
    "Predict Price" button below them, and a scrolled text pane in column 2
    where the result or an error message is written.
    """
    root.title("Car Price Prediction App")

    output_text = scrolledtext.ScrolledText(root, width=50, height=10)
    output_text.grid(row=0, column=2, rowspan=20, padx=10, pady=10, sticky="nsew")

    input_entries = {}  # feature name -> ttk.Entry holding its value
    input_features = ['dateCrawled', 'name', 'seller', 'offerType', 'abtest', 'vehicleType', 'yearOfRegistration',
                      'gearbox', 'powerPS', 'model', 'kilometer', 'monthOfRegistration', 'fuelType', 'brand',
                      'notRepairedDamage', 'dateCreated', 'postalCode', 'lastSeen']

    # enumerate gives the grid row directly instead of searching the list with
    # .index() for every feature.
    for row, feature in enumerate(input_features):
        label = ttk.Label(root, text=feature)
        label.grid(row=row, column=0, padx=10, pady=10, sticky=tk.W)

        entry = ttk.Entry(root)
        entry.grid(row=row, column=1, padx=10, pady=10, sticky=tk.W)
        input_entries[feature] = entry

    def predict_price_callback():
        """Read the form, run the prediction, and show the result or error."""
        # Entry widgets always return strings.
        features = {feature: entry.get() for feature, entry in input_entries.items()}

        # Convert numeric inputs; unparseable text becomes NaN (errors='coerce').
        # monthOfRegistration is included because the feature-engineering step
        # bins it with pd.cut against numeric edges, which needs a number.
        for numeric_feature in ('yearOfRegistration', 'powerPS', 'kilometer', 'monthOfRegistration'):
            features[numeric_feature] = pd.to_numeric(features[numeric_feature], errors='coerce')

        model = load_model()
        scaler = load_scaler()
        label_encoder_brand = load_label_encoder()

        try:
            # The fitted-check must run inside the try block; otherwise a
            # missing or unfitted model raises before the handler below can
            # report it in the output pane.
            check_model_fitted(model)
            input_data = pd.DataFrame(features, index=[0])
            input_data = perform_feature_engineering_and_scaling(input_data, label_encoder_brand, scaler)
            predicted_price = predict_price(model, input_data)
        except NotFittedError as e:
            message = f"Error: {str(e)}\n"
        else:
            message = f"Predicted Price: {predicted_price:.2f} USD\n"

        output_text.delete(1.0, tk.END)  # Clear previous text
        output_text.insert(tk.END, message)

    predict_button = ttk.Button(root, text="Predict Price", command=predict_price_callback)
    predict_button.grid(row=len(input_features), column=0, columnspan=2, pady=20)

if __name__ == "__main__":
    # Create the Tk root window, build the form on it, and enter the event loop.
    root = tk.Tk()
    create_gui(root)
    root.mainloop()
