In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter import ttk
from PIL import Image, ImageTk
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import joblib

# Function to load dataset
def load_dataset():
    """Prompt for a CSV file and load it into the module-level ``df``.

    Opens a file-selection dialog. If the dialog is cancelled nothing
    changes. If the chosen file cannot be read or parsed, an error dialog
    is shown and any previously loaded ``df`` is left untouched.
    """
    global df
    file_path = filedialog.askopenfilename()
    if not file_path:
        # Empty result means the user dismissed the dialog.
        return
    try:
        loaded = pd.read_csv(file_path)
    except (OSError, ValueError) as exc:
        # OSError covers missing/unreadable files; pandas' parser, empty-data
        # and decoding errors are all ValueError subclasses.
        messagebox.showerror("Error", f"Could not load dataset:\n{exc}")
        return
    # Rebind only after a successful read so a failed load never clobbers
    # a dataset that is already in use.
    df = loaded
    messagebox.showinfo("Information", "Dataset Loaded Successfully")

# Function to preprocess data
def preprocess_data():
    """Clean the loaded dataset, train the price model, and evaluate it.

    Steps, in order:
      1. Replace "?" placeholders with NaN and coerce the known numeric
         columns to numbers.
      2. Mean-impute those numeric columns, then fill any remaining gaps
         with each column's most frequent value.
      3. Use ``price`` as the target and every other column as a feature,
         build a scaling + one-hot preprocessing step in front of a random
         forest, fit it on an 80/20 train/test split, and call
         ``evaluate_model``.

    Results are published through module-level globals (``X``, ``y``, the
    train/test splits, ``pipeline`` and ``preprocessor``). The global ``df``
    is modified in place.

    If no dataset has been loaded, the dataset is empty, or required
    columns are absent, an error dialog is shown and nothing is changed.
    """
    global X, y, X_train, X_test, y_train, y_test, pipeline, preprocessor
    numeric_cols = ['normalized-losses', 'wheel-base', 'length', 'width', 'height', 'curb-weight', 'engine-size',
                    'bore', 'stroke', 'compression-ratio', 'horsepower', 'peak-rpm', 'city-mpg', 'highway-mpg', 'price']

    # Validate up front: an exception raised inside a Tk button callback is
    # only reported on stderr, so the user would otherwise see no feedback.
    data = globals().get('df')
    if data is None:
        messagebox.showerror("Error", "Please load a dataset first")
        return
    if data.empty:
        messagebox.showerror("Error", "The loaded dataset contains no rows")
        return
    missing_cols = [col for col in numeric_cols if col not in data.columns]
    if missing_cols:
        messagebox.showerror("Error", "Dataset is missing required columns:\n" + ", ".join(missing_cols))
        return

    # --- Cleaning (mutates the global df in place) ---
    df.replace("?", np.nan, inplace=True)
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
    # NOTE(review): imputation runs on the full frame, before the split below,
    # and numeric_cols includes the target 'price'. Test rows therefore
    # influence the fill values and missing targets are replaced by the mean.
    # Confirm this is intended.
    numeric_imputer = SimpleImputer(strategy='mean')
    df[numeric_cols] = numeric_imputer.fit_transform(df[numeric_cols])
    # Remaining (non-numeric) gaps get the per-column mode.
    df.fillna(df.mode().iloc[0], inplace=True)

    # --- Feature / target split ---
    X = df.drop(columns=['price'])
    y = df['price']
    categorical_features = X.select_dtypes(include=['object']).columns.tolist()
    numerical_features = X.select_dtypes(include=[np.number]).columns.tolist()

    # --- Preprocessing + model pipeline ---
    numeric_transformer = Pipeline(steps=[
        ('scaler', StandardScaler())
    ])
    categorical_transformer = Pipeline(steps=[
        ('onehot', OneHotEncoder(drop='first', handle_unknown='ignore'))
    ])
    preprocessor = ColumnTransformer(
        transformers=[
            ('num', numeric_transformer, numerical_features),
            ('cat', categorical_transformer, categorical_features)
        ]
    )
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                               ('model', model)])

    # --- Train and evaluate ---
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    pipeline.fit(X_train, y_train)
    messagebox.showinfo("Information", "Data Preprocessed and Model Trained Successfully")
    evaluate_model()

# Function to evaluate model
def evaluate_model():
    """Score the fitted ``pipeline`` on the held-out test split.

    Stores the test-set predictions in the global ``y_pred`` and their
    mean squared error and R-squared in the globals ``mse`` and ``r2``,
    then shows a completion dialog.
    """
    global y_pred, mse, r2
    y_pred = pipeline.predict(X_test)
    # Both metrics compare the same ground truth against the same predictions.
    mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
    r2 = r2_score(y_true=y_test, y_pred=y_pred)
    messagebox.showinfo("Information", "Model Evaluation Completed")

# Function to display actual and predicted prices
def display_prices():
    """Open a window listing the first ten actual and predicted prices.

    Reads the globals ``y_test`` and ``y_pred``. If either does not exist
    yet, a warning dialog is shown and no window is opened.
    """
    state = globals()
    if 'y_test' not in state or 'y_pred' not in state:
        # Without this check the window would open empty and the callback
        # would then die with a NameError that the user never sees.
        messagebox.showwarning("Warning", "Please preprocess the data and train the model first")
        return

    prices_window = tk.Toplevel(root)
    prices_window.title("Actual vs Predicted Prices")

    # One grid row per series: bold caption in column 0, values in column 1.
    rows = (
        ("Actual Prices (first 10):", str(y_test.values[:10])),
        ("Predicted Prices (first 10):", str(y_pred[:10])),
    )
    for row_index, (caption, values) in enumerate(rows):
        caption_label = tk.Label(prices_window, text=caption, font=("Arial", 12, "bold"))
        caption_label.grid(row=row_index, column=0, padx=5, pady=5)
        values_label = tk.Label(prices_window, text=values, font=("Arial", 12))
        values_label.grid(row=row_index, column=1, padx=5, pady=5)

# Function to display evaluation metrics
def display_metrics():
    """Open a window showing the stored mean squared error and R-squared.

    Reads the globals ``mse`` and ``r2``. If either does not exist yet, a
    warning dialog is shown and no window is opened.
    """
    state = globals()
    if 'mse' not in state or 'r2' not in state:
        # Avoid opening an empty window followed by a silent NameError.
        messagebox.showwarning("Warning", "Please preprocess the data and train the model first")
        return

    metrics_window = tk.Toplevel(root)
    metrics_window.title("Model Evaluation Metrics")
    for line in (f"Mean Squared Error: {mse}", f"R-squared: {r2}"):
        tk.Label(metrics_window, text=line, font=("Arial", 12)).pack(padx=10, pady=5)

# Function to display data distribution
def display_data_distribution():
    """Open a window with a histogram (plus KDE curve) of the ``price`` column.

    Reads the global ``df``. If no dataset is loaded, or it has no
    ``price`` column, a warning dialog is shown and no window is opened.
    """
    # Local import: a standalone Figure is not registered with pyplot, so
    # figures do not accumulate each time this window is opened.
    from matplotlib.figure import Figure

    data = globals().get('df')
    if data is None or 'price' not in data.columns:
        messagebox.showwarning("Warning", "Please load a dataset with a 'price' column first")
        return

    dist_window = tk.Toplevel(root)
    dist_window.title("Data Distribution")

    fig = Figure(figsize=(10, 6))
    ax = fig.add_subplot(111)
    sns.histplot(data['price'], kde=True, ax=ax)
    ax.set_title('Price Distribution')

    # Embed the figure in the new window as a Tk widget.
    canvas = FigureCanvasTkAgg(fig, master=dist_window)
    canvas.draw()
    canvas.get_tk_widget().pack()

# Function to save the model
def save_model():
    """Ask for a destination and write the trained ``pipeline`` to disk.

    The pipeline is serialized with ``joblib.dump``. If no pipeline has
    been trained yet a warning is shown before any file dialog appears.
    Cancelling the dialog does nothing; a failed write is reported in an
    error dialog.
    """
    trained = globals().get('pipeline')
    if trained is None:
        messagebox.showwarning("Warning", "Please preprocess the data and train the model first")
        return

    file_path = filedialog.asksaveasfilename(defaultextension=".pkl", filetypes=[("Pickle files", "*.pkl")])
    if not file_path:
        # Dialog was cancelled.
        return
    try:
        joblib.dump(trained, file_path)
    except OSError as exc:
        messagebox.showerror("Error", f"Could not save model:\n{exc}")
        return
    messagebox.showinfo("Information", "Model Saved Successfully")

# Function to display other model accuracies
def display_other_model_accuracies():
    """Open a window comparing R² scores of two alternative regressors.

    A linear regression and a gradient boosting regressor are each trained
    on the existing train split and scored on the existing test split.
    Each one sits behind an unfitted copy of the global ``preprocessor``,
    so it receives the same scaled / one-hot encoded features as the main
    model. The raw frames cannot be passed to these estimators directly
    when they contain string-valued columns.

    If the data has not been preprocessed yet, a warning dialog is shown
    and no window is opened.
    """
    # Local import: clone gives each candidate its own unfitted preprocessor
    # so fitting here never touches the one inside the main pipeline.
    from sklearn.base import clone

    state = globals()
    required = ('preprocessor', 'X_train', 'X_test', 'y_train', 'y_test')
    if any(name not in state for name in required):
        messagebox.showwarning("Warning", "Please preprocess the data and train the model first")
        return

    candidates = (
        ("Linear Regression", LinearRegression()),
        ("Gradient Boosting Regressor", GradientBoostingRegressor(n_estimators=100, random_state=42)),
    )

    # Train everything before opening the window so that a failure does not
    # leave an empty window behind.
    scores = []
    for label, estimator in candidates:
        candidate = Pipeline(steps=[('preprocessor', clone(preprocessor)),
                                    ('model', estimator)])
        candidate.fit(X_train, y_train)
        scores.append((label, r2_score(y_test, candidate.predict(X_test))))

    results_window = tk.Toplevel(root)
    results_window.title("Other Models Performance")
    for label, score in scores:
        score_label = tk.Label(results_window, text=f"{label} R²: {score:.4f}", font=("Arial", 12))
        score_label.pack(padx=10, pady=5)

# --- Main window ---
root = tk.Tk()
root.title("Car Price Prediction")
root.geometry("1200x800")

# --- Title banner spanning all four button columns ---
heading = tk.Label(
    root,
    text="Car Price Prediction Model By Abu Bakar Siddiq",
    font=("Arial", 24, "bold"),
    fg="blue",
)
heading.grid(row=0, column=0, columnspan=4, pady=20)

# --- Decorative images, resolved relative to the working directory ---
image_paths = [
    'ferrari.jpg', 'ford.jpg', 'lambo.jpg',
    'landcruiser.jpg', 'mercedes.jpg', 'mustang.jpg',
    'car4.jpg', 'car5.jpg', 'car6.jpg', 'car7.jpg',
    'car8.jpg', 'car9.jpg', 'car10.jpg',
]

# --- Two stacked container frames (grid rows 1 and 2) for the images ---
top_image_frame = tk.Frame(root)
top_image_frame.grid(row=1, column=0, columnspan=4, padx=10, pady=10)

bottom_image_frame = tk.Frame(root)
bottom_image_frame.grid(row=2, column=0, columnspan=4, padx=10, pady=10)

# Display images
def display_images():
    """Show the car thumbnails in the two image frames.

    ``image_paths`` is split in half: the first half fills
    ``top_image_frame`` and the remainder fills ``bottom_image_frame``,
    one 150x150 thumbnail per grid column. An image that cannot be loaded
    is reported on stdout and skipped, leaving its column empty.
    """
    def _fill_row(frame, paths):
        """Load each path as a thumbnail and grid it left to right in *frame*."""
        for column, image_path in enumerate(paths):
            try:
                # The context manager releases the file handle as soon as the
                # resized copy has been made.
                with Image.open(image_path) as source:
                    thumbnail = ImageTk.PhotoImage(source.resize((150, 150)))
                image_label = tk.Label(frame, image=thumbnail)
                # Keep a reference on the widget so the PhotoImage is not
                # garbage-collected while the label is displayed.
                image_label.image = thumbnail
                image_label.grid(row=0, column=column, padx=5, pady=5)
            except Exception as e:
                # Deliberately best-effort: a missing or broken decorative
                # image must not stop the GUI from starting.
                print(f"Error loading image {image_path}: {e}")

    half = len(image_paths) // 2
    _fill_row(top_image_frame, image_paths[:half])
    _fill_row(bottom_image_frame, image_paths[half:])

# Populate the image frames once, before the event loop starts.
display_images()

# --- Action buttons, first row (grid row 3) ---
load_button = tk.Button(
    root, text="Load Dataset", command=load_dataset,
    font=("Arial", 12), bg="lightgreen",
)
load_button.grid(row=3, column=0, padx=10, pady=10)

preprocess_button = tk.Button(
    root, text="Preprocess Data", command=preprocess_data,
    font=("Arial", 12), bg="lightblue",
)
preprocess_button.grid(row=3, column=1, padx=10, pady=10)

prices_button = tk.Button(
    root, text="Display Prices", command=display_prices,
    font=("Arial", 12), bg="lightyellow",
)
prices_button.grid(row=3, column=2, padx=10, pady=10)

metrics_button = tk.Button(
    root, text="Display Metrics", command=display_metrics,
    font=("Arial", 12), bg="lightcoral",
)
metrics_button.grid(row=3, column=3, padx=10, pady=10)

# --- Action buttons, second row (grid row 4) ---
visualize_data_button = tk.Button(
    root, text="Visualize Data", command=display_data_distribution,
    font=("Arial", 12), bg="lightcyan",
)
visualize_data_button.grid(row=4, column=0, padx=10, pady=10)

save_model_button = tk.Button(
    root, text="Save Model", command=save_model,
    font=("Arial", 12), bg="lightblue",
)
save_model_button.grid(row=4, column=1, padx=10, pady=10)

other_models_button = tk.Button(
    root, text="Other Models Performance", command=display_other_model_accuracies,
    font=("Arial", 12), bg="lightpink",
)
other_models_button.grid(row=4, column=2, padx=10, pady=10)

# Hand control to Tk; this call blocks until the main window is closed.
root.mainloop()

