In [None]:
import kagglehub

# Kaggle handle ("owner/dataset") of the car-sales dataset used in this notebook.
DATASET_HANDLE = "minahilfatima12328/car-sales-info"

# Fetch the latest published version; `path` is whatever local directory
# kagglehub hands back for the downloaded files.
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Using Colab cache for faster access to the 'car-sales-info' dataset.
Path to dataset files: /kaggle/input/car-sales-info


In [None]:
import os

# Reuse the directory returned by kagglehub.dataset_download() instead of a
# hard-coded "/kaggle/input/..." location, so the notebook also runs where the
# dataset is cached somewhere else (e.g. a local machine).
data_path = path

# List the downloaded files to discover the CSV name.
print(os.listdir(data_path))

['car_sales_data.csv']


In [None]:
import pandas as pd

# Read the raw sales table from the downloaded dataset directory.
df = pd.read_csv(f"{data_path}/car_sales_data.csv")
print(df.head())

# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()

  Manufacturer       Model  Engine size Fuel type  Year of manufacture  \
0         Ford      Fiesta          1.0    Petrol                 2002   
1      Porsche  718 Cayman          4.0    Petrol                 2016   
2         Ford      Mondeo          1.6    Diesel                 2014   
3       Toyota        RAV4          1.8    Hybrid                 1988   
4           VW        Polo          1.0    Petrol                 2006   

   Mileage  Price  
0   127300   3074  
1    57850  49704  
2    39190  24072  
3   210814   1705  
4   127869   4101  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Manufacturer         50000 non-null  object 
 1   Model                50000 non-null  object 
 2   Engine size          50000 non-null  float64
 3   Fuel type            50000 non-null  object 
 4   Year of manufacture  5

In [None]:
from datetime import datetime

# Age is measured against the year in which the notebook is executed.
current_year = datetime.now().year

# Replace "Year of manufacture" with the derived "Car_Age" feature.
# The guard makes the cell idempotent: once the year column has been dropped,
# re-running the cell is a no-op instead of raising KeyError.
if "Year of manufacture" in df.columns:
    df = (
        df.assign(Car_Age=current_year - df["Year of manufacture"])
          .drop(columns="Year of manufacture")
    )

In [None]:
from sklearn.model_selection import train_test_split

# Separate the regression target from the feature columns.
y = df["Price"]
X = df.drop(columns="Price")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Feature columns, grouped by how they are preprocessed.
categorical = ["Manufacturer", "Model", "Fuel type"]
numeric = ["Engine size", "Mileage", "Car_Age"]

# One-hot encode the categorical columns (handle_unknown="ignore" lets the
# encoder accept categories at predict time that were absent during fit) and
# standardise the numeric columns.
categorical_encoder = OneHotEncoder(handle_unknown="ignore")
numeric_scaler = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ("cat", categorical_encoder, categorical),
        ("num", numeric_scaler, numeric),
    ]
)

# Random forest with a fixed seed so repeated fits give the same model.
forest = RandomForestRegressor(n_estimators=200, random_state=42)

# Bundle preprocessing and regressor so both are fitted, saved and applied
# as a single object.
model = Pipeline(
    steps=[
        ("preprocess", preprocessor),
        ("regressor", forest),
    ]
)

In [None]:
# Fit the preprocessing steps and the regressor together on the training split
model.fit(X=X_train, y=y_train)

In [None]:
# Score the held-out rows with the fitted pipeline.
preds = model.predict(X_test)

# R² plus RMSE (square root of the mean squared error, i.e. an error figure
# in the same units as the target).
r2 = r2_score(y_test, preds)
mse = mean_squared_error(y_test, preds)
rmse = mse ** 0.5

print("RMSE:", rmse)
print("R²:", r2)

RMSE: 624.9068927731595
R²: 0.9985610011458825


In [None]:
import joblib

# Persist the whole fitted pipeline (preprocessing + regressor) to disk so it
# can be reloaded later without retraining.
joblib.dump(value=model, filename="car_price_model.pkl")
print("✅ Model saved as car_price_model.pkl")

✅ Model saved as car_price_model.pkl


In [None]:
# Round-trip check: reload the saved pipeline and predict the first five
# rows of the test split.
loaded_model = joblib.load("car_price_model.pkl")
sample_pred = loaded_model.predict(X_test.head(5))
print("Sample predictions:", sample_pred)

Sample predictions: [67730.81  35501.02  17073.365  3104.595  4845.165]


In [None]:
import gradio as gr
import pandas as pd
from joblib import load

# Load the trained model.
# joblib files are pickles: only load a file this notebook (or you) produced.
model = load("car_price_model.pkl")

# Trained categorical values
trained_values = {
    "Manufacturer": ['BMW', 'Ford', 'Porsche', 'Toyota', 'VW'],
    "Model": ['718 Cayman', '911', 'Cayenne', 'Fiesta', 'Focus',
              'Golf', 'M5', 'Mondeo', 'Passat', 'Polo',
              'Prius', 'RAV4', 'X3', 'Yaris', 'Z4'],
    "Fuel type": ['Diesel', 'Hybrid', 'Petrol']
}

# Which models belong to which manufacturer. A plain cross product of the two
# lists would offer 75 entries including impossible cars such as "BMW Fiesta",
# for which the model has never seen a training row.
# NOTE(review): pairs follow the makers' real line-ups; confirm against
# df[["Manufacturer", "Model"]].drop_duplicates() from the training data.
models_by_manufacturer = {
    'BMW': ['M5', 'X3', 'Z4'],
    'Ford': ['Fiesta', 'Focus', 'Mondeo'],
    'Porsche': ['718 Cayman', '911', 'Cayenne'],
    'Toyota': ['Prius', 'RAV4', 'Yaris'],
    'VW': ['Golf', 'Passat', 'Polo'],
}

# Car choices for dropdown: "<Manufacturer> <Model>", valid pairs only.
car_choices = [
    f"{manu} {model_name}"
    for manu in trained_values['Manufacturer']
    for model_name in models_by_manufacturer[manu]
]

# Prediction function
def predict_price(car_choice, fuel_type, engine_size, mileage, car_age):
    """Predict a car's price and return it as a formatted string.

    Args:
        car_choice: Dropdown value of the form "<Manufacturer> <Model>";
            everything after the first space is taken as the model name.
        fuel_type: Fuel type label.
        engine_size: Engine size, passed to the model as "Engine size".
        mileage: Mileage, passed to the model as "Mileage".
        car_age: Age in years, passed to the model as "Car_Age".

    Returns:
        The prediction formatted like "$12,345.67", or a short message asking
        the user to correct the inputs when something is missing or invalid.
    """
    # The UI components yield None when a dropdown has no selection or a
    # number field is cleared; answer with a hint instead of crashing.
    if not car_choice or not fuel_type:
        return "Please select a car and a fuel type."
    if engine_size is None or mileage is None or car_age is None:
        return "Please fill in engine size, mileage and car age."
    if engine_size <= 0 or mileage < 0 or car_age < 0:
        return "Engine size must be positive; mileage and car age cannot be negative."

    # Split only on the first space so multi-word model names stay intact.
    manufacturer, model_name = car_choice.split(" ", 1)

    # Single-row frame using the column names the pipeline was trained on.
    input_df = pd.DataFrame([{
        'Manufacturer': manufacturer,
        'Model': model_name,
        'Engine size': engine_size,
        'Fuel type': fuel_type,
        'Mileage': mileage,
        'Car_Age': car_age
    }])
    predicted_price = model.predict(input_df)[0]
    return f"${predicted_price:,.2f}"

# Gradio interface
# CSS for colors and styling. Passed to gr.Blocks(css=...) below, which is the
# documented way to attach custom CSS (rather than assigning demo.css later).
APP_CSS = """
    body { background-color: #f4f4f4; color: #111; font-family: Arial, sans-serif; }
    .header-text { color: #1a1aff; font-weight: bold; text-align: center; }
    .input-box label { color: #333333; font-weight: bold; }
    .input-box .gr-dropdown, .input-box .gr-number input { background-color: #ffffff; color: #111; border: 1px solid #ccc; border-radius: 5px; }
    .predict-button { background-color: #ff4d4d; color: white; font-weight: bold; border-radius: 5px; }
    .output-box label { color: #1a1aff; font-weight: bold; }
    .output-box textarea { background-color: #d9f2ff; color: #111; border: 1px solid #1a1aff; border-radius: 5px; }
    """

# Gradio interface: inputs in the left column, predicted price on the right.
with gr.Blocks(css=APP_CSS) as demo:
    gr.Markdown("## 🚗 Car Price Prediction", elem_classes="header-text")

    with gr.Row():
        with gr.Column(scale=2):
            car_input = gr.Dropdown(car_choices, label="Select Car (Manufacturer + Model)", elem_classes="input-box")
            fuel_type_input = gr.Dropdown(trained_values['Fuel type'], label="Fuel Type", elem_classes="input-box")
            # minimum=0 rejects negative numbers before they reach the model.
            engine_size_input = gr.Number(label="Engine Size (Liters)", value=2.0, step=0.1, minimum=0, elem_classes="input-box")
            mileage_input = gr.Number(label="Mileage (km)", value=10000, minimum=0, elem_classes="input-box")
            car_age_input = gr.Number(label="Car Age (Years)", value=1, minimum=0, elem_classes="input-box")
            predict_button = gr.Button("Predict Price", variant="primary", elem_classes="predict-button")
        with gr.Column(scale=1):
            output = gr.Textbox(label="Predicted Price", interactive=False, elem_classes="output-box")

    # Input order must match predict_price's positional parameters.
    predict_button.click(
        fn=predict_price,
        inputs=[car_input, fuel_type_input, engine_size_input, mileage_input, car_age_input],
        outputs=output
    )

demo.launch()

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6e03d17dc33eb16dfe.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


