In [23]:
# neural_net_car_price.py
# Requires: pandas, numpy, scikit-learn, tensorflow (>=2.x), matplotlib (optional for plots)

import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow import keras
from pathlib import Path
import joblib

In [3]:
# ---------------------------
# 1) Load data
# ---------------------------
CSV_PATH = "car_sales_data.csv"   # already present for you
df = pd.read_csv(CSV_PATH)

# Expect these columns based on your file:
# ['Manufacturer','Model','Engine size','Fuel type','Year of manufacture','Mileage','Price']
target_col = "Price"

# Basic sanity checks
df = df.dropna(subset=[target_col])  # drop rows with missing target
df = df.reset_index(drop=True)

In [4]:
# ---------------------------
# 2) Split features/target
# ---------------------------
X = df.drop(columns=[target_col])
y = df[target_col].astype(float)

# Identify column types
categorical_cols = ["Manufacturer", "Model", "Fuel type"]
numeric_cols = ["Engine size", "Year of manufacture", "Mileage"]

In [5]:
# ---------------------------
# 3) Preprocess
# ---------------------------
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_cols),
        ("num", StandardScaler(), numeric_cols),
    ]
)

In [6]:
# ---------------------------
# 4) Build Keras model factory
# ---------------------------
def build_model(input_dim: int) -> keras.Model:
    model = keras.Sequential([
        keras.layers.Input(shape=(input_dim,)),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.Dropout(0.1),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(1)  # regression output
    ])
    # Huber loss is robust to outliers; you can switch to "mse" if you prefer
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                  loss=keras.losses.Huber(),
                  metrics=[keras.metrics.MeanAbsoluteError(name="MAE")])
    return model

In [7]:
# ---------------------------
# 5) Train/Validation/Test split
# ---------------------------
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.1765, random_state=42
)
# Now: ~70% train, ~15% val, ~15% test

In [8]:
# ---------------------------
# 6) Fit the preprocessor and transform
# ---------------------------
preprocessor.fit(X_train)
X_train_tf = preprocessor.transform(X_train)
X_val_tf   = preprocessor.transform(X_val)
X_test_tf  = preprocessor.transform(X_test)

In [9]:
# ---------------------------
# 7) Build & train the model
# ---------------------------
tf.random.set_seed(42)
np.random.seed(42)

model = build_model(input_dim=X_train_tf.shape[1])

callbacks = [
    keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=10, restore_best_weights=True
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=5, min_lr=1e-5
    ),
]

history = model.fit(
    X_train_tf, y_train.values,
    validation_data=(X_val_tf, y_val.values),
    epochs=200,
    batch_size=256,
    verbose=1,
    callbacks=callbacks
)

Epoch 1/200
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - MAE: 13740.4805 - loss: 13739.9805 - val_MAE: 13657.6514 - val_loss: 13657.1514 - learning_rate: 0.0010
Epoch 2/200
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - MAE: 12755.8281 - loss: 12755.3281 - val_MAE: 11603.3164 - val_loss: 11602.8164 - learning_rate: 0.0010
Epoch 3/200
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - MAE: 9666.7217 - loss: 9666.2217 - val_MAE: 7882.4170 - val_loss: 7881.9175 - learning_rate: 0.0010
Epoch 4/200
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - MAE: 6505.9673 - loss: 6505.4673 - val_MAE: 5460.1328 - val_loss: 5459.6333 - learning_rate: 0.0010
Epoch 5/200
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - MAE: 4795.8447 - loss: 4795.3452 - val_MAE: 4277.6030 - val_loss: 4277.1030 - learning_rate: 0.0010
Epoch 6/200
[1m137/137[0m [32m━━━━━━━━━━━━━━

In [11]:
# ---------------------------
# 8) Evaluate (“accuracy”) on the held-out test set
# ---------------------------
y_pred = model.predict(X_test_tf, verbose=0).ravel()

r2  = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred)

# % within 10% of true price (an intuitive accuracy-like metric)
within_10pct = np.mean(np.abs(y_pred - y_test.values) <= 0.10 * np.abs(y_test.values))

print("\n=== Test Set Performance ===")
print(f"R²:   {r2:0.4f}")
print(f"RMSE: {rmse:,.2f}")
print(f"MAE:  {mae:,.2f}")
print(f"Accuracy (within ±10%): {within_10pct*100:0.2f}%")


=== Test Set Performance ===
R²:   0.9995
RMSE: 129,738.59
MAE:  125.90
Accuracy (within ±10%): 98.12%


In [None]:
# ---------------------------
# 9) Save model + preprocessor
# ---------------------------

MODEL_DIR = Path(r"C:\Users\Georg\OneDrive\Dokument\TradingBot")
MODEL_DIR.mkdir(parents=True, exist_ok=True)

# 1) Save Keras model file
model_path = MODEL_DIR / "car_price_model.keras"   # note the .keras extension
model.save(model_path)

# 2) Save the preprocessor
joblib.dump(preprocessor, MODEL_DIR / "preprocessor.joblib")

print(f"Saved model to {model_path}")
print(f"Saved preprocessor to {MODEL_DIR/'preprocessor.joblib'}")

Saved model to C:\Users\Georg\OneDrive\Dokument\TradingBot\car_price_model.keras
Saved preprocessor to C:\Users\Georg\OneDrive\Dokument\TradingBot\preprocessor.joblib


In [22]:
# === 3) Load the preprocessor ===
preprocessor = joblib.load(MODEL_DIR / "preprocessor.joblib")

# === 4) Create an example car to predict ===
example = {
    "Manufacturer": "Ford",
    "Model": "Fiesta",
    "Fuel type": "Petrol",
    "Engine size": 2.0,               # liters
    "Year of manufacture": 2021,
    "Mileage": 100000                  # km or miles, same unit as your dataset
}

X_new = pd.DataFrame([example])

# === 5) Transform + predict ===
X_new_tf = preprocessor.transform(X_new)
pred_price = model.predict(X_new_tf, verbose=0).ravel()[0]

print("Input:", X_new.to_dict(orient="records")[0])
print(f"Predicted price: {pred_price:,.2f}")


Input: {'Manufacturer': 'Ford', 'Model': 'Fiesta', 'Fuel type': 'Petrol', 'Engine size': 2.0, 'Year of manufacture': 2021, 'Mileage': 100000}
Predicted price: 19,003.19
