# 1. Prepare Data

In [1]:
import pandas
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Fixed seed so the train/test partition (and every metric derived from it)
# is the same on each Restart & Run All.
RANDOM_SEED = 42

# Load the data
file_name = "used_cars_autoscout24.csv"
data = pandas.read_csv(file_name)

# Force the numeric columns to real numbers; values that cannot be parsed
# become NaN and are removed by the dropna() below.
# "mileage_in_km" is included defensively: if it were ever read as text,
# get_dummies() would one-hot encode it instead of treating it as a number.
# The conversion is a no-op when the column is already numeric.
numeric_columns = ["year", "mileage_in_km", "power_ps", "price_in_euro"]
for column in numeric_columns:
    data[column] = pandas.to_numeric(data[column], errors="coerce")

# Clean the data: keep only the modelling columns, drop incomplete rows,
# then one-hot encode the remaining text columns.
categorical_columns = ["brand", "model", "color", "transmission_type"]
clean_data = data[categorical_columns + numeric_columns]
clean_data = clean_data.dropna(axis="rows")
clean_data = pandas.get_dummies(clean_data)

# Train-test split (80 % train / 20 % test, reproducible via RANDOM_SEED)
x = clean_data.drop("price_in_euro", axis="columns")
y = clean_data[["price_in_euro"]]  # kept 2-D because the scaler expects a 2-D input
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_SEED
)

# Scaling: the scalers are fitted on the training split only, so no statistics
# of the test split leak into training; the test split reuses the same mapping.
s_input = MinMaxScaler(feature_range=(0, 1))
s_output = MinMaxScaler(feature_range=(0, 1))
s_input.fit(x_train)
s_output.fit(y_train)
x_train_scaled = s_input.transform(x_train)
y_train_scaled = s_output.transform(y_train)
x_test_scaled = s_input.transform(x_test)
y_test_scaled = s_output.transform(y_test)

# 2. Keras NN

In [2]:
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
from tensorflow import keras

# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling are repeatable between runs.
# NOTE(review): bit-exact results on GPU may additionally require deterministic
# ops to be enabled in the backend - confirm if that level is needed.
NN_SEED = 42
keras.utils.set_random_seed(NN_SEED)

# Create the model: a small fully connected regressor with one linear output.
input_count = x_train.shape[1]

model = keras.models.Sequential()
model.add(keras.layers.InputLayer(shape=(input_count,)))
model.add(keras.layers.Dense(64, activation="relu"))
model.add(keras.layers.Dense(8, activation="relu"))
model.add(keras.layers.Dense(1, activation=None))

# The loss is computed on the min-max scaled price, so the values logged during
# training are not in euros.
model.compile(optimizer="adam", loss="mean_absolute_error")

# Train the model
print("Train")
history = model.fit(x_train_scaled, y_train_scaled, epochs=5)

# Evaluate the model on the held-out split; predictions are mapped back to
# euros before computing the metrics.
print("\nEvalutate")
predicts = model.predict(x_test_scaled)
predicts_real = s_output.inverse_transform(predicts).flatten()
ground_truth_real = y_test.values.flatten()

# Mean absolute error
mae = mean_absolute_error(ground_truth_real, predicts_real)
print(f"Mean absolute error: {mae:.2f} €")

# Mean absolute percentage error (returned as a fraction, hence the * 100)
mape = mean_absolute_percentage_error(ground_truth_real, predicts_real)
print(f"Mean absolute percentage error: {mape*100:.2f} %")

Train
Epoch 1/5
[1m6264/6264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 692us/step - loss: 0.0019
Epoch 2/5
[1m6264/6264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 684us/step - loss: 7.5334e-04
Epoch 3/5
[1m6264/6264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 685us/step - loss: 7.0855e-04
Epoch 4/5
[1m6264/6264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 680us/step - loss: 6.8723e-04
Epoch 5/5
[1m6264/6264[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 681us/step - loss: 6.7349e-04

Evalutate
[1m1566/1566[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 545us/step
Mean absolute error: 4065.79 €
Mean absolute percentage error: 18.08 %
