In [None]:
import os
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras

In [None]:
# Source dataset:
# https://www.kaggle.com/datasets/austinreese/craigslist-carstrucks-data
#
# The CSV location can be overridden with the VEHICLES_CSV environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the original local path is used, so existing setups keep working.
VEHICLES_CSV = os.environ.get("VEHICLES_CSV", "/Users/akapralov/tmp/vehicles.csv")
df = pd.read_csv(VEHICLES_CSV)
df

In [None]:
# Keep only the columns used downstream, drop rows missing any of them, and
# restrict to listings priced 5,000-100,000 (both inclusive) with year >= 2000.
columns = ['price', 'year', 'manufacturer', 'model']
df = (
    df[columns]
    .dropna()
    .loc[lambda d: d['price'].between(5_000, 100_000) & (d['year'] >= 2000)]
)
df

In [None]:
# Separate the regression target (price) from the input features; both are
# independent of `df`, so later edits to them leave `df` untouched.
price_labels = df['price'].copy()
car_features = df.drop(columns='price')
car_features

In [None]:
# Build the numeric feature matrix from `df`.
#
# The frame is rebuilt from `df` (which this cell never modifies) instead of
# overwriting columns of `car_features` in place, so the cell can be re-run
# safely: the in-place version failed on a second run because the 'model'
# column already held integers and had no `.split`.

# Reduce each model name to its first space-delimited token. `astype(str)`
# guards against non-string entries, which would otherwise break `.split`.
model_family = df['model'].astype(str).str.split(' ').str[0]

# Map each category to an integer code. The fitted encoders are kept so the
# same mapping can be applied to new cars at prediction time.
# NOTE(review): integer codes impose an arbitrary ordering on the categories;
# one-hot or embedding inputs may suit the network better - confirm intent.
model_encoder = LabelEncoder().fit(model_family.to_numpy())
manufacturer_encoder = LabelEncoder().fit(df['manufacturer'].to_numpy())

# The 'year' feature is stored as an age (current year minus 'year'), computed
# with a vectorised subtraction rather than a row-wise apply.
# The value depends on the date the notebook is executed.
current_year = datetime.now().year

car_features = pd.DataFrame(
    {
        'year': current_year - df['year'],
        'manufacturer': manufacturer_encoder.transform(df['manufacturer'].to_numpy()),
        'model': model_encoder.transform(model_family.to_numpy()),
    },
    index=df.index,
)

car_features

In [None]:
# Fit the normalization layer's statistics to the feature matrix, then show
# the means it learned.
normalizer = keras.layers.Normalization(axis=-1)
normalizer.adapt(car_features.to_numpy())

normalizer.mean.numpy()

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable across "Restart & Run All".
SEED = 42
keras.utils.set_random_seed(SEED)

# Regression network: normalised inputs -> one wide hidden layer -> a single
# linear output (the predicted price).
model = keras.models.Sequential([
  normalizer,
  keras.layers.Dense(2048, activation="relu"),
  keras.layers.Dense(1)
])
# The standard Adam optimizer is used instead of `optimizers.legacy.Adam`:
# the legacy namespace is not available under Keras 3, while
# `keras.optimizers.Adam` exists in every version this notebook can run on.
model.compile(
    loss="mean_squared_error",
    optimizer=keras.optimizers.Adam(learning_rate=0.1),
    metrics=['mae', 'mse']
)
model.summary()


In [None]:
# Train for 100 epochs, holding out 20% of the rows for validation.
# NOTE(review): Keras is understood to take the validation fraction from the
# end of the data without shuffling - confirm the row order is not meaningful.
history = model.fit(
    x=car_features,
    y=price_labels,
    epochs=100,
    validation_split=0.2,
)

In [None]:
# Hand-written sample cars to price with the trained model.
# 'year' holds the car's age in years (not a calendar year), matching the
# training features, which store `current_year - year` in that column.
cars = pd.DataFrame({
    'year': [5, 10, 7, 3],
    'manufacturer': ['toyota', 'toyota', 'ford', 'volkswagen'],
    'model': ['4runner', 'tacoma', 'focus', 'jetta'],
})
cars

In [None]:
# Encode the sample cars with the encoders fitted on the training data.
#
# Model names get the same first-token reduction that was applied before the
# encoder was fitted, so a multi-word name (e.g. with a trim level) maps to the
# label the encoder actually knows instead of failing as an unseen label.
# `assign` returns a new frame, leaving `cars` unchanged and keeping the
# column order (year, manufacturer, model) the network was trained on.
X_test = cars.assign(
    manufacturer=manufacturer_encoder.transform(cars['manufacturer']),
    model=model_encoder.transform(cars['model'].astype(str).str.split(' ').str[0]),
)
X_test

In [None]:
# Run the trained network on the encoded sample cars and display its output.
predicted_prices = model.predict(X_test)
predicted_prices