In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder


df = pd.read_csv("../car_prices.csv")

df['saledate'] = pd.to_datetime(df['saledate'], errors='coerce', utc=True)
df['month'] = df['saledate'].dt.month
sales_data = df.groupby(['model', 'month']).agg({'sellingprice': 'count'}).reset_index()
sales_data.rename(columns={'sellingprice': 'sales_count'}, inplace=True)
df = pd.merge(sales_data, df, on=['model', 'month'], how='left')

df = df.drop(["vin", "mmr", "saledate", "trim"], axis = 1)

num = df.select_dtypes(include=[np.float64, np.int64]).columns.tolist()
categ = [col for col in df.columns if col not in num]

df = df.dropna()

print(df.shape, df.columns)

X = df.drop(['sales_count', "model", "month"], axis=1)
y = df['sales_count']
num.remove("sales_count")
categ.remove("model")
num.remove("month")

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

X_train = pd.get_dummies(X_train, columns=categ, drop_first=True)
X_test = pd.get_dummies(X_test, columns=categ, drop_first=True)
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

scaler = StandardScaler()
X_train[num] = pd.DataFrame(scaler.fit_transform(X_train[num]), index=X_train.index, columns=num)
X_test[num] = pd.DataFrame(scaler.transform(X_test[num]), index=X_test.index, columns=num)

  df['saledate'] = pd.to_datetime(df['saledate'], errors='coerce', utc=True)


(472325, 14) Index(['model', 'month', 'sales_count', 'year', 'make', 'body', 'transmission',
       'state', 'condition', 'odometer', 'color', 'interior', 'seller',
       'sellingprice'],
      dtype='object')


In [3]:
from keras.layers import InputLayer, Dense
from keras.models import Sequential
from keras.optimizers import Adam


model_nn = Sequential([
    InputLayer((X_train.shape[1],)),
    Dense(64, "relu"),
    Dense(32, "relu"),
    Dense(16, "relu"),
    Dense(1)
])

model_nn.compile(optimizer=Adam(learning_rate = 0.001), loss='mean_squared_error')
model_nn.fit(X_train, y_train, epochs=5, batch_size=32)

y_pred = model_nn.predict(X_test)
y_pred_train = model_nn.predict(X_train)

r2_val = round(r2_score(y_test, y_pred), 4)
rmse_val = round(np.sqrt(mean_squared_error(y_test, y_pred)))

r2_train = round(r2_score(y_train, y_pred_train), 4)
rmse_train = round(np.sqrt(mean_squared_error(y_train, y_pred_train)))

print(r2_val, rmse_val)
print(r2_train, rmse_train)

Epoch 1/5
[1m14023/14023[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 4ms/step - loss: 844799.2500
Epoch 2/5
[1m14023/14023[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 4ms/step - loss: 590133.0625
Epoch 3/5
[1m14023/14023[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 4ms/step - loss: 543784.6875
Epoch 4/5
[1m14023/14023[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 4ms/step - loss: 516670.4375
Epoch 5/5
[1m14023/14023[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 4ms/step - loss: 497917.6250
[1m739/739[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m14023/14023[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2ms/step
0.6299 717
0.6452 697


In [4]:
model_nn.save('model.keras')