In [1]:
import pandas as pd

import torch
import torch.nn as nn
from torch.optim import Adam

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import QuantileTransformer
from sklearn.metrics import mean_squared_error, r2_score

from skorch import NeuralNetRegressor

In [2]:
# Global run settings: a single seed reused by every seeded component below,
# plus the mini-batch size and data-loader worker count.
random_state = 177013
torch.manual_seed(random_state)
batch_size, num_workers = 1000, 4

In [3]:
# Read the listings table, using the first CSV column as the row index,
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='auto_filtered.csv', index_col=0)
df.head(n=5)

Unnamed: 0,Price,VehicleType,RegistrationYear,Power,Model,Kilometer,RegistrationMonth,FuelType,Brand,PostalCode,Gearbox_manual,NotRepaired_yes
3,1500,small,2001,75,golf,150000,6,petrol,volkswagen,91074,1,0
4,3600,small,2008,69,fabia,90000,7,petrol,skoda,60437,1,0
5,650,sedan,1995,102,3er,150000,10,petrol,bmw,33775,1,1
6,2200,convertible,2004,109,2_reihe,150000,8,petrol,peugeot,67112,1,0
10,2000,sedan,2004,105,3_reihe,150000,12,petrol,mazda,96224,1,0


In [4]:
# One-hot encode the categorical columns; drop_first removes the first level
# of each so the indicator columns are not perfectly collinear.
df = pd.get_dummies(
    data=df,
    columns=['VehicleType', 'Model', 'FuelType', 'Brand'],
    drop_first=True,
)

In [5]:
# 'Price' is the regression target; it and two further columns are excluded
# from the model inputs.
df_target = df['Price']
df_features = df.drop(columns=['Price', 'Gearbox_manual', 'RegistrationMonth'])

In [6]:
# Hold out 20% of the rows for testing; the split is seeded so it is repeatable.
(features_train, features_test,
 target_train, target_test) = train_test_split(
    df_features,
    df_target,
    random_state=random_state,
    test_size=0.2,
)

In [7]:
# Fit the transformer on the training split only, then apply the same fitted
# mapping to both splits so no test information is used during fitting.
quantile_transformer = QuantileTransformer(random_state=random_state).fit(features_train)
features_train, features_test = (
    quantile_transformer.transform(features_train),
    quantile_transformer.transform(features_test),
)

In [8]:
class Linear(nn.Module):
    """Small fully connected regression network.

    Two hidden stages (64 and 32 units), each applying
    ``Linear -> LeakyReLU -> BatchNorm1d``, followed by a final linear layer
    that produces a single output value per sample.

    Args:
        n_features: Size of the input feature dimension.
    """

    def __init__(self, n_features):
        super().__init__()
        # Affine layers: n_features -> 64 -> 32 -> 1.
        self.fc1 = nn.Linear(n_features, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        # One activation per hidden stage.
        self.relu1 = nn.LeakyReLU()
        self.relu2 = nn.LeakyReLU()
        # Batch normalisation is applied after each hidden activation.
        self.norm1 = nn.BatchNorm1d(64)
        self.norm2 = nn.BatchNorm1d(32)

    def forward(self, batch):
        """Run ``batch`` through both hidden stages and the output layer."""
        hidden = self.norm1(self.relu1(self.fc1(batch)))
        hidden = self.norm2(self.relu2(self.fc2(hidden)))
        return self.fc3(hidden)

In [9]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("The model will be running on", device, "device")

The model will be running on cpu device


In [10]:
# Size the input layer to the number of feature columns.
model = Linear(n_features=df_features.shape[1])

In [11]:
# Wrap the PyTorch module in a scikit-learn style regressor:
# MSE loss, Adam optimiser, 25 epochs of mini-batch training.
net = NeuralNetRegressor(
    module=model,
    criterion=nn.MSELoss,
    optimizer=Adam,
    lr=0.005,
    max_epochs=25,
    batch_size=batch_size,
    device=device,
)

In [12]:
# Cast inputs and targets to float32 and pass the target as a single column
# of shape (n_samples, 1).
net.fit(
    X=features_train.astype('float32'),
    y=target_train.values.astype('float32').reshape(-1, 1),
)

  epoch     train_loss     valid_loss     dur
-------  -------------  -------------  ------
      1  [36m17351227.4510[0m  [32m17189124.7717[0m  1.4562
      2  [36m16451081.7891[0m  [32m15871585.9428[0m  1.3413
      3  [36m14755546.5602[0m  [32m13862332.4786[0m  1.3430
      4  [36m12509712.0399[0m  [32m11362660.1687[0m  1.3157
      5  [36m10038213.4093[0m  [32m8779011.4692[0m  1.3073
      6  [36m7656945.2794[0m  [32m6532941.8336[0m  1.3716
      7  [36m5608395.3989[0m  [32m4608926.0222[0m  1.4055
      8  [36m4020800.7274[0m  [32m3259061.3750[0m  1.3737
      9  [36m2897382.7267[0m  [32m2360714.6933[0m  1.3610
     10  [36m2158731.5422[0m  [32m1858243.2549[0m  1.3605
     11  [36m1693438.9412[0m  [32m1558393.7344[0m  1.3513
     12  [36m1400690.8189[0m  [32m1337812.7334[0m  1.3283
     13  [36m1227819.0453[0m  [32m1222948.4728[0m  1.6220
     14  [36m1137469.0805[0m  [32m1166555.4785[0m  1.6119
     15  [36m1093434.7716[0m 

<class 'skorch.regressor.NeuralNetRegressor'>[initialized](
  module_=Linear(
    (fc1): Linear(in_features=301, out_features=64, bias=True)
    (fc2): Linear(in_features=64, out_features=32, bias=True)
    (fc3): Linear(in_features=32, out_features=1, bias=True)
    (relu1): LeakyReLU(negative_slope=0.01)
    (relu2): LeakyReLU(negative_slope=0.01)
    (norm1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (norm2): BatchNorm1d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  ),
)

In [13]:
# Score the held-out split: RMSE is reported as the square root of the MSE.
predictions = net.predict(features_test.astype('float32'))
mse = mean_squared_error(y_true=target_test, y_pred=predictions)
r2 = r2_score(y_true=target_test, y_pred=predictions)
print(f'Test RMSE: {mse ** 0.5:.2f}, R2: {r2:.2f}')

Test RMSE: 1058.90, R2: 0.82
