In [1]:
import sys

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, \
    mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.experimental import enable_iterative_imputer
from torch.utils.data import TensorDataset, DataLoader

from src.models.used_car_quote_nn import UsedCarQuoteNN

sys.path.append('../src')
from src.models.pipeline import CarsPipeline

In [2]:
data = pd.read_csv('../datasets/Car details v3.csv')

data["selling_price_log"] = np.log1p(data["selling_price"])

X = data.drop(columns=['selling_price', 'selling_price_log'])
y = data['selling_price']
y_log = data['selling_price_log']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
X_train_log, X_test_log, y_train_log, y_test_log = train_test_split(X, y_log, test_size=0.3, random_state=42)
final_pipeline = CarsPipeline()

X_train_processed = final_pipeline.fit_transform_df(X_train)
X_test_processed = final_pipeline.transform_df(X_test)

X_train_tensor = torch.tensor(X_train_processed.to_numpy(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_processed.to_numpy(), dtype=torch.float32)
y_train_tensor_log = torch.tensor(y_train_log.to_numpy(), dtype=torch.float32).view(-1, 1)
y_test_tensor_log = torch.tensor(y_test_log.to_numpy(), dtype=torch.float32).view(-1, 1)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor_log)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor_log)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)



In [3]:
model = UsedCarQuoteNN(X_train_processed.shape[1])

criterion = torch.nn.L1Loss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

In [4]:
def train_model_batch(model, train_loader, X_val, y_val, epochs=100):
    for epoch in range(epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            y_pred_train = model(X_batch)
            loss = criterion(y_pred_train, y_batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            y_pred_val = model(X_val)
            criterion(y_pred_val, y_val).item()


train_model_batch(model, train_loader, X_test_tensor, y_test_tensor_log, epochs=1000)

In [15]:

y_pred_list = []
y_pred_list_log = []
y_test_list_log = []

model.eval()

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        y_test_pred_log = model(X_batch)

        y_test_pred = torch.expm1(y_test_pred_log)

        y_pred_list.append(y_test_pred)  
        y_pred_list_log.append(y_test_pred_log)
        y_test_list_log.append(y_batch)

y_pred_tensor = torch.cat(y_pred_list).squeeze()
y_pred_log_tensor = torch.cat(y_pred_list_log).squeeze()
y_test_log_tensor = torch.cat(y_test_list_log).squeeze()

y_pred_np = y_pred_tensor.numpy()
y_pred_log_np = y_pred_log_tensor.numpy()
y_test_log_np = y_test_log_tensor.numpy()

y_test_np = np.expm1(y_test_log_np)

metrics_nn = {
    "name": "NN",
    "MAE_training": mean_absolute_error(y_test_log_np, y_pred_log_np),
    "MAE": mean_absolute_error(y_test_np, y_pred_np),
    "RMSE": root_mean_squared_error(y_test_np, y_pred_np),
    "MAPE": mean_absolute_percentage_error(y_test_np, y_pred_np),
    "R2": r2_score(y_test_np, y_pred_np)
}

ValueError: Input contains infinity or a value too large for dtype('float32').

In [None]:
print(metrics_nn)