Importing Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder #this is not necessary as xgboost does not need it dataset scaled
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import  train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingRegressor

Loading the dataset

In [None]:
# Location of the used-cars CSV; change this to point at your copy of the dataset.
DATA_PATH = "/used_cars_data.csv"

# read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrapping is needed.
data = pd.read_csv(DATA_PATH)

Separating the features (X) from the target (y)

In [None]:
# Target: the "price" column. Features: every remaining column.
y = data["price"]
X = data.drop(columns=["price"])

Splitting into the train and test sets

In [None]:
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# split the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=32,
)

OneHot Encoding the categorical variables, if needed

In [None]:
# One-hot encode the columns at the listed positions of X; every other column
# is passed through unchanged (remainder="passthrough").
# NOTE(review): the positions are hard-coded and presumably correspond to the
# categorical columns of this particular dataset - confirm against the CSV.
ct = ColumnTransformer(
    transformers=[
        (
            "encoder",
            OneHotEncoder(sparse_output=False, handle_unknown='ignore'),
            [0, 1, 4, 5, 6, 7, 8, 9, 10],
        ),
    ],
    remainder="passthrough",
)

# Fit the transformer on the training rows only, then apply that same fit to
# the test rows.
x_train = np.array(ct.fit_transform(X_train))
x_test = np.array(ct.transform(X_test))

Standardize features, if needed

In [None]:
# Learn the scaling statistics from the training matrix only, then apply the
# same transformation to both the training and the test matrices.
scaler = StandardScaler().fit(x_train)
X_train_scaled = scaler.transform(x_train)
X_test_scaled = scaler.transform(x_test)

Training the model

In [None]:
# Seed the estimator so repeated runs build the same ensemble and report the
# same metrics; 32 matches the seed used for the train/test split.
model = GradientBoostingRegressor(random_state=32)

# Train on the scaled training features against the training prices.
model.fit(X_train_scaled, y_train)

Predict on the test set and compute the error metrics

In [None]:
# Predict prices for the held-out (scaled) test features.
y_pred = model.predict(X_test_scaled)

# Mean squared error first, then RMSE as its square root.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)

Save the metrics results to a file

In [None]:
# Write both metrics to metrics.txt, one per line ("w" mode replaces any
# existing file of that name).
with open("metrics.txt", "w") as file:
    file.writelines(
        (
            f"Mean Squared Error: {mse}\n",
            f"Root Mean Squared Error: {rmse}\n",
        )
    )

Saving the predictions, if needed

In [None]:
# Put the actual prices next to the model's predictions, rounding only the
# predicted column to 2 decimal places.
Prediction_File = pd.DataFrame(
    {'real values': y_test, 'predicted price': y_pred}
).round({'predicted price': 2})

# Export the comparison table without the index column.
Prediction_File.to_csv('Prediction.csv', index=False)