This notebook trains a simple Lasso regression model to predict the prices of used cars from the used_cars dataset.

# Loading the libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import  train_test_split
from sklearn.compose import ColumnTransformer

# Importing the dataset

In [None]:
# Location of the used-cars CSV; change this to wherever your copy of the dataset lives.
DATA_PATH = "/used_cars_data.csv"

# pd.read_csv already returns a DataFrame, so no extra pd.DataFrame(...) wrap is needed.
data = pd.read_csv(DATA_PATH)

# Specify the features and the target variable

In [None]:
# The "price" column is the target; every remaining column is kept as a predictor.
y = data["price"]
X = data.drop("price", axis=1)

# Splitting into training and test set

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Encoding the categorical features

In [None]:
# One-hot encode the columns at the listed positions and pass every other
# column through unchanged. Categories unseen during fit are ignored at
# transform time instead of raising.
# NOTE(review): the positional indices assume a specific column order in X — confirm against the dataset.
encoded_positions = [0, 1, 4, 5, 6, 7, 8, 9, 10]
one_hot = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
ct = ColumnTransformer(
    transformers=[("encoder", one_hot, encoded_positions)],
    remainder="passthrough",
)

# Fit the transformer on the training split only, then reuse it for the test split
x_train = np.array(ct.fit_transform(X_train))
x_test = np.array(ct.transform(X_test))

# Standardizing the dataset

In [None]:
# Fit the scaler on the training matrix only, then apply that same fitted
# scaler to both splits so the test set does not influence the scaling.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Training the Lasso Regression Model

In [None]:
# Create the Lasso regressor and fit it on the scaled training data.
# alpha controls the regularization strength; vary it to find the best model.
model = Lasso(alpha=20).fit(x_train, y_train)

# Testing and calculating the performance of the model

In [None]:
# Predict prices for the held-out test set
y_pred = model.predict(x_test)

# scikit-learn metrics take (y_true, y_pred) in that order. MSE happens to be
# symmetric, but keeping the documented order avoids silent mistakes if
# sample weights or an asymmetric metric (e.g. R-squared) are added later.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f"The mse score is: {mse}")
print(f"This is the rmse score: {rmse}")  # other evaluation metrics, such as R-squared, can also be used

# Saving the metrics

In [None]:
# Write both error metrics to metrics.txt, one per line
metric_lines = [
    f"Mean Squared Error: {mse}\n",
    f"Root Mean Squared Error: {rmse}\n",
]
with open("metrics.txt", "w") as file:
    file.writelines(metric_lines)

# Saving the predictions

In [None]:
# Pair each actual test-set price with the model's prediction, rounding only
# the 'predicted price' column to 2 decimal places
Prediction_File = pd.DataFrame(
    {
        'real values': y_test,
        'predicted price': y_pred,
    }
).round({'predicted price': 2})

# Write the comparison table to a CSV file, leaving out the index column
Prediction_File.to_csv('Prediction.csv', index=False)