In [9]:
import numpy as np
import pandas as pd
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
import warnings
warnings.filterwarnings('ignore')

# Load the car-price table and split it into the feature frame and the target.
data = pd.read_csv('CarPrice_Assignment.csv')
y = data['price']
X = data.drop(columns=['price'])

# Columns that are one-hot encoded; everything else in X is scaled as numeric.
categorical_features = [
    'CarName',
    'fueltype',
    'aspiration',
    'doornumber',
    'carbody',
    'drivewheel',
    'enginelocation',
    'enginetype',
    'cylindernumber',
    'fuelsystem',
]
# NOTE(review): "numeric" here means "not listed above", not "numeric dtype".
# Any row-identifier column present in the CSV would be scaled and fed to the
# models as a feature -- confirm against the file's actual columns.
numerical_features = list(X.drop(columns=categorical_features).columns)

# Standardise numeric columns; one-hot encode categoricals. Categories that
# were not seen during fit are ignored at predict time instead of raising.
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])
categorical_transformer = Pipeline(
    steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))]
)
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ]
)

### Bagging

In [10]:
# Bagging ensemble of 10 decision trees; max_samples=0.8 sizes each tree's
# training sample at 80% of the training rows.
base_estimator = DecisionTreeRegressor(random_state=42)
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old spelling
# was removed in 1.4, so naming either keyword fails on some installed version.
bagging_model = BaggingRegressor(
    base_estimator,
    n_estimators=10,
    max_samples=0.8,
    random_state=42,
)
# Chain the preprocessor and the model so the scaler/encoder are fitted on the
# training split only.
model_pipeline = Pipeline([('preprocessor', preprocessor), ('model', bagging_model)])

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model_pipeline.fit(X_train, y_train)
y_pred = model_pipeline.predict(X_test)

# Error metrics on the held-out split; RMSE is in the same units as the target.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
print("Root Mean Squared Error (RMSE):", rmse)

Mean Absolute Error (MAE): 1701.7105609756095
Mean Squared Error (MSE): 6114789.889631438
Root Mean Squared Error (RMSE): 2472.81012001153


### Random Forest

In [11]:
# Random forest of 100 trees, seeded so the run is repeatable.
random_forest_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Preprocessing and model are fitted together as one estimator.
model_pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('model', random_forest_model),
    ]
)

# Same 80/20 split and seed as the bagging cell, repeated here so this cell
# can be run on its own.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model_pipeline.fit(X_train, y_train)
y_pred = model_pipeline.predict(X_test)

# Held-out error metrics.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

for metric_label, metric_value in (
    ("Mean Absolute Error (MAE):", mae),
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
):
    print(metric_label, metric_value)


Mean Absolute Error (MAE): 1376.4869024390243
Mean Squared Error (MSE): 3666126.8105526576
Root Mean Squared Error (RMSE): 1914.7132449932699
