# In [9]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Read the car listings from disk into a DataFrame.
file_path = '/car_price_prediction.csv'  # Point this at your copy of the CSV
data = pd.read_csv(file_path)

# Mileage is handled as text here: strip the ' km' suffix and any commas,
# then cast what remains to float.
data['Mileage'] = (
    data['Mileage']
    .str.replace(' km', '')
    .str.replace(',', '')
    .astype(float)
)

# Separate the predictors (X) from the target (y). 'Price' is the target and
# 'ID' is dropped because this script does not treat it as a feature.
X = data.drop(columns=['Price', 'ID'])
y = data['Price']

# Route each column to a preprocessing branch by dtype.
# 'number' matches every numeric dtype (int32, float32, nullable Int64, ...),
# not only int64/float64. A column that appears in neither list is not handed
# to any transformer downstream, so a narrower filter could silently lose
# numeric features.
# NOTE(review): bool columns match neither selector — confirm the CSV has none.
categorical_cols = X.select_dtypes(include=['object']).columns
numerical_cols = X.select_dtypes(include='number').columns

# Estimator: a 100-tree random forest regressor with a fixed seed.
model = RandomForestRegressor(random_state=42, n_estimators=100)

# Categorical branch: fill missing entries with the most frequent value, then
# one-hot encode; categories not seen during fit are ignored at transform time.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Numerical branch: fill missing entries with the column mean, then standardize.
numerical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Apply each branch to its own group of columns.
preprocessor = ColumnTransformer([
    ('num', numerical_transformer, numerical_cols),
    ('cat', categorical_transformer, categorical_cols),
])

# Full pipeline: preprocessing followed by the regressor.
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('model', model),
])

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit on the training split and predict the held-out rows.
# fit() returns the fitted pipeline, so predict() can be chained onto it.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Score the predictions against the true test prices.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE

print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R² Score: {r2}")

# Predict the price of one new car.
# Every value below is a placeholder: replace it with the real attribute of
# the car being priced. Mileage is a plain number here (no ' km' suffix).
# NOTE(review): the column names and value types are assumed to match what
# the pipeline was fitted on — confirm against the training CSV.
new_car_data = pd.DataFrame([{
    'Manufacturer': 'TOYOTA',
    'Model': 'Camry',
    'Prod. year': 2018,
    'Category': 'Sedan',
    'Leather interior': 'Yes',
    'Fuel type': 'Petrol',
    'Engine volume': 2.5,
    'Mileage': 30000,
    'Cylinders': 4,
    'Gear box type': 'Automatic',
    'Drive wheels': 'FWD',
    'Doors': 4,
    'Wheel': 'Left wheel',
    'Color': 'Red',
    'Airbags': 6,
}])

# Run the single-row frame through the fitted pipeline.
predicted_price = pipeline.predict(new_car_data)

print(f"Predicted Car Price: {predicted_price[0]}")



# Output:
# Mean Absolute Error (MAE): 4401.281146187475
# Mean Squared Error (MSE): 114463916.48631892
# Root Mean Squared Error (RMSE): 10698.781074791601
# R² Score: 0.6326535299394285
# Predicted Car Price: 39190.46
