**Import Libraries**

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

**Load the Dataset**

In [2]:
# Read the used-car price CSV from the Kaggle input directory.
# Adjust the path below when running this notebook outside Kaggle.
car_data = pd.read_csv(
    '/kaggle/input/car-price-predictionused-cars/car data.csv'
)

**Data Exploration and Preprocessing**

In [3]:
# Preview the first rows to see which columns exist and how values are formatted
head_preview = car_data.head()
print(head_preview)

# Count missing values per column to decide whether any cleaning is required
missing_per_column = car_data.isnull().sum()
print(missing_per_column)

# Optional clean-up, only if the checks above call for it:
#   - drop columns that are not needed with car_data.drop([...], axis=1)
#   - drop incomplete rows with car_data.dropna()
#   - or impute a column, e.g. fillna() with that column's mean or median

  Car_Name  Year  Selling_Price  Present_Price  Driven_kms Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Selling_type Transmission  Owner  
0       Dealer       Manual      0  
1       Dealer       Manual      0  
2       Dealer       Manual      0  
3       Dealer       Manual      0  
4       Dealer       Manual      0  
Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Driven_kms       0
Fuel_Type        0
Selling_type     0
Transmission     0
Owner            0
dtype: int64


**Feature Selection and Engineering**

In [4]:
# Select the predictor columns (X) and the target (y). Car_Name is not part of
# the feature set.
X = car_data[['Year', 'Present_Price', 'Driven_kms', 'Fuel_Type', 'Selling_type', 'Transmission', 'Owner']]
y = car_data['Selling_Price']

# One-hot encode the categorical columns. Encoding happens before the split, so
# both splits are cut from the same encoded frame and start with the same columns.
X = pd.get_dummies(X)

# Hold out 20% of the rows for testing; the seed is fixed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Guarantee the test matrix has exactly the training columns, in training order.
# reindex builds a new frame (any column absent from X_test is created and
# filled with 0) rather than assigning columns one at a time onto the split
# result, which would append them in arbitrary set order.
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

**Build and Train the Model**

In [5]:
# Random forest with 100 trees; the seed is fixed at 42 for repeatable runs
rf_regressor = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

# Fit on the training split only; the test split stays unseen until evaluation
rf_regressor.fit(X_train, y_train)

**Make Predictions and Evaluate the Model**

In [6]:
# Predict selling prices for the held-out rows
y_pred = rf_regressor.predict(X_test)

# Score the predictions against the true test targets
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report both metrics rounded to two decimals
print(f"Mean Squared Error: {mse:.2f}")
print(f"R-squared: {r2:.2f}")

Mean Squared Error: 0.81
R-squared: 0.96


**Print the order of feature names for both the training and testing datasets**

In [7]:
# Show the column order of the training matrix (label and Index on separate lines)
print("Training Feature Names:", X_train.columns, sep="\n")

# Show the column order of the testing matrix, preceded by a blank line
print("\nTesting Feature Names:", X_test.columns, sep="\n")

Training Feature Names:
Index(['Year', 'Present_Price', 'Driven_kms', 'Owner', 'Fuel_Type_CNG',
       'Fuel_Type_Diesel', 'Fuel_Type_Petrol', 'Selling_type_Dealer',
       'Selling_type_Individual', 'Transmission_Automatic',
       'Transmission_Manual'],
      dtype='object')

Testing Feature Names:
Index(['Year', 'Present_Price', 'Driven_kms', 'Owner', 'Fuel_Type_CNG',
       'Fuel_Type_Diesel', 'Fuel_Type_Petrol', 'Selling_type_Dealer',
       'Selling_type_Individual', 'Transmission_Automatic',
       'Transmission_Manual'],
      dtype='object')


**Test the Model with Custom Data**

In [8]:
# Score one hand-written example. The values are entered already one-hot
# encoded, so the keys must be the encoded column names used for training.
custom_data = pd.DataFrame({
    'Year': [2018],
    'Present_Price': [10.0],
    'Driven_kms': [50000],
    'Owner': [0],
    'Fuel_Type_CNG': [0],
    'Fuel_Type_Diesel': [0],
    'Fuel_Type_Petrol': [1],
    'Selling_type_Dealer': [0],
    'Selling_type_Individual': [1],
    'Transmission_Automatic': [0],
    'Transmission_Manual': [1]
})

# Fail loudly if the hand-typed columns drift from the training schema, instead
# of predicting on a frame with a misspelled or forgotten feature.
missing_custom_columns = X_train.columns.difference(custom_data.columns)
unexpected_custom_columns = custom_data.columns.difference(X_train.columns)
if len(missing_custom_columns) > 0 or len(unexpected_custom_columns) > 0:
    raise ValueError(
        "custom_data columns do not match the training features; "
        f"missing: {list(missing_custom_columns)}, "
        f"unexpected: {list(unexpected_custom_columns)}"
    )

# Put the columns in the training order so the result does not depend on the
# order in which the dict keys above were typed.
custom_data = custom_data[X_train.columns]

custom_prediction = rf_regressor.predict(custom_data)

print(f"Predicted Selling Price for Custom Data: {custom_prediction[0]:.2f}")

Predicted Selling Price for Custom Data: 7.79
