In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Read the car listings CSV (expected in the working directory) into a DataFrame.
df = pd.read_csv('car_data.csv')

# Show the top five rows as a quick check that the file parsed as expected.
print(df.head(n=5))


  Car_Name  Year  Selling_Price  Present_Price  Driven_kms Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Selling_type Transmission  Owner  
0       Dealer       Manual      0  
1       Dealer       Manual      0  
2       Dealer       Manual      0  
3       Dealer       Manual      0  
4       Dealer       Manual      0  


In [None]:
# Drop rows with missing values and the free-text 'Car_Name' column.
# No in-place mutation is used, and `errors='ignore'` keeps this cell safe to
# re-run after 'Car_Name' has already been removed from `df`.
df = df.dropna().drop(columns=['Car_Name'], errors='ignore')

# One-hot encode the categorical columns. Only the columns still present are
# encoded, so re-running the cell on an already-encoded frame is a no-op
# rather than a KeyError.
categorical_columns = ['Fuel_Type', 'Selling_type', 'Transmission']
columns_to_encode = [col for col in categorical_columns if col in df.columns]
if columns_to_encode:
    df = pd.get_dummies(df, columns=columns_to_encode)

# Separate features (everything except the target) from the target variable.
X = df.drop(columns=['Selling_Price'])
y = df['Selling_Price']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Initialize the RandomForestRegressor with a fixed seed. Random forests
# bootstrap rows and subsample features, so without `random_state` the fitted
# model (and every metric derived from it) changes on each run. The seed
# matches the one used for the train/test split; results from earlier
# unseeded runs will differ slightly.
model = RandomForestRegressor(random_state=42)

# Train the model on the training split.
model.fit(X_train, y_train)


In [None]:
# Score the held-out rows with the fitted model.
y_pred = model.predict(X_test)

# Compute both error metrics first: MSE, then RMSE as its square root
# (RMSE is in the same units as the target).
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report the metrics.
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")


Mean Squared Error: 0.8467929160655744
Root Mean Squared Error: 0.9202135165631803


In [None]:
# Collect the training column labels and the fitted forest's per-feature
# importances. The importances array is positional: entry i belongs to
# training_columns[i].
training_columns = X_train.columns
feature_importances = model.feature_importances_

# Print the importances first, then the column labels they line up with.
print("Feature importances:", feature_importances)
print("Columns in training data:", training_columns)


Feature importances: [5.82439055e-02 8.84560369e-01 3.61189621e-02 5.89474745e-05
 2.27942480e-05 3.78075729e-03 1.84771507e-03 2.14169105e-03
 2.25381357e-03 4.12723070e-03 6.84381376e-03]
Columns in training data: Index(['Year', 'Present_Price', 'Driven_kms', 'Owner', 'Fuel_Type_CNG',
       'Fuel_Type_Diesel', 'Fuel_Type_Petrol', 'Selling_type_Dealer',
       'Selling_type_Individual', 'Transmission_Automatic',
       'Transmission_Manual'],
      dtype='object')


In [None]:
# Example: predict the price of a single new car.
# Keys must be the one-hot encoded column names the model was trained on;
# exactly one indicator per categorical group should be set to 1.
new_data = pd.DataFrame({
    'Year': [2015],
    'Present_Price': [8.5],
    'Driven_kms': [15000],
    'Owner': [0],
    'Fuel_Type_CNG': [0],
    'Fuel_Type_Diesel': [1],
    'Fuel_Type_Petrol': [0],
    'Selling_type_Dealer': [1],
    'Selling_type_Individual': [0],
    'Transmission_Automatic': [1],
    'Transmission_Manual': [0],
})

# Fail loudly on misspelled or unexpected columns instead of letting the
# reindex below silently discard them.
unknown_columns = new_data.columns.difference(X_train.columns)
if len(unknown_columns) > 0:
    raise ValueError(f"Columns not seen during training: {list(unknown_columns)}")

# Align with the training schema: same columns, same order. Any training
# column omitted above (e.g. an unset one-hot indicator) defaults to 0.
new_data = new_data.reindex(columns=X_train.columns, fill_value=0)

# Make prediction (a one-element array, one value per input row).
predicted_price = model.predict(new_data)
print(f"Predicted Price: {predicted_price}")


Predicted Price: [6.7539]
