In [1]:
import pandas as pd

In [2]:
# Read the car listings from disk, then preview the first five rows.
data = pd.read_csv(filepath_or_buffer="car data.csv")
data.head(n=5)

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [3]:
# Remove the 'Owner' column, which is not used as a feature below.
# A non-mutating drop with errors='ignore' keeps this cell re-runnable:
# the original in-place drop raised KeyError when the cell was executed twice.
data = data.drop(columns='Owner', errors='ignore')
data.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual


In [4]:
# Columns fed to the model as inputs, and the column it learns to predict.
features = [
    'Year',
    'Selling_Price',
    'Present_Price',
    'Kms_Driven',
    'Fuel_Type',
    'Seller_Type',
    'Transmission',
]
target = 'Car_Name'

In [5]:
# Feature matrix is an independent copy so later encoding does not touch `data`;
# the label vector is the target column of the same frame.
X = data.loc[:, features].copy()
y = data.loc[:, target]

In [6]:
# Count missing values per column before imputation.
print(data.isna().sum())

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
dtype: int64


In [7]:
# Impute missing values, if any: column mean for the price columns and the
# most frequent value (mode) for the three categorical columns.
# The fill is done with a single non-mutating DataFrame.fillna call because
# `data[col].fillna(..., inplace=True)` operates on a column selection, which
# is deprecated in pandas 2.x and has no effect on `data` under Copy-on-Write.
mean_filled_columns = ['Selling_Price', 'Present_Price']
mode_filled_columns = ['Fuel_Type', 'Seller_Type', 'Transmission']  # categorical columns
fill_values = {column: data[column].mean() for column in mean_filled_columns}
fill_values.update({column: data[column].mode()[0] for column in mode_filled_columns})
data = data.fillna(value=fill_values)

# X and y were sliced from `data` before this cell ran, so rebuild them here;
# otherwise the imputed values would never reach the model inputs.
X = data[features].copy()
y = data[target]


In [8]:
# Re-count missing values per column to confirm the imputation step left none.
print(data.isna().sum())

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
dtype: int64


In [9]:
from sklearn.preprocessing import LabelEncoder

In [10]:
# Encoder instance used by the following cell to integer-encode the categorical feature columns.
label_encoder = LabelEncoder()

In [11]:
# Replace each categorical feature with integer codes. The encoder is re-fit
# for every column, so after the loop it holds the mapping of the last one only.
for categorical_column in ('Fuel_Type', 'Seller_Type', 'Transmission'):
    X[categorical_column] = label_encoder.fit_transform(X[categorical_column])

In [12]:
from sklearn.model_selection import train_test_split,cross_val_score

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
from sklearn.ensemble import RandomForestClassifier

In [15]:
# Random forest with 100 trees; the fixed seed keeps training repeatable.
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)
model.fit(X_train, y_train)

In [16]:
# Predict a car name for every row of the held-out test split.
predictions = model.predict(X_test)

In [17]:
from sklearn.metrics import accuracy_score,confusion_matrix, f1_score
from sklearn.metrics import classification_report

In [18]:
# Fraction of test rows whose predicted car name matches the true one.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
print("Accuracy : ", accuracy)

Accuracy :  0.4426229508196721


In [19]:
# Compute the true-vs-predicted label counts first, then print them under a heading.
conf_matrix = confusion_matrix(y_test, predictions)
print("Confusion Matrix:")
print(conf_matrix)

Confusion Matrix:
[[0 0 0 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 4 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [20]:
# Weighted F1 on the held-out split. Many car names have no true or no
# predicted samples in the test set, which makes their precision/recall
# undefined; zero_division=0 scores those classes as 0 (the same value the
# default produces) without emitting a warning per class.
f1 = f1_score(y_test, predictions, average='weighted', zero_division=0)
print(f"F1 Score: {f1}")

# Cross-validation
cv_scores = cross_val_score(model, X, y, cv=5)  # 5-fold cross-validation
print(f"Cross-validation scores: {cv_scores}")
print(f"Mean Cross-validation score: {cv_scores.mean()}")

F1 Score: 0.4294621589703557




Cross-validation scores: [0.39344262 0.45       0.46666667 0.41666667 0.45      ]
Mean Cross-validation score: 0.4353551912568306


In [21]:
# Per-class precision/recall/F1 table. zero_division=0 reports undefined
# metrics (classes with no true or no predicted samples) as 0.00 without the
# flood of undefined-metric warnings the default setting prints.
report = classification_report(y_test, predictions, zero_division=0)
# Print the label on its own line so the table's header row stays aligned
# with the columns beneath it.
print("Classification Report : ", report, sep="\n")

Classification Report :                             precision    recall  f1-score   support

                Activa 4g       0.00      0.00      0.00         1
   Bajaj Avenger 220 dtsi       0.00      0.00      0.00         1
 Bajaj Avenger Street 220       0.00      0.00      0.00         0
       Bajaj Discover 125       0.00      0.00      0.00         1
     Bajaj Pulsar  NS 200       0.00      0.00      0.00         1
      Bajaj Pulsar 135 LS       0.00      0.00      0.00         0
             Hero Extreme       0.00      0.00      0.00         1
     Hero Splender iSmart       1.00      1.00      1.00         1
         Honda Activa 125       0.00      0.00      0.00         1
          Honda Activa 4G       0.00      0.00      0.00         0
           Honda CB Shine       0.00      0.00      0.00         1
         Honda CB Trigger       0.00      0.00      0.00         1
         Honda CB Unicorn       0.00      0.00      0.00         0
         Honda CB twister       0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [27]:
def get_recommendation(year, selling_price, present_price, kms_driven, fuel_type, seller_type, transmission,
                       estimator=None, reference_data=None):
    """Predict the car name for a single listing described by the arguments.

    The categorical inputs are converted to the same integer codes the model
    was trained on. LabelEncoder numbers the distinct values of a column in
    sorted order, so the codes are rebuilt here from the sorted distinct
    values of the corresponding training column. (The previous version fit a
    fresh encoder on the single input row, which encoded every value as 0
    no matter what was entered.)

    Parameters
    ----------
    year, selling_price, present_price, kms_driven
        Numeric feature values, passed to the model unchanged.
    fuel_type, seller_type, transmission
        Category labels; each must be a value present in the matching column
        of the reference data.
    estimator : optional
        Object with a ``predict`` method. Defaults to the notebook-level
        ``model``.
    reference_data : optional
        DataFrame holding the 'Fuel_Type', 'Seller_Type' and 'Transmission'
        columns the codes are derived from. Defaults to the notebook-level
        ``data``.

    Returns
    -------
    The first (and only) element of ``estimator.predict`` for the input row.

    Raises
    ------
    ValueError
        If a categorical input is not one of the values found in the
        reference data.
    """
    # Fall back to the notebook globals only when nothing was injected.
    if estimator is None:
        estimator = model
    if reference_data is None:
        reference_data = data

    # Create DataFrame for user input (column order matches the training features)
    input_data = pd.DataFrame({
        'Year': [year],
        'Selling_Price': [selling_price],
        'Present_Price': [present_price],
        'Kms_Driven': [kms_driven],
        'Fuel_Type': [fuel_type],
        'Seller_Type': [seller_type],
        'Transmission': [transmission],
    })

    # Encode categorical variables with the training-time mapping.
    for column in ('Fuel_Type', 'Seller_Type', 'Transmission'):
        known_values = sorted(reference_data[column].dropna().unique())
        codes = {value: code for code, value in enumerate(known_values)}
        raw_value = input_data.at[0, column]
        if raw_value not in codes:
            raise ValueError(
                f"Unknown {column} {raw_value!r}; expected one of {known_values}"
            )
        input_data[column] = codes[raw_value]

    # Make prediction using the trained model
    prediction = estimator.predict(input_data)
    return prediction[0]  # Returning the predicted car name


# Collect the listing details from the user, one prompt per feature.
year = int(input("Enter the car's year of production: "))
selling_price = float(input("Enter the selling price of the car: "))
present_price = float(input("Enter the present price of the car: "))
kms_driven = int(input("Enter the total kilometers driven: "))
fuel_type = input("Enter the fuel type (e.g., Petrol, Diesel, CNG): ")
seller_type = input("Enter the seller type (e.g., Dealer, Individual): ")
transmission = input("Enter the transmission type (e.g., Manual, Automatic): ")


# Ask the trained model for the matching car name, passing each value by keyword.
recommended_car = get_recommendation(
    year=year,
    selling_price=selling_price,
    present_price=present_price,
    kms_driven=kms_driven,
    fuel_type=fuel_type,
    seller_type=seller_type,
    transmission=transmission,
)

print(f"The recommended car is: {recommended_car}")

Enter the car's year of production: 2019
Enter the selling price of the car: 12
Enter the present price of the car: 12
Enter the total kilometers driven: 2000
Enter the fuel type (e.g., Petrol, Diesel, CNG): CNG
Enter the seller type (e.g., Dealer, Individual): Individual
Enter the transmission type (e.g., Manual, Automatic): Automatic
The recommended car is: innova
