In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

In [2]:
data = pd.read_csv("car data.csv")
data.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [3]:
data.columns

Index(['Car_Name', 'Year', 'Selling_Price', 'Present_Price', 'Driven_kms',
       'Fuel_Type', 'Selling_type', 'Transmission', 'Owner'],
      dtype='object')

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Driven_kms     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Selling_type   301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


In [5]:
data.isnull().sum()

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Driven_kms       0
Fuel_Type        0
Selling_type     0
Transmission     0
Owner            0
dtype: int64

In [6]:
data['Fuel_Type'].unique()
data['Fuel_Type'].value_counts()

Fuel_Type
Petrol    239
Diesel     60
CNG         2
Name: count, dtype: int64

In [7]:
data['Owner'].unique()


array([0, 1, 3])

In [8]:
data['Transmission'].unique()

array(['Manual', 'Automatic'], dtype=object)

In [9]:
data['Selling_type'].unique()

array(['Dealer', 'Individual'], dtype=object)

In [None]:
# -----------------------------------------------
# üöó CAR PRICE PREDICTION PROJECT (Using replace)
# -----------------------------------------------

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# -----------------------------------------------
# 1) LOAD DATA
# -----------------------------------------------
data = pd.read_csv("car data.csv")   # change filename if needed

print("Dataset Loaded Successfully!\n")
print(data.head())


# -----------------------------------------------
# 2) CLEANING
# -----------------------------------------------

# Remove commas, km, ‚Çπ symbol etc.
data['Present_Price'] = data['Present_Price'].astype(str).str.replace(",", "", regex=True).astype(float)
data['Selling_Price'] = data['Selling_Price'].astype(str).str.replace(",", "", regex=True).astype(float)
data['Driven_kms'] = data['Driven_kms'].astype(str).str.replace(",", "", regex=True)
data['Driven_kms'] = data['Driven_kms'].str.replace("km", "", regex=False).str.strip()
data['Driven_kms'] = data['Driven_kms'].astype(float)

# Fix Year column
data['Year'] = data['Year'].astype(int)

# Create AGE feature
data['Age'] = 2024 - data['Year']


# -----------------------------------------------
# 3) ENCODING USING REPLACE()
# -----------------------------------------------

data.replace({
    'Fuel_Type': {
        'Petrol': 0,
        'Diesel': 1,
        'CNG': 2
    }
}, inplace=True)

data.replace({
    'Selling_type': {
        'Dealer': 0,
        'Individual': 1
    }
}, inplace=True)

data.replace({
    'Transmission': {
        'Manual': 0,
        'Automatic': 1
    }
}, inplace=True)


# -----------------------------------------------
# 4) SELECT FEATURES
# -----------------------------------------------

X = data[['Age', 'Present_Price', 'Driven_kms', 
          'Fuel_Type', 'Selling_type', 'Transmission', 'Owner']]

y = data['Selling_Price']


# -----------------------------------------------
# 5) TRAIN MODEL
# -----------------------------------------------

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

print("\nModel Trained Successfully!")


# -----------------------------------------------
# 6) USER INPUT
# -----------------------------------------------

print("\n--------- ENTER CAR DETAILS TO PREDICT PRICE ---------")

age = float(input("Enter Car Age (in years): "))
present_price = float(input("Enter Present Price of the car (in lakhs): "))
driven = float(input("Enter Driven Kilometers: "))

fuel = input("Enter Fuel Type (Petrol / Diesel / CNG): ").title()
if fuel not in ["Petrol", "Diesel", "Cng"]:
    print("Invalid Fuel Type!")
    exit()
fuel_map = {"Petrol": 0, "Diesel": 1, "Cng": 2}
fuel_val = fuel_map[fuel]

selling = input("Enter Selling Type (Dealer / Individual): ").title()
if selling not in ["Dealer", "Individual"]:
    print("Invalid Selling Type!")
    exit()
sell_map = {"Dealer": 0, "Individual": 1}
sell_val = sell_map[selling]

trans = input("Enter Transmission (Manual / Automatic): ").title()
if trans not in ["Manual", "Automatic"]:
    print("Invalid Transmission Type!")
    exit()
trans_map = {"Manual": 0, "Automatic": 1}
trans_val = trans_map[trans]

owner = int(input("Enter Owner Count (0/1/2/3): "))


# -----------------------------------------------
# 7) MAKE PREDICTION
# -----------------------------------------------

user_input = pd.DataFrame([[
    age, present_price, driven,
    fuel_val, sell_val, trans_val, owner
]], columns=X.columns)

pred_price = model.predict(user_input)[0]

print("\n-------------------------------------------------------")
print(f"Estimated Selling Price: ‚Çπ{pred_price * 1_00_000:,.0f}")
print("-------------------------------------------------------")


Dataset Loaded Successfully!

  Car_Name  Year  Selling_Price  Present_Price  Driven_kms Fuel_Type  \
0     ritz  2014           3.35           5.59       27000    Petrol   
1      sx4  2013           4.75           9.54       43000    Diesel   
2     ciaz  2017           7.25           9.85        6900    Petrol   
3  wagon r  2011           2.85           4.15        5200    Petrol   
4    swift  2014           4.60           6.87       42450    Diesel   

  Selling_type Transmission  Owner  
0       Dealer       Manual      0  
1       Dealer       Manual      0  
2       Dealer       Manual      0  
3       Dealer       Manual      0  
4       Dealer       Manual      0  


  data.replace({
  data.replace({
  data.replace({



Model Trained Successfully!

--------- ENTER CAR DETAILS TO PREDICT PRICE ---------


In [4]:
# --------------------------------------------------------
# üöó CAR PRICE PREDICTION (Simple Version for Your Dataset)
# --------------------------------------------------------

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

# 1) LOAD DATA
data = pd.read_csv("car data.csv")
print("Dataset Loaded Successfully!")

# 2) ENCODING (using replace)
data.replace({'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2}}, inplace=True)
data.replace({'Selling_type': {'Dealer': 0, 'Individual': 1}}, inplace=True)
data.replace({'Transmission': {'Manual': 0, 'Automatic': 1}}, inplace=True)

# 3) SELECT FEATURES
X = data[['Year', 'Present_Price', 'Driven_kms', 'Fuel_Type',
          'Selling_type', 'Transmission', 'Owner']]
y = data['Selling_Price']

# 4) TRAIN MODEL
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=42)

model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

print("\nModel Trained Successfully!")


# --------------------------------------------------------
# 5) USER INPUT SECTION
# --------------------------------------------------------

print("\n-------- ENTER CAR DETAILS --------")

year = int(input("Enter Car Year (e.g., 2015): "))
present_price = float(input("Enter Present Price (in lakhs): "))
driven = int(input("Enter Driven Kilometers: "))

# Fuel Type Input
fuel = input("Enter Fuel Type (Petrol / Diesel / CNG): ").title()
fuel_map = {"Petrol": 0, "Diesel": 1, "Cng": 2}
if fuel not in fuel_map:
    print("‚ùå Invalid Fuel Type! Please enter Petrol/Diesel/CNG")
    exit()
fuel_val = fuel_map[fuel]

# Selling Type Input
sell = input("Selling Type (Dealer / Individual): ").title()
sell_map = {"Dealer": 0, "Individual": 1}
if sell not in sell_map:
    print("‚ùå Invalid Selling Type!")
    exit()
sell_val = sell_map[sell]

# Transmission Input
trans = input("Transmission (Manual / Automatic): ").title()
trans_map = {"Manual": 0, "Automatic": 1}
if trans not in trans_map:
    print("‚ùå Invalid Transmission!")
    exit()
trans_val = trans_map[trans]

owner = int(input("Enter Owner Count (0/1/2/3): "))

# --------------------------------------------------------
# 6) PREDICTION
# --------------------------------------------------------

user_input = pd.DataFrame([[
    year, present_price, driven, fuel_val, sell_val, trans_val, owner
]], columns=X.columns)

pred_price = model.predict(user_input)[0]

print("\n------------------------------------------")
print(f"Estimated Selling Price: ‚Çπ{pred_price * 1_00_000:,.0f}")
print("------------------------------------------")


Dataset Loaded Successfully!


  data.replace({'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2}}, inplace=True)
  data.replace({'Selling_type': {'Dealer': 0, 'Individual': 1}}, inplace=True)
  data.replace({'Transmission': {'Manual': 0, 'Automatic': 1}}, inplace=True)



Model Trained Successfully!

-------- ENTER CAR DETAILS --------


Enter Car Year (e.g., 2015):  2014
Enter Present Price (in lakhs):  5.24
Enter Driven Kilometers:  4795
Enter Fuel Type (Petrol / Diesel / CNG):  cng
Selling Type (Dealer / Individual):  dealer
Transmission (Manual / Automatic):  manual
Enter Owner Count (0/1/2/3):  1



------------------------------------------
Estimated Selling Price: ‚Çπ345,400
------------------------------------------
