In [1]:
import pandas as pd
import numpy as np

# **Numeric Converter**
---
Purpose:
Converts values that should be numeric but are currently stored in other formats (like strings) into their correct numeric types.

How to Use:

1. (Optional) Create a copy of your DataFrame containing only the columns that need numeric conversion.


2. Pass this DataFrame into the Numeric Converter function.


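3. The function returns a new DataFrame in which values that are meant to be numeric are converted to their proper types (int, float, etc.). Note that rows containing any missing value are dropped before conversion, and the original DataFrame is left unchanged.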


In [2]:
# Numeric Converter ~NC
# Translation tables used to delete characters from a token in a single pass.
# '.' is not part of the punctuation set, so decimal points survive.
_NC_PUNCT_TABLE = str.maketrans('', '', '!"#$%&\'()*+,-/:;<=>?@[\\]^_`{|}~₹€£')
_NC_ALPHA_TABLE = str.maketrans('', '', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')


def _nc_is_plain_number(value):
  """Return True for int/float/NumPy numeric scalars; bool is excluded."""
  return isinstance(value, (int, float, np.number)) and not isinstance(value, bool)


def _nc_first_token(value):
  """Return the first whitespace-separated token of str(value), or '' if blank."""
  parts = str(value).split()
  return parts[0] if parts else ""


def Numeric_Converter(df):
  """Convert text columns that hold numbers-with-units into numeric columns.

  Processing steps:
    1. Rows containing any missing value are dropped (index labels are kept,
       not reset). The input frame itself is never modified.
    2. For every object/string column, each value is reduced to its first
       whitespace-separated token and the punctuation/currency characters
       in the table above are removed.
    3. If every cleaned token is purely alphabetic, the column is treated as
       genuine text and left untouched.
    4. Otherwise ASCII letters are removed and the remainder is passed to
       ``pd.to_numeric(..., errors="coerce")``.

  Caveats that follow from the steps above:
    * '-' is stripped with the other punctuation, so "-5" becomes 5.
    * A text column whose first tokens are not *all* alphabetic is coerced;
      entries without digits become NaN.
    * Blank strings become NaN; entries that are already numbers inside an
      object column are passed through unchanged.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame whose text columns should be examined.

  Returns
  -------
  pandas.DataFrame
      A new frame (rows with missing values removed) in which convertible
      text columns have been replaced by numeric ones.
  """
  # .copy() guarantees an independent frame that is safe to assign into.
  result = df.dropna().copy()

  for name in result.columns:
    column = result[name]

    # Cover both classic object columns and the dedicated string dtypes;
    # a plain `dtype == 'object'` test silently skips the latter.
    if not (pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column)):
      continue

    # Keep real numbers as they are; reduce everything else to the first
    # token with punctuation removed.
    cleaned = [
      value if _nc_is_plain_number(value)
      else _nc_first_token(value).translate(_NC_PUNCT_TABLE)
      for value in column
    ]

    # Entirely alphabetic column -> genuine text, nothing to convert.
    if all(isinstance(token, str) and token.isalpha() for token in cleaned):
      continue

    # Drop unit suffixes such as "kmpl", "CC", "bhp", "Nm".
    numeric_text = [
      token.translate(_NC_ALPHA_TABLE) if isinstance(token, str) else token
      for token in cleaned
    ]

    # dtype=object keeps the mixed str/number values as-is until to_numeric.
    result[name] = pd.to_numeric(
      pd.Series(numeric_text, index=column.index, dtype=object),
      errors="coerce",
    )

  return result


# **Working**

1. Importing the dataset that requires cleaning

In [3]:
# Load the raw dataset; the path is relative to the current working directory.
df = pd.read_csv('Dataset/Car dekho.csv')

2. Calling Numeric_Converter Function

In [4]:
# Build the converted frame; Numeric_Converter works on a copy, so `df` stays as loaded.
df_new = Numeric_Converter(df)

# Old Dataset

In [5]:
# Preview the first rows of the original (unconverted) frame.
df.head()

Unnamed: 0,Name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage,engine,max_power,torque,seats
0,Maruti Alto 800 LXI Opt,2023,410000,10000,Petrol,Individual,Manual,First Owner,19.03 kmpl,999 CC,71.01bhp,96Nm,5.0
1,Skoda Slavia 1.0 TSI Ambition,2023,1350000,10000,Petrol,Individual,Manual,First Owner,14.08 kmpl,1956 CC,167.67bhp,350nm,5.0
2,BMW 3 Series Gran Limousine 320Ld Luxury Line,2023,5800000,1000,Diesel,Dealer,Automatic,First Owner,18.15 kmpl,998 CC,118.35bhp,172Nm,5.0
3,MG ZS EV Exclusive,2023,2650000,10000,Electric,Dealer,Automatic,First Owner,32.52 kmpl,998 CC,58.33bhp,78Nm,5.0
4,Tata Punch Adventure,2023,715000,10000,Petrol,Individual,Manual,First Owner,12.15 kmpl,1451 CC,141bhp,250Nm,5.0


# New Dataset

In [6]:
# Preview the first rows of the converted frame for comparison with the original.
df_new.head()

Unnamed: 0,Name,year,selling_price,km_driven,fuel,seller_type,transmission,owner,mileage,engine,max_power,torque,seats
0,Maruti Alto 800 LXI Opt,2023,410000,10000,Petrol,Individual,Manual,First Owner,19.03,999,71.01,96.0,5.0
1,Skoda Slavia 1.0 TSI Ambition,2023,1350000,10000,Petrol,Individual,Manual,First Owner,14.08,1956,167.67,350.0,5.0
2,BMW 3 Series Gran Limousine 320Ld Luxury Line,2023,5800000,1000,Diesel,Dealer,Automatic,First Owner,18.15,998,118.35,172.0,5.0
3,MG ZS EV Exclusive,2023,2650000,10000,Electric,Dealer,Automatic,First Owner,32.52,998,58.33,78.0,5.0
4,Tata Punch Adventure,2023,715000,10000,Petrol,Individual,Manual,First Owner,12.15,1451,141.0,250.0,5.0


# Code Impact

* Code previously needed to bring the dataset to this state:

1. `df.dropna(inplace=True)`  
2. `df.reset_index(drop=True, inplace=True)`  
3. `df["mileage"] = df["mileage"].str.strip(" kmplg/")`  
   `df["mileage"] = df["mileage"].astype(float)`  
4. `df["engine"] = df["engine"].str.strip(" C")`  
   `df["engine"] = df["engine"].astype(int)`  
5. `df["max_power"] = df["max_power"].str.strip(" bhp")`  
   `df["max_power"] = df["max_power"].astype(float)`  
6. `df["torque"] = df["torque"].apply(func=lambda x: x.upper().split()[0])`  
   `df["torque"] = df["torque"].apply(func=lambda x: x.upper().split("N")[0])`  
   `df["torque"] = df["torque"].apply(func=lambda x: x.upper().split("K")[0])`  
   `df["torque"] = df["torque"].apply(func=lambda x: x.upper().split("@")[0])`  
   `df["torque"] = df["torque"].apply(func=lambda x: x.upper().split("(")[0])`  
   `df["torque"] = df["torque"].astype(float)`

* This function simplifies the entire process, replacing multiple lines of code and all the hard thinking with just one step:

1. `df_new = Numeric_Converter(df)`