Renaming Inconsistent Values

In [21]:
import pandas as pd

data = {
    'Name': ['Amit', 'Priya', 'John', 'Neha', 'Ravi'],
    'Gender': ['Male', 'F', 'M', 'Female', 'n/a'],
    'Score': [85, '?', 90, 88, 'unknown']
}

df = pd.DataFrame(data)
print(df)

    Name  Gender    Score
0   Amit    Male       85
1  Priya       F        ?
2   John       M       90
3   Neha  Female       88
4   Ravi     n/a  unknown


Replace one specific value

In [22]:
print(df.replace("?",0))

    Name  Gender    Score
0   Amit    Male       85
1  Priya       F        0
2   John       M       90
3   Neha  Female       88
4   Ravi     n/a  unknown


Replacing more than one values at once

In [23]:
print(df.replace({"M":"Male","F":"Female","?":0,"n/a":"Unknown"}))

    Name   Gender    Score
0   Amit     Male       85
1  Priya   Female        0
2   John     Male       90
3   Neha   Female       88
4   Ravi  Unknown  unknown


### Practice Tasks

Q1: Replace '?' and 'unknown' in the 'Score' column with 0

Q2: Standardize 'Gender' column by replacing 'F' → 'Female', 'M' → 'Male', 'n/a' → 'Other'

In [24]:
import pandas as pd

data = {
    'Name': ['Amit', 'Priya', 'John', 'Neha', 'Ravi'],
    'Gender': ['Male', 'F', 'M', 'Female', 'n/a'],
    'Score': [85, '?', 90, 88, 'unknown']
}

df = pd.DataFrame(data)

#ques 1
df["Score"].replace({"?":0,"unknown":0},inplace=True)
print(df['Score'].dtype)
# df["Score"] = score
# print(df['Score'].astype(int))

#ques 2
print(df["Gender"].replace({"F":"Female","M":"Male","n/a":"Other"}))

int64
0      Male
1    Female
2      Male
3    Female
4     Other
Name: Gender, dtype: object


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Score"].replace({"?":0,"unknown":0},inplace=True)
  df["Score"].replace({"?":0,"unknown":0},inplace=True)


Renaming

Renaming the columns

In [25]:
import pandas as pd

data = {
    "Std Name": ["Amit", "Priya", "John"],
    "Maths ": [85, 90, 78],
    "Eng": [88, 92, 80],
    "Sci": [82, 89, 85]
}

df = pd.DataFrame(data, index=["st_001", "st_002", "st_003"])
print(df)


       Std Name  Maths   Eng  Sci
st_001     Amit      85   88   82
st_002    Priya      90   92   89
st_003     John      78   80   85


### Practice Tasks

Q1. Rename "Std Name" to "Name"

Q2. Rename "Maths " to "Maths" (remove the space)

Q3. Rename "Eng" and "Sci" to "English" and "Science"

Q4. Rename index "st_001" → "S1", "st_002" → "S2", and "st_003" → "S3"

Q5. Reset the index (convert row labels to default numbers)

In [36]:
#q1 - change std name to name
df.rename(columns={"Std Name":"Name"},inplace=True)
print(df)

#q2 - remove space from maths
df.rename(columns={"Maths ":"Math"},inplace=True)
print(df)

#q3 - rename eng and sci
df.rename(columns={"Eng":"English","Sci":"Science"},inplace=True)
print(df)

#q4 - rename index st_001 to s1
df.rename({"st_001":"S1","st_002":"S2","st_003":"S3"},axis=0,inplace=True)
print(df)

#q5 - reset the index
print(df.reset_index(drop=True))

         Name  Math  English  Science
st_001   Amit    85       88       82
st_002  Priya    90       92       89
st_003   John    78       80       85
         Name  Math  English  Science
st_001   Amit    85       88       82
st_002  Priya    90       92       89
st_003   John    78       80       85
         Name  Math  English  Science
st_001   Amit    85       88       82
st_002  Priya    90       92       89
st_003   John    78       80       85
     Name  Math  English  Science
S1   Amit    85       88       82
S2  Priya    90       92       89
S3   John    78       80       85
    Name  Math  English  Science
0   Amit    85       88       82
1  Priya    90       92       89
2   John    78       80       85


In [None]:
df.rename(index={"stud_2":"Student 2"}, columns={"Eng":"English","Maths":"Math"})