# Updating Rows and Columns - Modifying Data Within DataFrames

In [1]:
import pandas as pd

In [2]:
People = {
    "First Name" : ["Asif Antaj", "Afrin Jahan", "Afifa Jahan", "Asif Antaj"],
    "Last Name" : ["Robin", "Rupa", "Rafa", "Rayhan"],
    "Email" : ["AsifAntajRobin@Gmal.Com", "Afrin@Cmail.Com", "Afifa@Gmail.Com", "Asif@Gmail.Com"]
}

In [3]:
df = pd.DataFrame(People)
df

Unnamed: 0,First Name,Last Name,Email
0,Asif Antaj,Robin,AsifAntajRobin@Gmal.Com
1,Afrin Jahan,Rupa,Afrin@Cmail.Com
2,Afifa Jahan,Rafa,Afifa@Gmail.Com
3,Asif Antaj,Rayhan,Asif@Gmail.Com


In [4]:
df.columns

Index(['First Name', 'Last Name', 'Email'], dtype='object')

In [5]:
df.columns = ["First_Name", "Last_Name", "User_Email"]
df

Unnamed: 0,First_Name,Last_Name,User_Email
0,Asif Antaj,Robin,AsifAntajRobin@Gmal.Com
1,Afrin Jahan,Rupa,Afrin@Cmail.Com
2,Afifa Jahan,Rafa,Afifa@Gmail.Com
3,Asif Antaj,Rayhan,Asif@Gmail.Com


In [6]:
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,USER_EMAIL
0,Asif Antaj,Robin,AsifAntajRobin@Gmal.Com
1,Afrin Jahan,Rupa,Afrin@Cmail.Com
2,Afifa Jahan,Rafa,Afifa@Gmail.Com
3,Asif Antaj,Rayhan,Asif@Gmail.Com


In [7]:
df.columns = df.columns.str.replace("_", " ")
df

Unnamed: 0,FIRST NAME,LAST NAME,USER EMAIL
0,Asif Antaj,Robin,AsifAntajRobin@Gmal.Com
1,Afrin Jahan,Rupa,Afrin@Cmail.Com
2,Afifa Jahan,Rafa,Afifa@Gmail.Com
3,Asif Antaj,Rayhan,Asif@Gmail.Com


In [8]:
df.rename(columns = {"FIRST NAME":"First_Name", "LAST NAME":"Last_Name", "USER EMAIL":"Email"}, inplace = True)
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,AsifAntajRobin@Gmal.Com
1,Afrin Jahan,Rupa,Afrin@Cmail.Com
2,Afifa Jahan,Rafa,Afifa@Gmail.Com
3,Asif Antaj,Rayhan,Asif@Gmail.Com


In [9]:
df.loc[3]

First_Name        Asif Antaj
Last_Name             Rayhan
Email         Asif@Gmail.Com
Name: 3, dtype: object

In [10]:
#add row
df.loc[4] = ["Md Arif", "Uddin", "arif@gmail.com"]
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,AsifAntajRobin@Gmal.Com
1,Afrin Jahan,Rupa,Afrin@Cmail.Com
2,Afifa Jahan,Rafa,Afifa@Gmail.Com
3,Asif Antaj,Rayhan,Asif@Gmail.Com
4,Md Arif,Uddin,arif@gmail.com


In [11]:
#update row
df.loc[4, ["First_Name", "Email"]] = ["Md Azim", "Azim@Gmail.Com"]
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,AsifAntajRobin@Gmal.Com
1,Afrin Jahan,Rupa,Afrin@Cmail.Com
2,Afifa Jahan,Rafa,Afifa@Gmail.Com
3,Asif Antaj,Rayhan,Asif@Gmail.Com
4,Md Azim,Uddin,Azim@Gmail.Com


In [12]:
filt = (df['Email'] == 'azim@gmail.com')
df[filt]["Last_Name"] = "Khan"
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,AsifAntajRobin@Gmal.Com
1,Afrin Jahan,Rupa,Afrin@Cmail.Com
2,Afifa Jahan,Rafa,Afifa@Gmail.Com
3,Asif Antaj,Rayhan,Asif@Gmail.Com
4,Md Azim,Uddin,Azim@Gmail.Com


In [13]:
df.loc[filt, "Last_Name"] = "Khan"
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,AsifAntajRobin@Gmal.Com
1,Afrin Jahan,Rupa,Afrin@Cmail.Com
2,Afifa Jahan,Rafa,Afifa@Gmail.Com
3,Asif Antaj,Rayhan,Asif@Gmail.Com
4,Md Azim,Uddin,Azim@Gmail.Com


In [14]:
df["Email"].str.lower()

0    asifantajrobin@gmal.com
1            afrin@cmail.com
2            afifa@gmail.com
3             asif@gmail.com
4             azim@gmail.com
Name: Email, dtype: object

In [15]:
df["Email"] = df["Email"].str.lower()
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,asifantajrobin@gmal.com
1,Afrin Jahan,Rupa,afrin@cmail.com
2,Afifa Jahan,Rafa,afifa@gmail.com
3,Asif Antaj,Rayhan,asif@gmail.com
4,Md Azim,Uddin,azim@gmail.com


In [16]:
df["Email"].apply(len)

0    23
1    15
2    15
3    14
4    14
Name: Email, dtype: int64

In [17]:
def update_email(Email):
    return Email.upper()

In [18]:
df["Email"].apply(update_email)

0    ASIFANTAJROBIN@GMAL.COM
1            AFRIN@CMAIL.COM
2            AFIFA@GMAIL.COM
3             ASIF@GMAIL.COM
4             AZIM@GMAIL.COM
Name: Email, dtype: object

In [19]:
df["Email"] = df["Email"].apply(update_email)
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,ASIFANTAJROBIN@GMAL.COM
1,Afrin Jahan,Rupa,AFRIN@CMAIL.COM
2,Afifa Jahan,Rafa,AFIFA@GMAIL.COM
3,Asif Antaj,Rayhan,ASIF@GMAIL.COM
4,Md Azim,Uddin,AZIM@GMAIL.COM


In [20]:
df["Email"] = df["Email"].apply(lambda x: x.lower())
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,asifantajrobin@gmal.com
1,Afrin Jahan,Rupa,afrin@cmail.com
2,Afifa Jahan,Rafa,afifa@gmail.com
3,Asif Antaj,Rayhan,asif@gmail.com
4,Md Azim,Uddin,azim@gmail.com


In [21]:
df["Email"].apply(len)

0    23
1    15
2    15
3    14
4    14
Name: Email, dtype: int64

In [22]:
len(df['Email'])

5

In [23]:
df.apply(len, axis = "rows")

First_Name    5
Last_Name     5
Email         5
dtype: int64

In [24]:
df.apply(len, axis = 0)

First_Name    5
Last_Name     5
Email         5
dtype: int64

In [25]:
df.apply(len, axis = 1) # 1 = columns

0    3
1    3
2    3
3    3
4    3
dtype: int64

In [26]:
df.apply(pd.Series.min)

First_Name        Afifa Jahan
Last_Name                Rafa
Email         afifa@gmail.com
dtype: object

In [27]:
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,asifantajrobin@gmal.com
1,Afrin Jahan,Rupa,afrin@cmail.com
2,Afifa Jahan,Rafa,afifa@gmail.com
3,Asif Antaj,Rayhan,asif@gmail.com
4,Md Azim,Uddin,azim@gmail.com


In [28]:
df.apply(lambda x: x.min())

First_Name        Afifa Jahan
Last_Name                Rafa
Email         afifa@gmail.com
dtype: object

In [29]:
df.applymap(len)

  df.applymap(len)


Unnamed: 0,First_Name,Last_Name,Email
0,10,5,23
1,11,4,15
2,11,4,15
3,10,6,14
4,7,5,14


In [30]:
df.applymap(str.lower)

  df.applymap(str.lower)


Unnamed: 0,First_Name,Last_Name,Email
0,asif antaj,robin,asifantajrobin@gmal.com
1,afrin jahan,rupa,afrin@cmail.com
2,afifa jahan,rafa,afifa@gmail.com
3,asif antaj,rayhan,asif@gmail.com
4,md azim,uddin,azim@gmail.com


In [31]:
df

Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,asifantajrobin@gmal.com
1,Afrin Jahan,Rupa,afrin@cmail.com
2,Afifa Jahan,Rafa,afifa@gmail.com
3,Asif Antaj,Rayhan,asif@gmail.com
4,Md Azim,Uddin,azim@gmail.com


In [32]:
df['Email'].replace({"afrin@cmail.com":"afrinjahanrupa@Gmail.com", "azim@gmail.com":"azimuddin@gmail.com"}, inplace = True)
df

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Email'].replace({"afrin@cmail.com":"afrinjahanrupa@Gmail.com", "azim@gmail.com":"azimuddin@gmail.com"}, inplace = True)


Unnamed: 0,First_Name,Last_Name,Email
0,Asif Antaj,Robin,asifantajrobin@gmal.com
1,Afrin Jahan,Rupa,afrinjahanrupa@Gmail.com
2,Afifa Jahan,Rafa,afifa@gmail.com
3,Asif Antaj,Rayhan,asif@gmail.com
4,Md Azim,Uddin,azimuddin@gmail.com


In [33]:
df = pd.read_csv("employee_data.csv")
df.head()

Unnamed: 0,EmployeeID,Name,Age,Gender,Department,JobTitle,Salary,DateOfJoining,MaritalStatus,Education,...,Email,PhoneNumber,City,EmploymentType,IsRemote,LastWorkingDay,LeavesTakenLastYear,ProjectsHandled,Certifications,PromotionEligible
0,1000,George Williams,50,Male,HR,Consultant,65057,10/8/2017,Single,Master,...,george.williams@corporate.org,1762717327,Dhaka,Contract,No,16:21.8,18,1,AWS,Yes
1,1001,Jane Brown,36,Female,Sales,Executive,31239,3/7/2021,Single,PhD,...,jane.brown@corporate.org,1566784995,Sylhet,Part-time,No,54:32.7,10,5,Scrum Master,Yes
2,1002,George Johnson,29,Female,Support,Manager,95661,9/27/2016,Single,High School,...,george.johnson@corporate.org,1348371389,Rajshahi,Full-time,Yes,54:32.7,17,9,AWS,Yes
3,1003,Hannah Williams,42,Other,Engineering,Manager,75272,11/18/2018,Single,PhD,...,hannah.williams@corporate.org,1564436727,Barisal,Full-time,No,16:21.8,3,3,Data Analyst,Yes
4,1004,George Davis,40,Other,IT,Developer,119393,3/14/2019,Married,Master,...,george.davis@example.com,1130130576,Dhaka,Part-time,Yes,27:16.4,15,4,Scrum Master,No


In [34]:
df.rename(columns = {"Salary":"SalaryBDT"}, inplace = True)
df.columns

Index(['EmployeeID', 'Name', 'Age', 'Gender', 'Department', 'JobTitle',
       'SalaryBDT', 'DateOfJoining', 'MaritalStatus', 'Education', 'OverTime',
       'MonthlyIncome', 'Hobbies', 'ProgrammingSkills', 'Religion', 'Email',
       'PhoneNumber', 'City', 'EmploymentType', 'IsRemote', 'LastWorkingDay',
       'LeavesTakenLastYear', 'ProjectsHandled', 'Certifications',
       'PromotionEligible'],
      dtype='object')

In [35]:
df["IsRemote"]

0       No
1       No
2      Yes
3       No
4      Yes
      ... 
545    Yes
546    Yes
547    Yes
548    Yes
549     No
Name: IsRemote, Length: 550, dtype: object

In [36]:
df["IsRemote"].value_counts()

IsRemote
No     291
Yes    259
Name: count, dtype: int64

In [37]:
df["IsRemote"].map({"Yes":True, "No":False})

0      False
1      False
2       True
3      False
4       True
       ...  
545     True
546     True
547     True
548     True
549    False
Name: IsRemote, Length: 550, dtype: bool

In [38]:
df

Unnamed: 0,EmployeeID,Name,Age,Gender,Department,JobTitle,SalaryBDT,DateOfJoining,MaritalStatus,Education,...,Email,PhoneNumber,City,EmploymentType,IsRemote,LastWorkingDay,LeavesTakenLastYear,ProjectsHandled,Certifications,PromotionEligible
0,1000,George Williams,50,Male,HR,Consultant,65057,10/8/2017,Single,Master,...,george.williams@corporate.org,1762717327,Dhaka,Contract,No,16:21.8,18,1,AWS,Yes
1,1001,Jane Brown,36,Female,Sales,Executive,31239,3/7/2021,Single,PhD,...,jane.brown@corporate.org,1566784995,Sylhet,Part-time,No,54:32.7,10,5,Scrum Master,Yes
2,1002,George Johnson,29,Female,Support,Manager,95661,9/27/2016,Single,High School,...,george.johnson@corporate.org,1348371389,Rajshahi,Full-time,Yes,54:32.7,17,9,AWS,Yes
3,1003,Hannah Williams,42,Other,Engineering,Manager,75272,11/18/2018,Single,PhD,...,hannah.williams@corporate.org,1564436727,Barisal,Full-time,No,16:21.8,3,3,Data Analyst,Yes
4,1004,George Davis,40,Other,IT,Developer,119393,3/14/2019,Married,Master,...,george.davis@example.com,1130130576,Dhaka,Part-time,Yes,27:16.4,15,4,Scrum Master,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545,1545,Bob Jones,39,Male,Engineering,Technician,68714,7/3/2014,Single,Bachelor,...,bob.jones@example.com,1508844502,Rajshahi,Full-time,Yes,54:32.7,11,1,AWS,No
546,1546,Jane Williams,35,Other,Logistics,Developer,84093,9/1/2017,Divorced,PhD,...,jane.williams@corporate.org,1947982501,Khulna,Part-time,Yes,38:10.9,25,4,PMP,Yes
547,1547,Diana Wilson,36,Female,Engineering,Analyst,85419,8/3/2023,Divorced,Master,...,diana.wilson@example.com,1361782146,Sylhet,Contract,Yes,32:43.6,26,0,CCNA,No
548,1548,Hannah Taylor,52,Male,IT,Developer,79145,5/22/2010,Married,Master,...,hannah.taylor@company.com,1318980404,Chittagong,Contract,Yes,10:54.5,16,8,AWS,Yes


In [39]:
df["IsRemote"].replace({"Yes":True, "No":False})

  df["IsRemote"].replace({"Yes":True, "No":False})


0      False
1      False
2       True
3      False
4       True
       ...  
545     True
546     True
547     True
548     True
549    False
Name: IsRemote, Length: 550, dtype: bool

In [40]:
df

Unnamed: 0,EmployeeID,Name,Age,Gender,Department,JobTitle,SalaryBDT,DateOfJoining,MaritalStatus,Education,...,Email,PhoneNumber,City,EmploymentType,IsRemote,LastWorkingDay,LeavesTakenLastYear,ProjectsHandled,Certifications,PromotionEligible
0,1000,George Williams,50,Male,HR,Consultant,65057,10/8/2017,Single,Master,...,george.williams@corporate.org,1762717327,Dhaka,Contract,No,16:21.8,18,1,AWS,Yes
1,1001,Jane Brown,36,Female,Sales,Executive,31239,3/7/2021,Single,PhD,...,jane.brown@corporate.org,1566784995,Sylhet,Part-time,No,54:32.7,10,5,Scrum Master,Yes
2,1002,George Johnson,29,Female,Support,Manager,95661,9/27/2016,Single,High School,...,george.johnson@corporate.org,1348371389,Rajshahi,Full-time,Yes,54:32.7,17,9,AWS,Yes
3,1003,Hannah Williams,42,Other,Engineering,Manager,75272,11/18/2018,Single,PhD,...,hannah.williams@corporate.org,1564436727,Barisal,Full-time,No,16:21.8,3,3,Data Analyst,Yes
4,1004,George Davis,40,Other,IT,Developer,119393,3/14/2019,Married,Master,...,george.davis@example.com,1130130576,Dhaka,Part-time,Yes,27:16.4,15,4,Scrum Master,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545,1545,Bob Jones,39,Male,Engineering,Technician,68714,7/3/2014,Single,Bachelor,...,bob.jones@example.com,1508844502,Rajshahi,Full-time,Yes,54:32.7,11,1,AWS,No
546,1546,Jane Williams,35,Other,Logistics,Developer,84093,9/1/2017,Divorced,PhD,...,jane.williams@corporate.org,1947982501,Khulna,Part-time,Yes,38:10.9,25,4,PMP,Yes
547,1547,Diana Wilson,36,Female,Engineering,Analyst,85419,8/3/2023,Divorced,Master,...,diana.wilson@example.com,1361782146,Sylhet,Contract,Yes,32:43.6,26,0,CCNA,No
548,1548,Hannah Taylor,52,Male,IT,Developer,79145,5/22/2010,Married,Master,...,hannah.taylor@company.com,1318980404,Chittagong,Contract,Yes,10:54.5,16,8,AWS,Yes


In [41]:
df["IsRemote"] = df["IsRemote"].replace({"Yes":True, "No":False})
df.head()

  df["IsRemote"] = df["IsRemote"].replace({"Yes":True, "No":False})


Unnamed: 0,EmployeeID,Name,Age,Gender,Department,JobTitle,SalaryBDT,DateOfJoining,MaritalStatus,Education,...,Email,PhoneNumber,City,EmploymentType,IsRemote,LastWorkingDay,LeavesTakenLastYear,ProjectsHandled,Certifications,PromotionEligible
0,1000,George Williams,50,Male,HR,Consultant,65057,10/8/2017,Single,Master,...,george.williams@corporate.org,1762717327,Dhaka,Contract,False,16:21.8,18,1,AWS,Yes
1,1001,Jane Brown,36,Female,Sales,Executive,31239,3/7/2021,Single,PhD,...,jane.brown@corporate.org,1566784995,Sylhet,Part-time,False,54:32.7,10,5,Scrum Master,Yes
2,1002,George Johnson,29,Female,Support,Manager,95661,9/27/2016,Single,High School,...,george.johnson@corporate.org,1348371389,Rajshahi,Full-time,True,54:32.7,17,9,AWS,Yes
3,1003,Hannah Williams,42,Other,Engineering,Manager,75272,11/18/2018,Single,PhD,...,hannah.williams@corporate.org,1564436727,Barisal,Full-time,False,16:21.8,3,3,Data Analyst,Yes
4,1004,George Davis,40,Other,IT,Developer,119393,3/14/2019,Married,Master,...,george.davis@example.com,1130130576,Dhaka,Part-time,True,27:16.4,15,4,Scrum Master,No


In [42]:
df["IsRemote"].value_counts()

IsRemote
False    291
True     259
Name: count, dtype: int64