In [19]:
import pandas as pd
# Column-oriented sample data: every key becomes a column, every list holds that column's values.
people = dict(
    first=['John', 'Jan', 'Corey'],
    last=['Doe', 'Doe', 'Schaffer'],
    email=['johndoe@example.com', 'jandoe@example.com', 'coreymschaffer@example.com'],
)
df = pd.DataFrame(data=people)

In [30]:
# Assigning a list to df.columns renames every column at once; the list must
# contain one name per existing column, in order.
df.columns = ['first_name', 'last_name', 'email']
df

Unnamed: 0,first_name,last_name,email
0,John,Doe,Johndoe@Example.Com
1,Jan,Doe,Jandoe@Example.Com
2,Corey,Schaffer,Coreymschaffer@Example.Com


In [31]:
# A list comprehension applies the same text transformation to every column name in one pass.
df.columns = [label.upper() for label in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,John,Doe,Johndoe@Example.Com
1,Jan,Doe,Jandoe@Example.Com
2,Corey,Schaffer,Coreymschaffer@Example.Com


In [32]:
# The .str accessor on the column index runs a string replace over all column
# names; here every space is turned into an underscore.
df.columns = df.columns.str.replace(pat=' ', repl='_')
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,John,Doe,Johndoe@Example.Com
1,Jan,Doe,Jandoe@Example.Com
2,Corey,Schaffer,Coreymschaffer@Example.Com


In [33]:
# Convert every column name back to lower case.
df.columns = [label.lower() for label in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,John,Doe,Johndoe@Example.Com
1,Jan,Doe,Jandoe@Example.Com
2,Corey,Schaffer,Coreymschaffer@Example.Com


In [34]:
# To rename only some columns, pass an {old_name: new_name} dict to rename.
# rename returns a new DataFrame, so rebind df to keep the result
# (preferred over inplace=True, which gains nothing and prevents chaining).
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})
df

Unnamed: 0,first,last,email
0,John,Doe,Johndoe@Example.Com
1,Jan,Doe,Jandoe@Example.Com
2,Corey,Schaffer,Coreymschaffer@Example.Com


In [35]:
# Update a single row or value by selecting it with .iloc, .loc or .at and assigning.
# Always go through one of these indexers: chained indexing such as
# df['last'][1] = ... may write to a temporary copy instead of df.
df.iloc[1, 1] = 'Smith'  # positional: second row, second column
df.loc[1, ['last', 'email']] = ['harper', 'janharper@example.com']  # label-based, several columns at once
# .at reads/writes exactly one scalar cell addressed by row and column labels
# (.iat is its positional counterpart), so it must not be given a list of
# columns; restoring two cells takes one assignment per cell.
df.at[1, 'last'] = 'Doe'
df.at[1, 'email'] = 'jandoe@example.com'
df

Unnamed: 0,first,last,email
0,John,Doe,Johndoe@Example.Com
1,Jan,Doe,jandoe@example.com
2,Corey,Schaffer,Coreymschaffer@Example.Com


In [36]:
# Assigning a whole Series back to a column updates all of its rows in one step;
# here every email address is upper-cased.
df['email'] = (
    df['email']
    .str.upper()
)
df

Unnamed: 0,first,last,email
0,John,Doe,JOHNDOE@EXAMPLE.COM
1,Jan,Doe,JANDOE@EXAMPLE.COM
2,Corey,Schaffer,COREYMSCHAFFER@EXAMPLE.COM


In [37]:
# Apply. It is used to apply some function or transformation on a series or a dataframe
# Let's say we want to lower case all emails
def update_email(email):
    """Return ``email`` converted to lower case."""
    lowered = email.lower()
    return lowered

# Series.apply calls the given function once for every element of the column.
df['email'] = df['email'].apply(update_email)

# A lambda can be passed in place of a named function.
df['email'] = df['email'].apply(lambda address: address.title())
df

Unnamed: 0,first,last,email
0,John,Doe,Johndoe@Example.Com
1,Jan,Doe,Jandoe@Example.Com
2,Corey,Schaffer,Coreymschaffer@Example.Com


In [38]:
# DataFrame.apply hands the function one Series at a time. With the default
# axis=0 each Series is a whole COLUMN (pass axis=1 to receive rows instead).
# min() on a column of strings returns the lexicographically smallest value,
# not the shortest one.
df.apply(lambda column: column.min())

first                         Corey
last                            Doe
email    Coreymschaffer@Example.Com
dtype: object

In [28]:
# applymap exists only on DataFrames and calls the function on every individual cell.
# Here it reports the string length of each element.
# NOTE(review): pandas 2.1+ deprecates applymap in favour of DataFrame.map — confirm the target pandas version.
df.applymap(func=len)

Unnamed: 0,first,last,email
0,4,3,19
1,3,3,18
2,5,8,26


In [39]:
# Lower-case every cell of the frame; the result is a new DataFrame and df itself is unchanged.
df.applymap(func=str.lower)

Unnamed: 0,first,last,email
0,john,doe,johndoe@example.com
1,jan,doe,jandoe@example.com
2,corey,schaffer,coreymschaffer@example.com


In [43]:
# Series.map substitutes each value of a Series using the given dict.
# The dict has to cover every value: anything without a matching key becomes NaN.
df['first'].map({
    'Corey': 'Chris',
    'Jan': 'Mary',
})


0      NaN
1     Mary
2    Chris
Name: first, dtype: object

In [47]:
# Series.map yields NaN for every value missing from the dict, whereas replace
# only swaps the listed values and leaves all others unchanged.
# replace returns a new Series, so assign it back to the column to keep the change.
# Calling replace(..., inplace=True) on df['first'] is chained assignment: it may
# only modify a temporary copy and never updates df under Copy-on-Write.
df['first'] = df['first'].replace({'Corey': 'Chris', 'Jan': 'Mary'})
df

Unnamed: 0,first,last,email
0,John,Doe,Johndoe@Example.Com
1,Mary,Doe,Jandoe@Example.Com
2,Chris,Schaffer,Coreymschaffer@Example.Com
