In [1]:
# Sample records stored column-wise: each key is a column label and each
# list holds that column's values, one entry per person.
people = dict(
    first=["Griffin", "Jane", "John"],
    last=["Sargent", "Doe", "Doe"],
    email=["griffin.a.sargent@gmail.com", "jane@email.com", "MikeJ@email.com"],
)

In [2]:
import pandas as pd

In [4]:
# Turn the dict of lists into a DataFrame: the keys become the column labels
df = pd.DataFrame(data=people)
df

Unnamed: 0,first,last,email
0,Griffin,Sargent,griffin.a.sargent@gmail.com
1,Jane,Doe,jane@email.com
2,John,Doe,MikeJ@email.com


# Updating columns

In [6]:
# Inspect the current column labels
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [8]:
# We can rename every column at once by assigning a full list of new names
df.columns = ['first_name', 'last_name', 'email']
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [9]:
# Alternatively, derive the new names from the old ones with a list comprehension
df.columns = [label.upper() for label in df.columns]
df.columns

Index(['FIRST_NAME', 'LAST_NAME', 'EMAIL'], dtype='object')

In [15]:
# The .str accessor works on column labels too: swap underscores for spaces...
df.columns = df.columns.str.replace('_', ' ')
print(df.columns)
# ...and then swap the spaces back to underscores
df.columns = df.columns.str.replace(' ', '_')
df.columns


Index(['FIRST NAME', 'LAST NAME', 'EMAIL'], dtype='object')


Index(['FIRST_NAME', 'LAST_NAME', 'EMAIL'], dtype='object')

In [16]:
# Let's put the column names back into lower case
df.columns = [label.lower() for label in df.columns]
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [19]:
# Another approach: rename() takes an {old: new} mapping, so only the listed
# columns are renamed; inplace=True changes df itself instead of returning a new frame
df.rename(columns={'first_name': 'first', 'last_name': 'last'}, inplace=True)
df.columns

Index(['first', 'last', 'email'], dtype='object')

# Updating rows

In [23]:
# Recall that df.loc selects rows by label; assigning a list to df.loc[label]
# overwrites the entire row (one value per column)
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']
df

Unnamed: 0,first,last,email
0,Griffin,Sargent,griffin.a.sargent@gmail.com
1,Jane,Doe,jane@email.com
2,John,Smith,JohnSmith@email.com


In [26]:
# Or we can use the 2nd argument of df.loc to name the columns to update,
# leaving the rest of the row untouched
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com']
df

Unnamed: 0,first,last,email
0,Griffin,Sargent,griffin.a.sargent@gmail.com
1,Jane,Doe,jane@email.com
2,John,Doe,JohnDoe@email.com


In [28]:
# Here's a common mistake people make.
# We want to change one person's last name to 'Smith', so we start by
# building a boolean filter that picks out their row by email
filt = (df['email'] == 'JohnDoe@email.com')
df[filt]

Unnamed: 0,first,last,email
2,John,Doe,JohnDoe@email.com


In [29]:
# Chained indexing: filter the rows first, then pick the 'last' column -- fine for reading
df[filt]['last']

2    Doe
Name: last, dtype: object

In [30]:
# This illustrates 'views' vs 'copies': pandas warns (see the output below) that the
# value is being set on a copy of a slice, so df itself may not be changed.
# Moral of the story: Use df.loc to update values
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


In [32]:
# This is the better way: give .loc the row filter and the column label in a
# single indexing operation so the assignment targets df itself
df.loc[filt, 'last'] = 'Smith'
df

Unnamed: 0,first,last,email
0,Griffin,Sargent,griffin.a.sargent@gmail.com
1,Jane,Doe,jane@email.com
2,John,Smith,JohnDoe@email.com


In [34]:
# Updating more than one row? Assign a whole new Series to the column;
# here every email is replaced by its lower-cased version
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Griffin,Sargent,griffin.a.sargent@gmail.com
1,Jane,Doe,jane@email.com
2,John,Smith,johndoe@email.com


### Here's something more advanced

#### 4 methods to try here
 - apply
 - map
 - applymap
 - replace

In [35]:
# apply calls an existing function on every value of the Series and returns the
# results as a new Series (it does not modify df in place)
df['email'].apply(len)

0    27
1    14
2    17
Name: email, dtype: int64

In [36]:
def update_email(email):
    """Return the given email address converted to upper case."""
    shouted = email.upper()
    return shouted

In [37]:
# apply works the same way with a function we defined ourselves
df['email'].apply(update_email)

0    GRIFFIN.A.SARGENT@GMAIL.COM
1                 JANE@EMAIL.COM
2              JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [39]:
# To apply permanently, you must assign the result back to the column
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,Griffin,Sargent,GRIFFIN.A.SARGENT@GMAIL.COM
1,Jane,Doe,JANE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [41]:
# A lambda can stand in for a named function when the logic is a one-liner
df['email'] = df['email'].apply(lambda address: address.lower())
df

Unnamed: 0,first,last,email
0,Griffin,Sargent,griffin.a.sargent@gmail.com
1,Jane,Doe,jane@email.com
2,John,Smith,johndoe@email.com


In [42]:
# apply behaves differently on a whole DataFrame: the function receives each
# column as a Series (not each value), so len gives the number of rows in every column
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [43]:
# Each column is passed in as a Series, so Series methods such as min() are available
df.apply(lambda column: column.min())

first                        Griffin
last                             Doe
email    griffin.a.sargent@gmail.com
dtype: object

In [44]:
# applymap applies the function to every individual value in the DataFrame
# NOTE(review): newer pandas (2.1+) deprecates applymap in favour of DataFrame.map -- confirm for the installed version
df.applymap(len)

Unnamed: 0,first,last,email
0,7,7,27
1,4,3,14
2,4,5,17


In [45]:
# Same idea with a string method: lower-case every cell (every column here holds strings)
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,griffin,sargent,griffin.a.sargent@gmail.com
1,jane,doe,jane@email.com
2,john,smith,johndoe@email.com


In [47]:
# map can be used with a dictionary to substitute values;
# values without a matching key (here 'John') become NaN
df['first'].map({'Griffin': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [48]:
# replace works similarly, but values without a matching key (here 'John') are left unchanged
df['first'].replace({'Griffin': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object