In [1]:
# Sample records stored column-wise: every key maps to a list of values,
# and all three lists have the same length (one entry per person).
people = {
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"],
    "email": [
        "CoreyMSchafer@gmail.com",
        "JaneDoe@email.com",
        "JohnDoe@email.com",
    ],
}

In [2]:
import pandas as pd

In [3]:
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [5]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [12]:
# renames all the columns by passing in a list as an assignment
df.columns = ['first name', 'last name', 'email']

In [13]:
df

Unnamed: 0,first name,last name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [14]:
# replaces all spaces w/ underscores
df.columns = df.columns.str.replace(' ', '_')
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [15]:
# upper cases all the column names
df.columns = [x.upper() for x in df.columns]
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [16]:
# lower-case every column name using the vectorized .str accessor on the column Index
df.columns = df.columns.str.lower()

# pass in a dictionary to rename specific columns using the rename() method
# rename() returns a new dataframe, so assign the result back to make the change carry through
# (assigning back is preferred over inplace=True, which returns None and prevents method chaining)
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [17]:
# updates all values for index=2
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']

In [19]:
# updates only select columns
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com']

In [20]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [21]:
# changes a single value (no list is needed for single value assignment)
df.loc[2, 'last'] = 'Smith'

In [22]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [23]:
# can also use .at to change a single value
df.at[2, 'last'] = 'Doe'

In [24]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [27]:
filt = (df['email'] == 'JohnDoe@email.com')

# to set values, use a .loc, .iloc, or .at indexer; chained indexing like the line below
# assigns to a temporary copy and triggers the SettingWithCopyWarning shown in the output
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt] ['last'] = 'Smith'


In [28]:
filt = (df['email'] == 'JohnDoe@email.com')

# applies filters to rows and updates values in last column
df.loc[filt, 'last'] = 'Smith'

In [30]:
# changing multiple rows at once
# updates email column to use all lowercasing for user emails
df['email'] = df['email'].str.lower()

In [31]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [None]:
# 4 methods to change multiple rows at once
    # apply
    # map
    # applymap
    # replace

In [38]:
# apply is used to call a function on values
# applying length function to each value in 'email' series
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [33]:
# simple helper that upper-cases a single email address
def update_email(email):
    """Return ``email`` with every character converted to upper case."""
    uppercased = email.upper()
    return uppercased

In [34]:
# use apply method to call function on email column
# note that you just pass in the function itself without any arguments or parentheses
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [35]:
df['email'] = df['email'].apply(update_email)

In [36]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [37]:
# using a lambda function
df['email'] = df['email'].apply(lambda x: x.lower())

In [38]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [45]:
# running a function on a series
# applies function to every value in a series
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [39]:
# running a function on a dataframe
# apply passes whole series to the function, not individual values
# the default axis ('index', alias 'rows') passes each column; axis='columns' passes each row
# so this is saying there are 3 column values at each index
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [47]:
len(df['email'])

3

In [51]:
# gets minimum value of each series/column
df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [52]:
# using lambda function where x is a series
df.apply(lambda x: x.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [53]:
# applymap can be used to apply function to every value in dataframe
# (unlike apply, which hands the function a whole series at a time)
# NOTE: DataFrame.applymap is deprecated since pandas 2.1; DataFrame.map is its replacement there
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [54]:
# this works because all series in the dataframe are string type
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [55]:
# use map() to substitute values in a specific column by passing in a dict
# note: any values that are not keys in the dict get converted to NaN
df['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [41]:
# use the replace() method instead of map() to keep the names that were not replaced (doesn't change them to NaN)
df['first'] = df['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

In [42]:
df

Unnamed: 0,first,last,email
0,Chris,Schafer,coreymschafer@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com
