In [1]:
# Sample records stored column-wise: each key is a column name and each list
# holds that column's values in row order.
people = dict(
    first=['Corey', 'Jane', 'John'],
    last=['Schafer', 'Doe', 'Doe'],
    email=['CoreyMSchafer@gmail.com', 'JaneDoe@email.com', 'JohnDoe@email.com'],
)

In [2]:
import pandas as pd

In [3]:
# Build a DataFrame from the dict: each key becomes a column and each list
# supplies that column's values; rows get the default 0..2 integer index.
df = pd.DataFrame(people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [4]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [5]:
# Rename all columns at once by assigning a complete list of new labels
# (one label per existing column, in order).
df.columns = ['first_name', 'last_name', 'email']
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [6]:
# Upper-case every column label with a list comprehension over df.columns.
df.columns = [x.upper() for x in df.columns]
df.columns

Index(['FIRST_NAME', 'LAST_NAME', 'EMAIL'], dtype='object')

In [7]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [8]:
# Replace underscores with spaces in every column label, using the
# vectorised string methods available on the column Index via .str.
df.columns = df.columns.str.replace('_', ' ')
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [9]:
# Go the other way: turn the spaces in the column labels into underscores.
df.columns = df.columns.str.replace(' ', '_')
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [10]:
# Lower-case the column labels again with a list comprehension.
df.columns = [x.lower() for x in df.columns]
df

Unnamed: 0,first_name,last_name,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [13]:
# Rename only selected columns by passing an {old_name: new_name} mapping.
# rename() returns a new DataFrame and leaves df untouched by default
# (inplace=False), so the result is assigned back here. Without the
# assignment, the following cells that use the 'first' and 'last' columns
# would fail on a fresh Restart & Run All.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [16]:
# The rename above does not change df itself unless the result is assigned
# back (df = df.rename(...)) or inplace=True is passed.
df
# NOTE: that statement (from Corey's tutorial) is correct: inplace defaults to
# False and is never applied automatically. df shows 'first'/'last' here only
# because the rename had been persisted (by assignment or inplace=True) before
# this cell ran (the recorded execution counts skip from 13 to 16, so a cell
# may have been rerun or removed).

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [17]:
df.loc[2]

first                 John
last                   Doe
email    JohnDoe@email.com
Name: 2, dtype: object

In [18]:
# Overwrite the whole row labelled 2; the list supplies one value per column,
# in column order.
df.loc[2] = ['John', 'Smith', 'JohnSmith@email.com']

In [19]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnSmith@email.com


In [20]:
# Changing specific columns

In [21]:
df.loc[2, ['last', 'email']]

last                   Smith
email    JohnSmith@email.com
Name: 2, dtype: object

In [22]:
# Pass a list of column labels to update just those columns of row 2;
# the 'first' column is not touched.
df.loc[2, ['last', 'email']] = ['Doe', 'JohnDoe@email.com']

In [23]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [25]:
# As we can see, row 2 has been set back to its original values

In [26]:
# Set a single cell by giving .loc one row label and one column label.
df.loc[2, 'last'] = 'Smith'

In [27]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [28]:
# Only the 'last' value is changed; the email is left as it was

In [31]:
# A single value can also be changed with .at, which takes one row label and
# one column label. It is used less often than .loc.
df.at[2, 'last'] = 'Doe'
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [32]:
# Common mistake: chained indexing. Filtering the rows first (df[filt]) and
# then selecting a column (['last']) works fine for *reading* a value.
filt = (df['email'] == 'JohnDoe@email.com')
df[filt]['last']

2    Doe
Name: last, dtype: object

In [33]:
# The problem appears when we try to *assign* through chained indexing:
# df[filt] yields a copy, so the value is set on that copy rather than on df,
# and pandas emits a SettingWithCopyWarning. Kept on purpose as a demonstration
# of what not to do.
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


In [34]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [35]:
# As we can see, the change doesn't take place

In [36]:
# Thus, when changing values, use .loc (or .at) as we saw above instead of chained indexing

In [37]:
# A single .loc call with the boolean filter and the column label assigns
# directly on df.
df.loc[filt, 'last' ] = 'Smith'

In [38]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [39]:
# Now the change takes place

In [40]:
# Updating multiple rows  of data:

In [41]:
# Scenario: convert all the email addresses to lowercase.
# .str.lower() returns a new Series; df is unchanged until the result is
# assigned back to the column.
df['email'].str.lower()

0    coreymschafer@gmail.com
1          janedoe@email.com
2          johndoe@email.com
Name: email, dtype: object

In [42]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@email.com
2,John,Smith,JohnDoe@email.com


In [44]:
# Assign the lower-cased Series back to the column to persist the change.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [45]:
# apply, map, applymap, replace
# These methods can also change many values at once, in addition to the
# assign-back technique used in the cell above.

In [46]:
# apply is used for calling a function on our values.
# It works on both a DataFrame and a Series. On a Series the function is
# called once per element, so this returns the length of each email string.
df['email'].apply(len)

0    23
1    17
2    17
Name: email, dtype: int64

In [47]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    shouted = email.upper()
    return shouted

In [48]:
df['email'].apply(update_email)

0    COREYMSCHAFER@GMAIL.COM
1          JANEDOE@EMAIL.COM
2          JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [49]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [50]:
# apply returns a new Series, so assign it back to the column to store the
# upper-cased emails in df.
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,COREYMSCHAFER@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Smith,JOHNDOE@EMAIL.COM


In [52]:
# Convert the emails back to lowercase and store the result in df.
df['email'] = df['email'].apply(lambda x: x.lower())

# This time an inline lambda is used instead of a named function; it is
# called once per email string.

df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [53]:
# So far we have seen how apply works with Series objects.
# Now let's see how it works with DataFrames.

In [54]:
# On a DataFrame, apply passes each column (a Series) to the function,
# so len returns the number of rows in every column.
df.apply(len)

first    3
last     3
email    3
dtype: int64

In [55]:
len(df['email'])

3

In [56]:
# This is what df.apply(len) computes, but for every column of the df: the length of each Series

In [57]:
# With axis='columns' the function is applied to each row instead,
# so len returns the number of columns (3) for every row.
df.apply(len, axis='columns')


0    3
1    3
2    3
dtype: int64

In [58]:
# Get the minimum value of each column (Series). For strings this is the
# value that comes first in string sort order.
df.apply(pd.Series.min)

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [59]:
# Same result with a lambda: x is each column Series in turn.
df.apply(lambda x: x.min())

first                      Corey
last                         Doe
email    coreymschafer@gmail.com
dtype: object

In [60]:
# On a Series, apply calls the function on each value of that Series;
# on a DataFrame, apply calls the function on each Series (row or column) of the df.

In [61]:
# applymap applies a function to every individual element of a DataFrame.
# It exists only on DataFrames; Series objects do not have this method.

In [62]:
# Length of every individual string in the DataFrame.
# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map; keep applymap only if this must run on older pandas.
df.applymap(len)

Unnamed: 0,first,last,email
0,5,7,23
1,4,3,17
2,4,5,17


In [65]:
# Lower-case every element. str.lower only accepts strings, so this would
# raise if any column held non-string values (e.g. numbers).
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,corey,schafer,coreymschafer@gmail.com
1,jane,doe,janedoe@email.com
2,john,smith,johndoe@email.com


In [66]:
# map is used on a Series: it looks each value up in the given dict and substitutes the result

In [67]:
# Substitute first names via a dict; values that are not keys of the dict
# become NaN.
df['first'].map({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2      NaN
Name: first, dtype: object

In [68]:
# 'John' is not a key in the dict, so map converted it to NaN.
# To keep unmatched names as they are, we must use replace instead of map.

In [69]:
# replace swaps only the values listed in the dict and keeps every other
# value ('John') as it is.
df['first'].replace({'Corey': 'Chris', 'Jane': 'Mary'})

0    Chris
1     Mary
2     John
Name: first, dtype: object

In [70]:
# These changes are not reflected in our actual df, because we did not assign
# the result of the operation back to the df

In [71]:
df

Unnamed: 0,first,last,email
0,Corey,Schafer,coreymschafer@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com
