In [3]:
# 5. Updating Rows and Columns

In [None]:
# Sample records in column-oriented form: each keyword is a column name and
# each list holds that column's values in row order.
people = dict(
    first=["Gonzalo", "Jane", "Joe"],
    last=["Rivero Baud", "Doe", "Doe"],
    email=["burrito@gmail.com", "janedoe@email.com", "johndoe@email.com"],
)

In [None]:
import pandas as pd

In [None]:
# Build the frame from the dict: each key becomes a column and each list supplies its rows.
df = pd.DataFrame(people)

In [4]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,Joe,Doe,johndoe@email.com


In [5]:
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [6]:
# Assigning a list to df.columns relabels every column in one step, matched by position.
df.columns = ['first_name', 'last_name', 'email']

In [7]:
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [9]:
df

Unnamed: 0,first_name,last_name,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,Joe,Doe,johndoe@email.com


In [10]:
# Upper-case every column label, building the new list with a comprehension.
df.columns = [label.upper() for label in df.columns]

In [11]:
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,Joe,Doe,johndoe@email.com


In [12]:
# Swap underscores for spaces in every column label. regex=False pins a literal
# (non-regex) match so the result does not depend on the pandas version's default.
df.columns = df.columns.str.replace('_', ' ', regex=False)

In [13]:
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,Joe,Doe,johndoe@email.com


In [14]:
# Turn the spaces in the column labels back into underscores. regex=False pins a
# literal (non-regex) match so the result does not depend on the pandas version's default.
df.columns = df.columns.str.replace(' ', '_', regex=False)

In [15]:
# Lower-case every column label again.
df.columns = [label.lower() for label in df.columns]

In [16]:
df

Unnamed: 0,first_name,last_name,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,Joe,Doe,johndoe@email.com


In [17]:
# Rename only the listed columns (old label -> new label); 'email' is left alone.
# Rebinding the returned frame instead of using inplace=True keeps the call
# chainable and avoids mutating the existing object in place.
df = df.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [18]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,Joe,Doe,johndoe@email.com


In [19]:
df.loc[2]

first                  Joe
last                   Doe
email    johndoe@email.com
Name: 2, dtype: object

In [20]:
# Assigning a list to df.loc[<row label>] overwrites every column of that row, in column order.
df.loc[2] = ['John', 'Smith', 'johnsmith@email.com']

In [21]:
df.loc[2]

first                   John
last                   Smith
email    johnsmith@email.com
Name: 2, dtype: object

In [22]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johnsmith@email.com


In [24]:
df.loc[2, ['last', 'email']]

last                   Smith
email    johnsmith@email.com
Name: 2, dtype: object

In [27]:
# Passing a list of column labels updates only those columns of row 2; 'first' is untouched.
df.loc[2, ['last', 'email']] = ['Doe', 'johndoe@email.com']

In [28]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [30]:
# A single row label plus a single column label targets exactly one cell.
df.loc[2, 'last'] = 'Smith'

In [31]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Smith,johndoe@email.com


In [32]:
df.at[2, 'last'] = 'Doe'  # .at addresses one cell by row label and column label (single-value update)

In [33]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [35]:
# Boolean mask that is True only for rows whose email equals this address.
filt = df['email'] == 'johndoe@email.com'

Unnamed: 0,first,last,email
2,John,Doe,johndoe@email.com


In [36]:
df[filt]

Unnamed: 0,first,last,email
2,John,Doe,johndoe@email.com


In [37]:
df[filt]['last']

2    Doe
Name: last, dtype: object

In [38]:
df[filt]['last'] == 'Smith'

2    False
Name: last, dtype: bool

In [39]:
# Deliberate demonstration of the chained-indexing pitfall: df[filt] produces a
# separate object, so this assignment raises the "set on a copy of a slice"
# warning and leaves df unchanged (the frame shown afterwards still has 'Doe').
# To actually update the frame use: df.loc[filt, 'last'] = 'Smith'
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


In [40]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [42]:
df['email'].str.upper()

0    BURRITO@GMAIL.COM
1    JANEDOE@EMAIL.COM
2    JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [43]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [46]:
# .str.upper() returns a new Series and does not modify df; assign it back to persist the change.
df['email'] = df['email'].str.upper()

In [47]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,BURRITO@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Doe,JOHNDOE@EMAIL.COM


In [48]:
# Put the emails back in lower case before the apply examples.
df['email'] = df['email'].str.lower()

In [51]:
# apply is used to call a function on our values

df['email'].apply(len)  # on a Series, apply runs the function once per element (here: length of each email)

0    17
1    17
2    17
Name: email, dtype: int64

In [52]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    uppercased = email.upper()
    return uppercased

In [56]:
df['email'].apply(update_email)

0    BURRITO@GMAIL.COM
1    JANEDOE@EMAIL.COM
2    JOHNDOE@EMAIL.COM
Name: email, dtype: object

In [57]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [58]:
# apply returns a new Series and leaves df as it was; assign the result back to update the column.
df['email'] = df['email'].apply(update_email)

In [59]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,BURRITO@GMAIL.COM
1,Jane,Doe,JANEDOE@EMAIL.COM
2,John,Doe,JOHNDOE@EMAIL.COM


In [60]:
# Lower-case each address with an inline lambda and store the result back in the column.
df['email'] = df['email'].apply(lambda address: address.lower())

In [61]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [63]:
# apply on a DataFrame passes each column (the default) or each row to the function as a whole Series

df.apply(len)  # default axis: len of each column, i.e. the number of rows

first    3
last     3
email    3
dtype: int64

In [64]:
# len of a single column is its number of rows, the same value df.apply(len) reports for 'first'.
len(df['first'])

3

In [65]:
# axis='rows' gives the same result as the default call: the function receives each column.
df.apply(len, axis='rows')

first    3
last     3
email    3
dtype: int64

In [66]:
# axis='columns' hands each row to the function, so len counts the fields in that row.
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [67]:
# Each column is passed to pd.Series.min, which returns that column's smallest value
# (for these string columns, the value that sorts first).
df.apply(pd.Series.min)

first              Gonzalo
last                   Doe
email    burrito@gmail.com
dtype: object

In [68]:
# Lambda form of the same reduction: each column arrives as a Series and its min() is returned.
df.apply(lambda column: column.min())

first              Gonzalo
last                   Doe
email    burrito@gmail.com
dtype: object

In [69]:
# applymap calls the function on every individual cell of the DataFrame (here: length of each string).
# NOTE(review): recent pandas releases deprecate applymap in favor of DataFrame.map; confirm against the installed version.
df.applymap(len)

Unnamed: 0,first,last,email
0,7,11,17
1,4,3,17
2,4,3,17


In [71]:
# Lower-case every cell; the result is a new frame and df itself is not modified.
df.applymap(str.lower)

Unnamed: 0,first,last,email
0,gonzalo,rivero baud,burrito@gmail.com
1,jane,doe,janedoe@email.com
2,john,doe,johndoe@email.com


In [72]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [73]:
# map: substitutes values using the dict; values without a matching key become NaN ('John' here).
df['first'].map({'Gonzalo': 'Burrito', 'Jane': 'Mary'})

0    Burrito
1       Mary
2        NaN
Name: first, dtype: object

In [74]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [75]:
# replace: substitutes values using the dict; values without a matching key are kept as they are.
df['first'].replace({'Gonzalo': 'Burrito', 'Jane': 'Mary'})

0    Burrito
1       Mary
2       John
Name: first, dtype: object

In [76]:
df

Unnamed: 0,first,last,email
0,Gonzalo,Rivero Baud,burrito@gmail.com
1,Jane,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com


In [77]:
# replace returns a new Series and leaves df as it was; assign it back to keep the change.
df['first'] = df['first'].replace({'Gonzalo': 'Burrito', 'Jane': 'Mary'})

In [78]:
df

Unnamed: 0,first,last,email
0,Burrito,Rivero Baud,burrito@gmail.com
1,Mary,Doe,janedoe@email.com
2,John,Doe,johndoe@email.com
