In [None]:
import pandas as pd

In [None]:
# Read the survey results CSV, using its 'ResponseId' column as the row index
pdf = pd.read_csv(
    './data/survey_results_public.csv',
    index_col='ResponseId',
)
# Read the survey schema CSV, using its 'qname' column as the row index
schema_pdf = pd.read_csv(
    './data/survey_results_schema.csv',
    index_col='qname',
)
# Small hand-written frame (one tuple per person) used by the cells below
pdf_persons = pd.DataFrame(
    data=[
        ("Efi", "Coman", "EfiComan@purim.org"),
        ("Fritz", "Johansen", "Fandango@bar.gov"),
        ("Dixie", "Normus", "NormusDixie@gmail.com"),
        ("Theodor", "Normus", "theOdOr@bbs.co.il"),
    ],
    columns=["first", "last", "email"],
)

In [None]:
# Show the current column labels of the frame
pdf_persons.columns

In [None]:
# Assigning a list to .columns relabels every column by position, so the list
# needs one name per existing column ('email' is simply repeated unchanged).
pdf_persons.columns = ['first_name', 'last_name', 'email']
pdf_persons

In [None]:
# Upper-case every column label with a list comprehension
pdf_persons.columns = [label.upper() for label in pdf_persons.columns]
pdf_persons.columns

In [None]:
# Swap every space in a column label for an underscore
pdf_persons.columns = [label.replace(' ', '_') for label in pdf_persons.columns]

In [None]:
# Renaming only specific columns.
# rename() hands back a new, relabelled DataFrame; because the result is neither
# assigned nor requested in place, pdf_persons itself is left untouched here.
pdf_persons.rename(columns={
    'FIRST_NAME': 'first',
    'LAST_NAME': 'last'
})

In [None]:
# Check whether the rename above was applied in place: pdf_persons still shows
# the upper-case names, because rename() without inplace=True returns a new frame.
pdf_persons

In [None]:
# Rename again, this time mutating pdf_persons itself via inplace=True
pdf_persons.rename(
    columns=dict(FIRST_NAME='first', LAST_NAME='last', EMAIL='email'),
    inplace=True,
)
pdf_persons

In [None]:
# Change the content of a whole row: assigning a list to .loc[<row label>]
# overwrites every column of that row, so the list needs one value per column
# (first, last, email at this point in the notebook).
pdf_persons.loc[3] = ['Theodor', 'Normix', 'a@a.com']
pdf_persons

In [None]:
# What happens if there are tons of columns and we want to change the content of only a few?
# Just use loc[] again: pass the row label plus a list of the columns to overwrite,
# and assign one value per listed column.

pdf_persons.loc[3, ['last', 'email']] = ['Normus', 'theOdOr@bbs.co.il']
pdf_persons


In [None]:
# Change a single value: .loc[<row label>, <column label>] addresses exactly one cell
pdf_persons.loc[3, 'last'] = 'Normus1'
pdf_persons

In [None]:
# Adding a new column: the scalar 'bobo' is broadcast to every row.
# (The commented-out alternative on the right is a list with one distinct value per row.)
pdf_persons['bobo'] = 'bobo'  #[f'bobo_{x}' for x in range(4)]
pdf_persons

In [None]:
# Update selected entries of one column, chosen by the value of another column in the same row.
# Boolean mask: True where the lower-cased last name begins with 'normus'.
filt = pdf_persons['last'].str.lower().str.startswith('normus')
# Write the marker into 'bobo' only on the rows selected by the mask
pdf_persons.loc[filt, 'bobo'] = 'is_normus'
pdf_persons

## The 4 methods of conditionally changing cell contents
* apply
* map
* applymap
* replace

## The `apply()` method

In [None]:
# apply on a Series calls the function once per element; here it yields the length of each email
pdf_persons['email'].apply(len)

In [None]:
def update_email(email: str) -> str:
    """Return ``email`` converted to upper case.

    Args:
        email: The e-mail address to convert.

    Returns:
        The same address with every character upper-cased.
    """
    uppercased = email.upper()
    return uppercased

In [None]:
# use apply with custom function: update_email is called once per email and the
# returned values are written back into the 'email' column
pdf_persons['email'] = pdf_persons['email'].apply(update_email)
pdf_persons

In [None]:
# Same idea with an anonymous function: lower-case every email and store the result
pdf_persons['email'] = pdf_persons['email'].apply(lambda address: address.lower())
pdf_persons

In [None]:
# what happens when we use apply on a DataFrame (till now we used it on a Series)
pdf_persons.apply(len)
# the function is run once per series (column), so each result is that column's length, i.e. the row count

In [None]:
# len() of a single column is its number of rows - the same figure apply(len) reports per column
len(pdf_persons.email)

## The `applymap()` method

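Like `apply()`, but when used on a DataFrame it runs the given function on every individual cell (instead of once per Series along the axis). Note: since pandas 2.1 this method is named `DataFrame.map()` and `applymap()` is deprecated.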

In [None]:
# Call len() on every individual cell of the frame.
# pandas 2.1 renamed DataFrame.applymap to DataFrame.map and deprecated the old
# name, so use the new method when it exists and fall back to applymap otherwise.
cellwise = pdf_persons.map if hasattr(pdf_persons, 'map') else pdf_persons.applymap
cellwise(len)

## The `map()` method

Map values of Series according to an input mapping or function.

In [None]:
# Map with a dict: each email is looked up as a key; emails that are not keys come back as NaN
pdf_persons.email.map({'theodor@bbs.co.il': 'bobo@bobo.org', 'fandango@bar.gov': 'b@b.c'})

In [None]:
# With map() and a dict, every value that has no match in the dict becomes NaN.
# To change only a subset of values and keep the rest as they are, use Series.replace() with the same dict.
pdf_persons.email.replace({'theodor@bbs.co.il': 'bobo@bobo.org', 'fandango@bar.gov': 'b@b.c'})