# Applying functions

... and more data cleanup

In [None]:
import pandas as pd
# Load the input CSV into df; every later cell reads or reassigns this name.
df = pd.read_csv('provinces-clean1.csv')

In [None]:
# Display the frame exactly as loaded, before any cleanup.
df

## Drop the Total row

In [None]:
# Keep every row whose Province is not the literal string 'Total'.
# No .copy() is taken here, so pandas may still treat df2 as derived from df.
df2 = df[df.Province != 'Total']

## Painful aside: SettingWithCopyWarning

In [None]:
# Deliberate: adding a column to the filtered (un-copied) frame is what can
# raise SettingWithCopyWarning, which is the subject of this section.
df2['country'] = 'Canada'

[Explanation](https://www.dataquest.io/blog/settingwithcopywarning/)

Solution: call `.copy()` on the filtered result so the new DataFrame is independent of the original.

In [None]:
# Same filter as before, but .copy() makes df3 an independent DataFrame,
# so the column assignment below is an ordinary write to df3 itself.
df3 = df[df.Province != 'Total'].copy()
df3['country'] = 'Canada'
df3

In [None]:
# Rebind df to a copy of itself without the 'Total' row; the copy keeps the
# later column assignments on df from being writes to a filtered slice.
df = df.loc[df['Province'] != 'Total'].copy()

## Back to work

In [None]:
# Inspect the languages column before it is cleaned.
df.languages

## Apply any function

In [None]:
# Usual spelling: call lower() as a method on the string instance.
'English'.lower()

In [None]:
# Same call through the class: str.lower is a plain function taking the string
# as its argument, which is the form that can be handed to apply().
str.lower('English')

In [None]:
# apply() calls the given function once per element and returns a new Series;
# df is not modified because the result is not assigned.
df.languages.apply(str.lower)

In [None]:
# Equivalent to apply(str.lower), written with an inline lambda instead of a
# named function.
df.languages.apply(lambda x:
                  x.lower())

## The `.str` shortcut

In [None]:
# The .str accessor exposes string methods element-wise, so no apply() is needed.
df.languages.str.lower()

## Apply named function

In [None]:
# Inspect the Province column before it is cleaned.
df.Province

In [None]:
def without_footnote(raw):
    """Return the part of ``raw`` that comes before its first '[' character.

    A string containing no '[' is returned unchanged.
    """
    head, _bracket, _rest = raw.partition('[')
    return head

In [None]:
# Preview only: the result is displayed, not assigned, so df is unchanged.
df.Province.apply(without_footnote)

In [None]:
# Run without_footnote over three text columns and store each result back
# under the same column name.
for column in ('Province', 'capital', 'largest_city'):
    df[column] = df[column].apply(without_footnote)

In [None]:
import re
# One capital ASCII letter followed by one or more lowercase ASCII letters.
# NOTE(review): the pattern is ASCII-only, so a name containing an apostrophe,
# hyphen or accented letter would be cut at that character, and a two-word name
# would come out as two entries — confirm the data has no such values.
capitalized_word = re.compile(r'[A-Z][a-z]+')


def clean_lang(raw):
    """Return the capitalized words found in ``raw`` joined by ', '.

    Everything from the first '[' onward is discarded (via without_footnote)
    before the search; the words are kept in the order they appear.
    """
    words = capitalized_word.findall(without_footnote(raw))
    return ', '.join(words)

In [None]:
# Quick check on a run-together value; this evaluates to 'English, French'.
clean_lang('EnglishFrench')

In [None]:
# Preview the cleaned languages column without modifying df.
df.languages.apply(clean_lang)

In [None]:
# Store the cleaned values back into the languages column.
df['languages'] = df.languages.apply(clean_lang)

In [None]:
# Show the current Province values.
df.Province

In [None]:
# Same text-before-'[' logic as without_footnote, written inline as a lambda;
# preview only, df is not modified.
df.Province.apply(lambda p: p.split('[')[0])

In [None]:
# Assignment form of the lambda version. When the cells are run in order,
# Province has already been passed through without_footnote, so this leaves
# the values unchanged.
df['Province'] = df.Province.apply(lambda p: p.split('[')[0])

## Applying numeric functions

In [None]:
# Inspect the population column.
df.population

In [None]:
# A negative argument rounds to the left of the decimal point: -3 rounds to
# the nearest thousand. The result is displayed, not stored.
df.population.round(-3)

In [None]:
# New column: element-wise population divided by land_km2.
df['pop_per_km2'] = df['population'].div(df['land_km2'])
# Show just the province name next to the computed ratio.
df.loc[:, ['Province', 'pop_per_km2']]

In [None]:
import math
# apply() accepts any callable that takes one value; math.cos is called once
# per population entry and the result is displayed, not stored.
df.population.apply(math.cos)

## Review: date conversion

In [None]:
# Replace the entered column with its datetime-parsed version.
df['entered'] = pd.to_datetime(df.entered)

In [None]:
# Display the frame after all of the cleanup steps.
df

## Output

In [None]:
# Write the cleaned table; index=False leaves the row index out of the file.
# NOTE(review): CSV stores the datetime column as text, so a later read_csv
# presumably needs parse_dates=['entered'] to get datetimes back — confirm downstream.
df.to_csv('provinces-clean2.csv', index=False)