# Data Manipulation in the `tidyverse`

## Load library

In [None]:
library(tidyverse)

## What data sets are available?

In [None]:
# Called with no arguments, data() lists the data sets R can currently find.
data()

In [None]:
# Open the documentation page for the starwars data set.
help(starwars)

## Bssic information

In [None]:
# Class vector of the starwars object.
class(starwars)

In [None]:
# Dimensions of starwars: number of rows, then number of columns.
dim(starwars)

In [None]:
# Number of rows in starwars.
nrow(starwars)

In [None]:
# Number of columns in starwars.
ncol(starwars)

In [None]:
# Preview the first three rows.
head(starwars, n = 3)

In [None]:
# Preview the last three rows.
tail(starwars, n = 3)

In [None]:
# Draw three rows at random (the result differs from run to run).
# slice_sample() is the current replacement for the superseded sample_n().
slice_sample(starwars, n = 3)

## Piping

In [None]:
# Chain two steps with the pipe: first five rows, then the last three of those.
starwars %>% head(n = 5) %>% tail(n = 3)

In [None]:
# One pipeline step per line: take the first five rows, then keep the last
# three of those.
starwars %>%
  head(n = 5) %>%
  tail(n = 3)

## Select

In [None]:
# Keep only the name, birth_year and species columns, then preview three rows.
starwars %>%
  select(name, birth_year, species) %>%
  head(n = 3)

In [None]:
# select() can rename while selecting: birth_year is exposed as `age`.
starwars %>%
  select(name, age = birth_year, species) %>%
  head(n = 3)

In [None]:
# A minus sign drops columns: keep everything except these three.
starwars %>%
  select(-c(name, birth_year, species)) %>%
  head(n = 3)

In [None]:
# Select by name prefix: every column whose name starts with "s".
starwars %>%
  select(starts_with('s')) %>%
  head(n = 3)

In [None]:
# Select by regular expression: column names containing a run of two or more
# vowels.
starwars %>%
  select(matches("[aeiou]{2,}")) %>%
  head(n = 3)

## Arrange

In [None]:
# Sort rows by age in ascending order (the default) and show the first three.
starwars %>%
  select(name, age = birth_year, mass) %>%
  arrange(age) %>%
  head(n = 3)

In [None]:
# Wrap the column in desc() to sort from largest to smallest age.
starwars %>%
  select(name, age = birth_year, mass) %>%
  arrange(desc(age)) %>%
  head(n = 3)

In [None]:
# Sort by age only and show the first five rows.
starwars %>%
  select(name, age = birth_year, mass) %>%
  arrange(age) %>%
  head(n = 5)

In [None]:
# Sort by age first; rows with equal age are then ordered by mass.
starwars %>%
  select(name, age = birth_year, mass) %>%
  arrange(age, mass) %>%
  head(n = 5)

In [None]:
# Keep the three rows with the largest age.
# slice_max() is the current replacement for the superseded top_n(); unlike
# top_n() it also returns the rows sorted from largest to smallest.
starwars %>%
  select(name, age = birth_year, mass) %>%
  slice_max(age, n = 3)

## Filter

In [None]:
# Keep only the rows whose birth_year exceeds 100.
starwars %>%
  filter(birth_year > 100)

In [None]:
# Combine conditions with `|` (element-wise OR): a row is kept when either
# test is TRUE. Comparisons bind tighter than `|`, so no parentheses are needed.
starwars %>%
  filter(birth_year >= 600 | name == 'Dooku')

In [None]:
# Filter rows first, then narrow the result down to three columns.
starwars %>%
  filter(birth_year >= 600 | name == 'Dooku') %>%
  select(name, age = birth_year, homeworld)

In [None]:
# Comma-separated conditions are combined with AND.
# Since dplyr 1.0.0 the "male"/"female" values live in the `sex` column;
# `gender` holds "masculine"/"feminine", so `gender == "male"` matches nothing.
starwars %>%
  filter(birth_year > 100, sex == "male")

In [None]:
# Keep rows whose homeworld contains the substring "in".
starwars %>%
  filter(str_detect(string = homeworld, pattern = "in")) %>%
  head(n = 3)

## Mutate

In [None]:
# Starting point for the mutate examples: four columns, first three rows.
starwars %>%
  select(name, age = birth_year, height, mass) %>%
  head(n = 3)

In [None]:
# mutate() appends new columns and keeps the existing ones. A column created
# earlier in the call (bmi) can be used by a later one (obese).
# height is divided by 100 before squaring -- presumably cm to m; confirm
# against help(starwars).
starwars %>%
  select(name, age = birth_year, height, mass) %>%
  mutate(
    bmi = mass / (height / 100)^2,
    obese = bmi > 30
  ) %>%
  head(n = 3)

In [None]:
# transmute() computes the same columns as mutate() but returns only the
# newly created ones.
starwars %>%
  select(name, age = birth_year, height, mass) %>%
  transmute(
    bmi = mass / (height / 100)^2,
    obese = bmi > 30
  ) %>%
  head(n = 3)

In [None]:
# Upper-case every character column and keep only those columns.
# across() + where() replaces the superseded transmute_if().
starwars %>%
  transmute(across(where(is.character), str_to_upper)) %>%
  head(n = 3)

In [None]:
# Select columns named in a character vector.
# all_of() replaces the superseded select_at() and errors if a name is missing.
starwars %>%
  select(all_of(c("birth_year", "height", "mass"))) %>%
  head(n = 3)

In [None]:
# Add 1 to each of the named columns and keep only those columns.
# across() + all_of() replaces the superseded transmute_at().
starwars %>%
  transmute(across(all_of(c("birth_year", "height", "mass")), ~ .x + 1)) %>%
  head(n = 3)

## Summarize

In [None]:
# Collapse the table to one row of summary statistics. na.rm = TRUE drops
# missing values before each statistic is computed.
starwars %>%
  summarize(
    avg_mass = mean(mass, na.rm = TRUE),
    median_age = median(birth_year, na.rm = TRUE)
  )

In [None]:
# Mean of every numeric column, ignoring missing values.
# across() + where() replaces the superseded summarize_if().
starwars %>%
  summarize(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)))

In [None]:
# Mean and median of every numeric column, ignoring missing values.
# across() replaces the superseded summarize_if(); naming the list elements
# yields readable output columns (<column>_mean, <column>_median) instead of
# positional suffixes.
starwars %>%
  summarize(
    across(
      where(is.numeric),
      list(
        mean = ~ mean(.x, na.rm = TRUE),
        median = ~ median(.x, na.rm = TRUE)
      )
    )
  )

## Group by

In [None]:
# Average mass per homeworld: group_by() defines the groups and summarize()
# produces one row per group.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
starwars %>%
  group_by(homeworld) %>%
  summarize(avg_mass = mean(mass, na.rm = TRUE)) %>%
  head(n = 3)

In [None]:
# Average mass per homeworld, dropping the group whose homeworld is missing.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
starwars %>%
  group_by(homeworld) %>%
  summarize(avg_mass = mean(mass, na.rm = TRUE)) %>%
  filter(!is.na(homeworld)) %>%
  head(n = 3)

In [None]:
# Average mass and group size for every homeworld / species / gender
# combination. n() counts the rows in the current group.
# .groups = "drop" returns an ungrouped result; without it the output stays
# grouped by homeworld and species and summarize() emits a regrouping message.
starwars %>%
  group_by(homeworld, species, gender) %>%
  summarize(
    avg_mass = mean(mass, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) %>%
  filter(!is.na(homeworld)) %>%
  head(n = 5)

## Exercises

**1**. Show the name, height and mass of all Droids sorted by height from tallest to shortest.

**2**. List the name and hoemworld of all non-humans who appeared in `The Empire Strikes Back`.

**3**. Which homeworld has the most humans?

**4**. What species has the highest average BMI? Exclude Species with only 1 member, and ignore NAs in the calculation of the average.