In this notebook, we will cover:

* [Adding New Variables](#Adding-New-Variables)
* [Grouped Summaries](#Grouped-Summaries)

Let us load up the `tidyverse` and `nycflights13` packages.

In [1]:
library(tidyverse)
library(nycflights13)

Loading tidyverse: ggplot2
Loading tidyverse: tibble
Loading tidyverse: tidyr
Loading tidyverse: readr
Loading tidyverse: purrr
Loading tidyverse: dplyr
Conflicts with tidy packages ---------------------------------------------------
filter(): dplyr, stats
lag():    dplyr, stats


# Adding New Variables

In [2]:
# Keep only the date, clock-time, air-time and airport columns. The outer
# parentheses print the result in addition to assigning it.
(my_flights <- flights %>%
  select(year:day, dep_time, arr_time, air_time, origin, dest))

year,month,day,dep_time,arr_time,air_time,origin,dest
2013,1,1,517,830,227,EWR,IAH
2013,1,1,533,850,227,LGA,IAH
2013,1,1,542,923,160,JFK,MIA
2013,1,1,544,1004,183,JFK,BQN
2013,1,1,554,812,116,LGA,ATL
2013,1,1,554,740,150,EWR,ORD
2013,1,1,555,913,158,EWR,FLL
2013,1,1,557,709,53,LGA,IAD
2013,1,1,557,838,140,JFK,MCO
2013,1,1,558,753,138,LGA,ORD


In [3]:
# Naive difference of the raw clock values: dep_time and arr_time are stored as
# HHMM numbers, so e.g. 830 - 517 = 313 is not a duration in minutes.
my_flights %>%
  mutate(time_in_flight = arr_time - dep_time)

year,month,day,dep_time,arr_time,air_time,origin,dest,time_in_flight
2013,1,1,517,830,227,EWR,IAH,313
2013,1,1,533,850,227,LGA,IAH,317
2013,1,1,542,923,160,JFK,MIA,381
2013,1,1,544,1004,183,JFK,BQN,460
2013,1,1,554,812,116,LGA,ATL,258
2013,1,1,554,740,150,EWR,ORD,186
2013,1,1,555,913,158,EWR,FLL,358
2013,1,1,557,709,53,LGA,IAD,152
2013,1,1,557,838,140,JFK,MCO,281
2013,1,1,558,753,138,LGA,ORD,195


In [4]:
#' Convert clock times in HHMM form to minutes since midnight
#'
#' @param hm Numeric vector of clock times encoded as hour * 100 + minute
#'   (e.g. 530 for 5:30).
#' @return Numeric vector, the same length as `hm`, holding 60 * hour + minute.
hourmin2min <- function(hm) {
  hours <- hm %/% 100
  minutes <- hm %% 100
  60 * hours + minutes
}

In [5]:
# Sanity check: 5:30 should be 5 * 60 + 30 = 330 minutes after midnight.
hourmin2min(hm = 530)

In [6]:
# Add arrival and departure times re-expressed as minutes since midnight.
(my_flights_new <- my_flights %>%
  mutate(
    new_arr = hourmin2min(arr_time),
    new_dep = hourmin2min(dep_time)
  ))

year,month,day,dep_time,arr_time,air_time,origin,dest,new_arr,new_dep
2013,1,1,517,830,227,EWR,IAH,510,317
2013,1,1,533,850,227,LGA,IAH,530,333
2013,1,1,542,923,160,JFK,MIA,563,342
2013,1,1,544,1004,183,JFK,BQN,604,344
2013,1,1,554,812,116,LGA,ATL,492,354
2013,1,1,554,740,150,EWR,ORD,460,354
2013,1,1,555,913,158,EWR,FLL,553,355
2013,1,1,557,709,53,LGA,IAD,429,357
2013,1,1,557,838,140,JFK,MCO,518,357
2013,1,1,558,753,138,LGA,ORD,473,358


In [7]:
# Difference between arrival and departure clock times, now in minutes.
(my_flights_total <- my_flights_new %>%
  mutate(total_time = new_arr - new_dep))

year,month,day,dep_time,arr_time,air_time,origin,dest,new_arr,new_dep,total_time
2013,1,1,517,830,227,EWR,IAH,510,317,193
2013,1,1,533,850,227,LGA,IAH,530,333,197
2013,1,1,542,923,160,JFK,MIA,563,342,221
2013,1,1,544,1004,183,JFK,BQN,604,344,260
2013,1,1,554,812,116,LGA,ATL,492,354,138
2013,1,1,554,740,150,EWR,ORD,460,354,106
2013,1,1,555,913,158,EWR,FLL,553,355,198
2013,1,1,557,709,53,LGA,IAD,429,357,72
2013,1,1,557,838,140,JFK,MCO,518,357,161
2013,1,1,558,753,138,LGA,ORD,473,358,115


In [8]:
# Flights whose clock-time difference is shorter than the reported air_time.
# NOTE(review): presumably arr_time is in the destination's local time zone,
# which would explain the westbound routes (LAX, SFO, LAS) here -- confirm.
my_flights_total %>%
  filter(total_time < air_time)

year,month,day,dep_time,arr_time,air_time,origin,dest,new_arr,new_dep,total_time
2013,1,1,517,830,227,EWR,IAH,510,317,193
2013,1,1,533,850,227,LGA,IAH,530,333,197
2013,1,1,554,740,150,EWR,ORD,460,354,106
2013,1,1,558,753,138,LGA,ORD,473,358,115
2013,1,1,558,924,345,JFK,LAX,564,358,206
2013,1,1,558,923,361,EWR,SFO,563,358,205
2013,1,1,559,941,257,LGA,DFW,581,359,222
2013,1,1,559,854,337,EWR,LAS,534,359,175
2013,1,1,602,812,170,LGA,MSP,492,362,130
2013,1,1,608,807,139,EWR,ORD,487,368,119


In [9]:
# Flights whose arrival clock time is earlier than the departure clock time
# (e.g. depart 1929, arrive 0003), giving a negative difference -- these look
# like arrivals after midnight.
my_flights_total %>%
  filter(total_time < 0)

year,month,day,dep_time,arr_time,air_time,origin,dest,new_arr,new_dep,total_time
2013,1,1,1929,3,192,EWR,BQN,3,1169,-1166
2013,1,1,1939,29,,JFK,DFW,29,1179,-1150
2013,1,1,2058,8,159,EWR,TPA,8,1258,-1250
2013,1,1,2102,146,199,EWR,SJU,106,1262,-1156
2013,1,1,2108,25,354,EWR,SFO,25,1268,-1243
2013,1,1,2120,16,160,LGA,FLL,16,1280,-1264
2013,1,1,2121,6,143,EWR,MCO,6,1281,-1275
2013,1,1,2128,26,338,JFK,LAX,26,1288,-1262
2013,1,1,2134,20,152,EWR,FLL,20,1294,-1274
2013,1,1,2136,25,154,EWR,FLL,25,1296,-1271


# Grouped Summaries