/
clean_data.R
93 lines (86 loc) · 5.52 KB
/
clean_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
library(tidyverse)
avia_clean <- avia %>%
select("unit,tra_meas,airp_pr\\time", contains("20")) %>%
gather(date, passengers, -`unit,tra_meas,airp_pr\\time`) %>%
separate(col = `unit,tra_meas,airp_pr\\time`, into = c("unit", "tra_meas", "air_pr\\time"), sep = ",") %>%
mutate(tra_meas = fct_recode(tra_meas,
`Passengers on board` = "PAS_BRD",
`Passengers on board (arrivals)` = "PAS_BRD_ARR",
`Passengers on board (departures)` = "PAS_BRD_DEP",
`Passengers carried` = "PAS_CRD",
`Passengers carried (arrival)` = "PAS_CRD_ARR",
`Passengers carried (departures)` = "PAS_CRD_DEP",
`Passengers seats available` = "ST_PAS",
`Passengers seats available (arrivals)` = "ST_PAS_ARR",
`Passengers seats available (departures)` = "ST_PAS_DEP",
`Commercial passenger air flights` = "CAF_PAS",
`Commercial passenger air flights (arrivals)` = "CAF_PAS_ARR",
`Commercial passenger air flights (departures)` = "CAF_PAS_DEP")) %>%
mutate(unit = fct_recode(unit,
Passenger = "PAS",
Flight = "FLIGHT",
`Seats and berths` = "SEAT")) %>%
mutate(destination = fct_recode(`air_pr\\time`,
`WIEN-SCHWECHAT` = "LU_ELLX_AT_LOWW",
`BRUSSELS` = "LU_ELLX_BE_EBBR",
`GENEVA` = "LU_ELLX_CH_LSGG",
`ZURICH` = "LU_ELLX_CH_LSZH",
`FRANKFURT/MAIN` = "LU_ELLX_DE_EDDF",
`HAMBURG` = "LU_ELLX_DE_EDDH",
`BERLIN-TEMPELHOF` = "LU_ELLX_DE_EDDI",
`MUENCHEN` = "LU_ELLX_DE_EDDM",
`SAARBRUECKEN` = "LU_ELLX_DE_EDDR",
`BERLIN-TEGEL` = "LU_ELLX_DE_EDDT",
`KOBENHAVN/KASTRUP` = "LU_ELLX_DK_EKCH",
`HURGHADA / INTL` = "LU_ELLX_EG_HEGN",
`IRAKLION/NIKOS KAZANTZAKIS` = "LU_ELLX_EL_LGIR",
`FUERTEVENTURA` = "LU_ELLX_ES_GCFV",
`GRAN CANARIA` = "LU_ELLX_ES_GCLP",
`LANZAROTE` = "LU_ELLX_ES_GCRR",
`TENERIFE SUR/REINA SOFIA` = "LU_ELLX_ES_GCTS",
`BARCELONA/EL PRAT` = "LU_ELLX_ES_LEBL",
`ADOLFO SUAREZ MADRID-BARAJAS` = "LU_ELLX_ES_LEMD",
`MALAGA/COSTA DEL SOL` = "LU_ELLX_ES_LEMG",
`PALMA DE MALLORCA` = "LU_ELLX_ES_LEPA",
`SYSTEM - PARIS` = "LU_ELLX_FR_LF90",
`NICE-COTE D'AZUR` = "LU_ELLX_FR_LFMN",
`PARIS-CHARLES DE GAULLE` = "LU_ELLX_FR_LFPG",
`STRASBOURG-ENTZHEIM` = "LU_ELLX_FR_LFST",
`KEFLAVIK` = "LU_ELLX_IS_BIKF",
`MILANO/MALPENSA` = "LU_ELLX_IT_LIMC",
`BERGAMO/ORIO AL SERIO` = "LU_ELLX_IT_LIME",
`ROMA/FIUMICINO` = "LU_ELLX_IT_LIRF",
`AGADIR/AL MASSIRA` = "LU_ELLX_MA_GMAD",
`AMSTERDAM/SCHIPHOL` = "LU_ELLX_NL_EHAM",
`WARSZAWA/CHOPINA` = "LU_ELLX_PL_EPWA",
`PORTO` = "LU_ELLX_PT_LPPR",
`LISBOA` = "LU_ELLX_PT_LPPT",
`STOCKHOLM/ARLANDA` = "LU_ELLX_SE_ESSA",
`MONASTIR/HABIB BOURGUIBA` = "LU_ELLX_TN_DTMB",
`ENFIDHA-HAMMAMET INTERNATIONAL` = "LU_ELLX_TN_DTNH",
`ENFIDHA ZINE EL ABIDINE BEN ALI` = "LU_ELLX_TN_DTNZ",
`DJERBA/ZARZIS` = "LU_ELLX_TN_DTTJ",
`ANTALYA (MIL-CIV)` = "LU_ELLX_TR_LTAI",
`ISTANBUL/ATATURK` = "LU_ELLX_TR_LTBA",
`SYSTEM - LONDON` = "LU_ELLX_UK_EG90",
`MANCHESTER` = "LU_ELLX_UK_EGCC",
`LONDON GATWICK` = "LU_ELLX_UK_EGKK",
`LONDON/CITY` = "LU_ELLX_UK_EGLC",
`LONDON HEATHROW` = "LU_ELLX_UK_EGLL",
`LONDON STANSTED` = "LU_ELLX_UK_EGSS",
`NEWARK LIBERTY INTERNATIONAL, NJ.` = "LU_ELLX_US_KEWR",
`O.R TAMBO INTERNATIONAL` = "LU_ELLX_ZA_FAJS")) %>%
mutate(passengers = as.numeric(passengers)) %>%
select(unit, tra_meas, destination, date, passengers)
avia_clean_quarterly <- avia_clean %>%
filter(tra_meas == "Passengers on board (arrivals)",
!is.na(passengers)) %>%
filter(str_detect(date, "Q")) %>%
mutate(date = yq(date))
avia_clean_monthly <- avia_clean %>%
filter(tra_meas == "Passengers on board (arrivals)",
!is.na(passengers)) %>%
filter(str_detect(date, "M")) %>%
mutate(date = paste0(date, "01")) %>%
mutate(date = ymd(date)) %>%
select(destination, date, passengers)