# DATA422 Group Project
## NZ Regions DataFrame (Scraping Notebook)
### Samuel Love - 84107034

The goal of this notebook is to create a dataframe for use with data wrangling in the group assignment.

This is achieved by scraping Wikipedia to create a dataframe of the regions, districts, and cities of New Zealand that can be referenced and manipulated when converting datasets.

In [1]:
library(tidyverse)
library(magrittr) # better handling of pipes
library(purrr) # to work with lists and map functions
library(glue) # to paste strings
library(stringr) # to hand strings
library(rvest) # rvest makes scraping easier
library(xml2) # makes it easier to work with HTML and XML from R
library(visdat) # visualises missingness
library(httr) # useful for API's

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.2      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Attaching package: ‘magrittr’


The following object is masked from ‘package:purrr’:

    set_names


The following object is masked from ‘package:tidyr’:

    extract



Attaching package: ‘rvest’


The following object is masked from ‘package:readr’:

    guess_encoding




In [2]:
# Regions of New Zealand: download and parse the Wikipedia page that lists them
region_page <- "https://en.wikipedia.org/wiki/Regions_of_New_Zealand"
region_info <- region_page %>% read_html()
# Echo the parsed document as a quick check that the download worked
region_info

{html_document}
<html class="client-nojs" lang="en" dir="ltr">
[1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
[2] <body class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject  ...

In [3]:
# Inspect the structure of the parsed regions page
glimpse(region_info)

List of 2
 $ node:<externalptr> 
 $ doc :<externalptr> 
 - attr(*, "class")= chr [1:2] "xml_document" "xml_node"


In [4]:
# Districts of New Zealand: download and parse the Wikipedia page that lists them
district_page <- "https://en.wikipedia.org/wiki/Districts_of_New_Zealand"
district_info <- district_page %>% read_html()
# Echo the parsed document as a quick check that the download worked
district_info

{html_document}
<html class="client-nojs" lang="en" dir="ltr">
[1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
[2] <body class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject  ...

In [5]:
# Inspect the structure of the parsed districts page
glimpse(district_info)

List of 2
 $ node:<externalptr> 
 $ doc :<externalptr> 
 - attr(*, "class")= chr [1:2] "xml_document" "xml_node"


In [6]:
# Scraping region names: the title attribute of every link nested in bold text
Regions <- region_info %>%
    html_elements("b") %>%
    html_elements("a") %>%
    html_attrs() %>%
    map("title")

# Links without a title come back as NULL, so only non-empty entries are kept
Regions <- Regions[lengths(Regions) > 0]

# Replacing selected scraped titles with short region names.
# The positions are hard-coded against the order in which the page lists the regions.
Regions[c(1, 2, 5, 9, 10, 11, 12, 13, 14, 16)] <- c(
    "Northland", "Auckland", "Gisborne", "Wellington", "Tasman",
    "Nelson", "Marlborough", "West Coast", "Canterbury", "Southland"
)

In [7]:
# Initialising the DataFrame with a single list-column holding the region names
Districts_df <- Regions %>% tibble(Region = .)
Districts_df

Region
<list>
Northland
Auckland
Waikato
Bay of Plenty
Gisborne
Hawke's Bay
Taranaki
Manawatū-Whanganui
Wellington
Tasman


In [8]:
# Obtaining the number of districts in each region from the section headlines
# of the districts page (all attributes of each headline are kept for now)
District_counts <- district_info %>%
    html_elements(".mw-headline") %>%
    html_attrs()

# Trimming elements that are not regions: headlines 3 to 18 are kept, except
# the 8th, 9th and 10th of that range (positions 10 to 12 of the full list)
District_counts <- District_counts[c(3:9, 13:18)]

District_counts

In [9]:
# Keeping only the id attribute of each headline, e.g. "Northland_Region_(3)"
District_counts <- map(District_counts, "id")

# Consolidating unusual districts.
District_counts[12:13] <- list("Transregional_districts_(7)",
                               "Unitary_authorities_(3)")

# Preparing the list for the DataFrame: NA is inserted at rows 2, 5, 10, 11
# and 12, which have no district-count headline, giving 18 entries in total
District_counts <- c(
    District_counts[1],
    NA,
    District_counts[2:3],
    NA,
    District_counts[4:7],
    rep(NA, 3),
    District_counts[8:13]
)

In [10]:
# Adjusting DataFrame for the district counts.
# District_counts holds 18 entries (the last two being the consolidated
# "Transregional" and "Unitary" entries), so two placeholder rows with an NA
# Region are appended to give the DataFrame a matching number of rows.
# NOTE(review): this assumes Districts_df has 16 rows at this point - confirm.
Districts_df <- rbind(Districts_df, tibble(Region = NA))
Districts_df <- rbind(Districts_df, tibble(Region = NA))
Districts_df

Region
<list>
Northland
Auckland
Waikato
Bay of Plenty
Gisborne
Hawke's Bay
Taranaki
Manawatū-Whanganui
Wellington
Tasman


In [11]:
# Extending the DataFrame with the district-count labels as a new list-column
Districts_df <- mutate(Districts_df, District_Counts = District_counts)
Districts_df

Region,District_Counts
<list>,<list>
Northland,Northland_Region_(3)
Auckland,
Waikato,Waikato_Region_(7)
Bay of Plenty,Bay_of_Plenty_Region_(4)
Gisborne,
Hawke's Bay,Hawke's_Bay_Region_(3)
Taranaki,Taranaki_Region_(2)
Manawatū-Whanganui,Manawatū-Whanganui_Region_(4)
Wellington,Wellington_Region_(4)
Tasman,


In [12]:
# Extracting district names: the title attribute of every link inside a list item
Districts <- district_info %>%
    html_elements("li") %>%
    html_elements("a") %>%
    html_attrs() %>%
    map("title")

# Links without a title come back as NULL, so only non-empty entries are kept
Districts <- Districts[lengths(Districts) > 0]

# Minor name adjustments
Districts[c(16, 30, 49)] <- c(
    "Hastings District",
    "Stratford District",
    "Gore District"
)

# There are 53 districts; any later entries are discarded
Districts <- Districts[1:53]

In [13]:
# Preparing the list for the DataFrame: one entry per row of Districts_df,
# with NA for the rows that are given no districts
Districts_list <- list(
    Districts[1:3],            # row 1:  Northland
    NA,                        # row 2:  Auckland
    Districts[4:10],           # row 3:  Waikato
    Districts[11:14],          # row 4:  Bay of Plenty
    NA,                        # row 5:  Gisborne
    Districts[15:17],          # row 6:  Hawke's Bay
    Districts[18:19],          # row 7:  Taranaki
    Districts[20:23],          # row 8:  Manawatū-Whanganui
    Districts[24:27],          # row 9:  Wellington
    NA,                        # row 10: Tasman
    NA,                        # row 11: Nelson
    NA,                        # row 12: Marlborough
    Districts[35:37],          # row 13: West Coast
    Districts[38:45],          # row 14: Canterbury
    Districts[46:48],          # row 15: Otago
    Districts[49:50],          # row 16: Southland
    Districts[c(28:33, 51)],   # row 17: transregional districts (7)
    Districts[c(34, 52:53)]    # row 18: unitary authorities (3)
)

In [14]:
# Extending the DataFrame with the district names as a new list-column
Districts_df <- mutate(Districts_df, Districts = Districts_list)
Districts_df

Region,District_Counts,Districts
<list>,<list>,<list>
Northland,Northland_Region_(3),"Far North District, Kaipara District , Whangarei District"
Auckland,,
Waikato,Waikato_Region_(7),"Hauraki District , Matamata-Piako District , Otorohanga District , South Waikato District , Thames-Coromandel District, Waikato District , Waipa District"
Bay of Plenty,Bay_of_Plenty_Region_(4),"Kawerau District , Ōpōtiki District , Western Bay of Plenty District, Whakatane District"
Gisborne,,
Hawke's Bay,Hawke's_Bay_Region_(3),"Central Hawke's Bay District, Hastings District , Wairoa District"
Taranaki,Taranaki_Region_(2),"New Plymouth District , South Taranaki District"
Manawatū-Whanganui,Manawatū-Whanganui_Region_(4),"Horowhenua District, Manawatu District , Ruapehu District , Whanganui District"
Wellington,Wellington_Region_(4),"Carterton District , Kapiti Coast District , Masterton District , South Wairarapa District"
Tasman,,


In [15]:
# Finding links to the Regions pages to look for cities:
# the href of every bold link on the regions page, of which the first 16 are kept
Region_ends <- region_info %>%
    html_elements("b") %>%
    html_elements("a") %>%
    html_attrs() %>%
    map("href") %>%
    .[1:16]

In [16]:
# Automating the links, including reading the HTML.
# `link` is a path relative to the Wikipedia domain (as stored in Region_ends);
# the function appends it to "https://en.wikipedia.org", downloads the page
# and returns the parsed HTML document.
Region_link <- function(link){
    read_html(glue("https://en.wikipedia.org{link}"))
}

In [17]:
# Automating reading city names from a table.
# Takes a parsed HTML page and returns a list with the title attribute of every
# link found in the cells of the page's "wikitable" tables (NULL for a link
# that has no title).
# NOTE(review): this name masks readr::read_table(), attached by library(tidyverse).
read_table <- function(html){
    cell_links <- html %>%
        html_elements(".wikitable") %>%
        html_elements("tbody") %>%
        html_elements("tr") %>%
        html_elements("td") %>%
        html_elements("a")
    map(html_attrs(cell_links), "title")
}

In [18]:
# Automating reading city names without a table.
# Takes a parsed HTML page and returns a list with the title attribute of every
# link inside an "li" item of a "ul" list (NULL for a link that has no title).
read_without_table <- function(html){
    item_links <- html %>%
        html_elements("ul") %>%
        html_elements("li") %>%
        html_elements("a")
    map(html_attrs(item_links), "title")
}

In [19]:
# Northland: link titles scraped from the table on the region page
Cities_1 <- read_table(Region_link(Region_ends[1]))
# Overwriting four scraped titles with the plain names wanted in the DataFrame
Cities_1[c(7, 13, 15, 16)] <- c(
    "One Tree Point (Marsden Bay)", "Kawakawa", "Haruru", "Waipu"
)

In [20]:
# Auckland: link titles scraped from the bulleted lists on the region page
Cities_2 <- read_without_table(Region_link(Region_ends[2]))
# Only entries 7 to 24 of the scraped links are kept
Cities_2 <- Cities_2[7:24]
Cities_2[c(4, 5, 6, 7, 10)] <- c(
    "Waiheke West", "Beachlands-Pine Harbour", "Warkworth",
    "Kumeū-Huapai", "Riverhead"
)
# Dropping the 8th entry and placing Auckland itself at the front
Cities_2 <- c("Auckland", Cities_2[-8])

In [21]:
# Waikato: link titles scraped from the table on the region page
Cities_3 <- read_table(Region_link(Region_ends[3]))
# Overwriting six scraped titles with the plain names wanted in the DataFrame
Cities_3[c(1, 3, 6, 10, 20, 25)] <- c(
    "Hamilton", "Cambridge", "Huntly", "Thames", "Raglan", "Coromandel"
)

In [22]:
# Bay of Plenty: link titles scraped from the table on the region page
Cities_4 <- read_table(Region_link(Region_ends[4]))
# Overwriting the 13th scraped title with the plain name
Cities_4[[13]] <- "Edgecumbe"

In [23]:
# Gisborne: link titles scraped from the bulleted lists on the region page,
# of which only entries 14 to 21 are kept
Cities_5 <- read_without_table(Region_link(Region_ends[5]))[14:21]
# Overwriting the 5th kept title with the plain name
Cities_5[[5]] <- "Te Araroa"

In [24]:
# Hawke's Bay
# The region page is downloaded and parsed once, then reused by both scrapers
# (requesting it separately for each scraper only repeats the same download).
Region_html_6 <- Region_link(Region_ends[6])

# First seven link titles from the page's table
Cities_6_a <- read_table(Region_html_6)
Cities_6_a <- Cities_6_a[1:7]

# Entries 16 to 29 of the link titles from the page's bulleted lists
Cities_6_b <- read_without_table(Region_html_6)
Cities_6_b <- Cities_6_b[16:29]

# Combining both sources, then overwriting six scraped titles with plain names
Cities_6 <- c(Cities_6_a, Cities_6_b)
Cities_6[c(1,2,6,12,13,18)] <- c("Napier","Hastings","Clive","Whirinaki","Whakatu","Ongaonga")

In [25]:
# Taranaki: the first eight link titles from the table on the region page
Cities_7 <- read_table(Region_link(Region_ends[7]))[1:8]
# Overwriting entries 3 to 6 with the plain names wanted in the DataFrame
Cities_7[3:6] <- c("Waitara", "Stratford", "Inglewood", "Eltham")

In [26]:
# Manawatū-Whanganui
# The region page is downloaded and parsed once, then reused by both scrapers
# (requesting it separately for each scraper only repeats the same download).
Region_html_8 <- Region_link(Region_ends[8])

# Link titles from the page's table, with six of them replaced by plain names
Cities_8_a <- read_table(Region_html_8)
Cities_8_a[c(3,6,8,11,14,15)] <- c("Levin", "Marton", "Foxton", "Bulls", "Woodville", "Shannon")

# Link titles from the page's bulleted lists (entries 33 to 60), with six
# names typed in by hand at the positions where they belong
Cities_8_b <- read_without_table(Region_html_8)
Cities_8_b <- c(Cities_8_b[33:36],
                "Hiwinui",
                Cities_8_b[37:43],
                "Mangaore",
                Cities_8_b[44:55],
                "Raurimu",
                Cities_8_b[56:59],
                "Waikawa Beach",
                Cities_8_b[60],
                "Waitarere Beach")
# Overwriting five scraped titles with plain names
Cities_8_b[c(9,15,17,28,30)] <- c("Kimbolton","National Park","Ohakea","Sanson","Tokomaru")

Cities_8 <- c(Cities_8_a, Cities_8_b)

In [27]:
# Wellington: the cell text of one specific table on the region page is read
# directly (the selector picks the "wikitable" that is its parent's 45th child)
Cities_9 <- Region_link(Region_ends[9]) %>%
    html_elements(".wikitable:nth-child(45)") %>%
    html_elements("tbody") %>%
    html_elements("tr") %>%
    html_elements("td") %>%
    html_text2()
# Keeping every third cell from the 1st to the 40th (14 cells in total)
Cities_9 <- Cities_9[seq(1, 40, by = 3)]

In [28]:
# Tasman
# The region page is downloaded and parsed once, then reused by both scrapers
# (requesting it separately for each scraper only repeats the same download).
Region_html_10 <- Region_link(Region_ends[10])

# Link titles from the page's table, with three of them replaced by plain names
Cities_10_a <- read_table(Region_html_10)
Cities_10_a[c(1,3,6)] <- c("Richmond","Wakefield","Māpua")

# Link titles from the page's bulleted lists (entries 10 to 19 and 22 to 25),
# with two names typed in by hand at the positions where they belong
Cities_10_b <- read_without_table(Region_html_10)
Cities_10_b <- c(Cities_10_b[10],
                "Pohara/Ligar Bay/Tata Beach/Tarakohe/Wainui",
                Cities_10_b[11:19],
                "Māpua–Ruby Bay",
                Cities_10_b[22:25])
# Overwriting six scraped titles with plain names
Cities_10_b[c(1,4,5,13,15,16)] <- c("Collingwood","Murchison","Saint Arnaud","Tasman","Wakefield","Richmond")

# NOTE(review): both sources are kept in full, so a town present in both
# (e.g. Richmond, Wakefield) appears twice in Cities_10.
Cities_10 <- c(Cities_10_a, Cities_10_b)

In [29]:
# Nelson
# No list of cities on Wikipedia, so the entry for this region is left as NA
Cities_11 <- NA

In [30]:
# Marlborough
# The first three names are entered by hand. The table scrape that used to
# precede this assignment was overwritten by it straight away, so that extra
# page download has been dropped.
Cities_12_a <- list("Blenheim","Picton","Renwick")

# Entries 15 to 27 of the link titles from the page's bulleted lists
Cities_12_b <-
    Region_link(Region_ends[12]) %>%
    read_without_table()
Cities_12_b <- Cities_12_b[15:27]
# Overwriting six scraped titles with plain names
Cities_12_b[c(2,3,8,9,12,13)] <- c("Grovetown","Havelock","Seddon","Spring Creek","Ward","Woodbourne")

Cities_12 <- c(Cities_12_a, Cities_12_b)

In [31]:
# West Coast
# The region page is downloaded and parsed once, then reused by both scrapers
# (requesting it separately for each scraper only repeats the same download).
Region_html_13 <- Region_link(Region_ends[13])

# Entries 4, 6 and 8 of the table's link titles, with two names typed in by hand
Cities_13_a <- read_table(Region_html_13)
Cities_13_a <- c(Cities_13_a[4],
                 "Westport",
                 Cities_13_a[6],
                 "Runanga",
                 Cities_13_a[8])

# Entries 15 to 30 of the link titles from the page's bulleted lists
Cities_13_b <- read_without_table(Region_html_13)
Cities_13_b <- Cities_13_b[15:30]
# Overwriting nine scraped titles with plain names
Cities_13_b[c(2,4,5,6,7,9,13,14,15)] <- c("Blackball","Dobson","Fox Glacier","Franz Josef","Gloriavale","Haast","Kumara","Ngakawau","Ross")

Cities_13 <- c(Cities_13_a, Cities_13_b)

In [32]:
# Canterbury: link titles taken from the cells of tables that sit inside a div
# of the ".mw-parser-output" container; only entry 3 and entries 11 to 19 are kept
Cities_14 <- Region_link(Region_ends[14]) %>%
    html_elements(".mw-parser-output") %>%
    html_elements("div") %>%
    html_elements("table") %>%
    html_elements("tbody") %>%
    html_elements("tr") %>%
    html_elements("td") %>%
    html_elements("a") %>%
    html_attrs() %>%
    map("title") %>%
    .[c(3, 11:19)]
# Overwriting three of the kept titles with plain names
Cities_14[c(3, 4, 7)] <- c("Rolleston", "Ashburton", "Lincoln")

In [33]:
# Otago: link titles scraped from the table on the region page
Cities_15 <- read_table(Region_link(Region_ends[15]))
# Dropping the 2nd scraped entry before the names are adjusted
Cities_15 <- Cities_15[-2]
# Overwriting eight of the remaining titles with plain names
Cities_15[c(2, 6, 7, 8, 9, 11, 12, 14)] <- c(
    "Queenstown", "Cromwell", "Lake Hayes", "Alexandra",
    "Balclutha", "Milton", "Brighton", "Clyde"
)

In [34]:
# Southland: link titles scraped from the table on the region page
Cities_16 <- read_table(Region_link(Region_ends[16]))
# Overwriting four scraped titles with plain names
Cities_16[c(2, 3, 5, 7)] <- c("Gore", "Winton", "Bluff", "Riverton")

In [35]:
# Combining the per-region results into one list, in the row order of Districts_df
Cities_list <- list(
    Cities_1,  Cities_2,  Cities_3,  Cities_4,    # rows 1-4
    Cities_5,  Cities_6,  Cities_7,  Cities_8,    # rows 5-8
    Cities_9,  Cities_10, Cities_11, Cities_12,   # rows 9-12
    Cities_13, Cities_14, Cities_15, Cities_16,   # rows 13-16
    NA, NA                                        # rows 17-18: no cities
)

In [64]:
# Finalising the DataFrame: the city lists are added as a new list-column and
# the tibble is then converted to a base data frame
Districts_df <- Districts_df %>%
    mutate(Cities = Cities_list) %>%
    as.data.frame()
Districts_df

Region,District_Counts,Districts,Cities
<list>,<list>,<list>,<list>
Northland,Northland_Region_(3),"Far North District, Kaipara District , Whangarei District","Whangārei , Kerikeri , Kaitaia , Dargaville , Kaikohe , Ruakākā , One Tree Point (Marsden Bay), Mangawhai Heads , Moerewa , Hikurangi , Opua , Paihia , Kawakawa , Ngunguru , Haruru , Waipu"
Auckland,,,"Auckland , Hibiscus Coast , Pukekohe , Waiuku , Waiheke West , Beachlands-Pine Harbour, Warkworth , Kumeū-Huapai , Snells Beach , Riverhead , Helensville , Maraetai , Wellsford , Clarks Beach , Waimauku , Muriwai , Patumahoe , Parakai"
Waikato,Waikato_Region_(7),"Hauraki District , Matamata-Piako District , Otorohanga District , South Waikato District , Thames-Coromandel District, Waikato District , Waipa District","Hamilton , Taupō , Cambridge , Tokoroa , Te Awamutu , Huntly , Morrinsville, Matamata , Ngāruawāhia , Thames , Whitianga , Waihi , Tuakau , Te Kuiti , Te Aroha , Putāruru , Paeroa , Whangamatā , Turangi , Raglan , Pōkeno , Otorohanga , Kihikihi , Te Kauwhata , Coromandel , Tairua , Ngatea , Pirongia , Pauanui"
Bay of Plenty,Bay_of_Plenty_Region_(4),"Kawerau District , Ōpōtiki District , Western Bay of Plenty District, Whakatane District","Tauranga , Rotorua , Whakatāne , Te Puke , Kawerau , Katikati , Ōpōtiki , Ngongotahā , Ōmokoroa , Ōhope , Waihi Beach, Murupara , Edgecumbe"
Gisborne,,,"Hicks Bay , Manutuke , Patutahi , Ruatoria , Te Araroa , Te Karaka , Tokomaru Bay, Tolaga Bay"
Hawke's Bay,Hawke's_Bay_Region_(3),"Central Hawke's Bay District, Hastings District , Wairoa District","Napier , Hastings , Havelock North, Wairoa , Waipukurau , Clive , Waipawa , Tuai , Frasertown , Nūhaka , Mahia Beach , Whirinaki , Whakatu , Haumoana , Te Awanga , Waimārama , Tikokino , Ongaonga , Takapau , Ōtāne , Porangahau"
Taranaki,Taranaki_Region_(2),"New Plymouth District , South Taranaki District","New Plymouth, Hāwera , Waitara , Stratford , Inglewood , Eltham , Ōpunake , Patea"
Manawatū-Whanganui,Manawatū-Whanganui_Region_(4),"Horowhenua District, Manawatu District , Ruapehu District , Whanganui District","Palmerston North, Whanganui , Levin , Feilding , Dannevirke , Marton , Taumarunui , Foxton , Ashhurst , Pahiatua , Bulls , Foxton Beach , Taihape , Woodville , Shannon , Ohakune , Raetihi , Bunnythorpe , Eketāhuna , Halcombe , Himatangi Beach , Hiwinui , Hokio Beach , Hunterville , Kai Iwi , Kimbolton , Koitiata , Longburn , Manakau , Mangaore , Mangaweka , National Park , Norsewood , Ohakea , Ōhau , Ohura , Ormondville , Ōwhango , Pohangina , Pongaroa , Rangataua , Rātana Pā , Raurimu , Rongotea , Sanson , Tangimoana , Tokomaru , Waikawa Beach , Waiouru , Waitarere Beach"
Wellington,Wellington_Region_(4),"Carterton District , Kapiti Coast District , Masterton District , South Wairarapa District","Wellington , Lower Hutt , Porirua , Upper Hutt , Paraparaumu , Masterton , Waikanae , Carterton , Ōtaki , Featherston , Greytown , Ōtaki Beach , Martinborough, Paekākāriki"
Tasman,,,"Richmond , Motueka , Wakefield , Brightwater , Tākaka , Māpua , Collingwood , Pohara/Ligar Bay/Tata Beach/Tarakohe/Wainui, Tākaka , Murchison , Saint Arnaud , Tapawera , Kaiteriteri , Mārahau , Motueka , Riwaka , Brightwater , Māpua–Ruby Bay , Tasman , Upper Moutere , Wakefield , Richmond"


### At this point I (Sam) imported the list of sites from my weather data in order to convert the cities to regions

In [65]:
# Importing the sites list from the weather data (a CSV with one column, Sites)
Sites_df <- read_csv("Sites_df.csv")
# Showing the site names as a plain vector
Sites_df[[1]]

Rows: 30 Columns: 1
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): Sites

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [66]:
# Checking which sites share their name with a region.
# The first 16 rows of the Region column are each compared with the site names;
# element k of the printed list holds the positions in Sites_df that match
# region k (rows 17 and 18 of Districts_df are NA padding and are skipped).
# Matches recorded when this was run:
#   region 2 -> site 1, region 5 -> site 6, region 9 -> site 27,
#   region 11 -> site 16; no other region matched a site.
Districts_df[[1]][1:16] %>%
    map(~ which(Sites_df[[1]] %in% .x))

In [67]:
# Checking which regions the sites belong to via the scraped city names.
# The first 16 rows of the Cities column are each compared with the site names;
# element k of the printed list holds the positions in Sites_df found among
# the cities of region k (rows 17 and 18 of Districts_df have no cities).
# Matches recorded when this was run:
#   region 1 -> 11          region 2 -> 1        region 3 -> 8
#   region 4 -> 20, 24      region 6 -> 15       region 7 -> 17
#   region 8 -> 4, 22, 26, 28                    region 9 -> 13, 27
#   region 12 -> 2          region 13 -> 9, 19   region 14 -> 3, 25
#   region 15 -> 5, 18      region 16 -> 7, 10
#   regions 5, 10 and 11 matched no site.
Districts_df[[4]][1:16] %>%
    map(~ which(Sites_df[[1]] %in% .x))

In [68]:
# Creating a corresponding region list.
# Each number below is the row of Districts_df whose Region belongs to the site
# in the same position of Sites_df (30 sites, five per line).
# Lake Tekapo, Milford Sound, Tara Hills, & Whangaparaoa are not in the dataframe so I googled to manually find their region.
# Taupo & Whangarei were in the dataframe but spelt differently.
Sites_regions <- unlist(Districts_df[[1]][c(
    2,  12, 14, 8,  15,   # sites 1-5
    5,  16, 3,  13, 16,   # sites 6-10
    1,  14, 9,  16, 6,    # sites 11-15 (12: Canterbury and 14: Southland, set manually)
    11, 7,  15, 13, 4,    # sites 16-20
    15, 8,  3,  4,  14,   # sites 21-25 (21: Otago, set manually; 23: Waikato (spelling))
    8,  9,  8,  2,  1     # sites 26-30 (29: Auckland, set manually; 30: Northland (spelling))
)], use.names = FALSE)
Sites_regions

In [69]:
# Attaching the region of each site as a new column
Sites_df <- mutate(Sites_df, Regions = Sites_regions)
Sites_df

Sites,Regions
<chr>,<chr>
Auckland,Auckland
Blenheim,Marlborough
Christchurch,Canterbury
Dannevirke,Manawatū-Whanganui
Dunedin,Otago
Gisborne,Gisborne
Gore,Southland
Hamilton,Waikato
Hokitika,West Coast
Invercargill,Southland


In [129]:
# Saving the sites together with their regions as a csv
# (read by the DATA422_Weather_Data notebook)
write_csv(Sites_df, "Sites_df_v2.csv")

In [130]:
# Making a list of the regions and saving as a csv
# (read by the DATA422_Final_Dataset notebook).
# The Region list-column is flattened and sorted; sort() drops the NA padding rows.
# NOTE(review): the single column is named by as.data.frame() from the piped
# value (likely "."), so the pipe form is kept as is - confirm that the
# consuming notebook does not rely on a meaningful header.
Districts_df[[1]] %>%
    unlist() %>%
    sort() %>%
    as.data.frame() %>%
    write_csv("Region_list.csv")