In [1]:
# install.packages("rvest")
# remotes::install_github("dmi3kno/polite")("politer")
# install.packages("xml2")
library(rvest) # rvest makes scraping easier
library(polite) # polite is the "polite" version of rvest
library(xml2) # makes it easier to work with HTML and XML from R


library(tidyverse)
library(magrittr)
library(purrr) 
library(glue) 
library(stringr) 

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.3.6     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.8     [32m✔[39m [34mdplyr  [39m 1.0.9
[32m✔[39m [34mtidyr  [39m 1.2.0     [32m✔[39m [34mstringr[39m 1.4.1
[32m✔[39m [34mreadr  [39m 2.1.2     [32m✔[39m [34mforcats[39m 0.5.1
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m         masks [34mstats[39m::filter()
[31m✖[39m [34mreadr[39m::[32mguess_encoding()[39m masks [34mrvest[39m::guess_encoding()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m            masks [34mstats[39m::lag()

Attaching package: ‘magrittr’


The following object is masked from ‘package:purrr’:

    set_names


The following object is masked from ‘package:tidyr’:

    extract




## Use API

To get the weather history data we use api from this website https://www.visualcrossing.com/ . The api from that website allow us to get various kind of information about the weather in specific city and in certain range of time. However in our case we only interested about weather history from a week ago to the current date.

In [2]:
#first we load the httr library so that we can fetch the response from an api
library(httr)
library(jsonlite)


Attaching package: ‘jsonlite’


The following object is masked from ‘package:purrr’:

    flatten




In [3]:
# this is helper function to get the actually desired data inside the list
# this function takes 2 parameter, the first one is the data which is a list of all the information about the weather for each day, the second parameter is the index which is a string of the data we want to get
get_information_list <- function(data, index) {
    empty <- vector(mode = "list", length = 0) # first we prepare an empty list which will be appended with the information we want to get
    for(i in seq_along(data)){ # next we loop through every item inside the data given
        empty <- append(empty, data[i][[1]][[index]]) # then we append the data to the list we prepared before
    }
    return(empty)
}

#this is the function to get the weather history from a week ago until the current date
#this function will return a tibble which contains the information about the weather from 7 days ago to the current date. The information includes the date, temperature, windspeed, humidity, and conditions
#this function takes 2 parameters, first one is the city which is a string that indicates the city we want get the weather data from, next is the apiKey which is a string of the API access token from the website mentioned above
get_weather_data <- function(city, apiKey){

    currentDate <- Sys.Date() # in this code we try to
    
    lastWeek <- as.Date(currentDate) - 6 # then we also get the date from a week ago
    
    location <- c(city, city, city, city, city, city, city)

    response_raw <- GET(glue("https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{city}/{lastWeek}/{currentDate}?unitGroup=metric&key={apiKey}&contentType=json")) # the we use the api to get the weather data by specifying the city and dates
    response <- content(response_raw) # here I get the body content of the response

    #then in the code below, we create new tibble/data frame and use the get_information_list() function we defined above to get the data for each column
    result_weather_data <- tibble(Location = location,
                                    Date = get_information_list(response[["days"]], "datetime"),
                                    Temperature = get_information_list(response[["days"]], "temp"),
                                    WindSpeed = get_information_list(response[["days"]], "windspeed"),
                                    Humidity = get_information_list(response[["days"]], "humidity"),
                                    Conditions = get_information_list(response[["days"]], "conditions"),
                                    )

    #here we make sure each colum has correct data types
    result_weather_data <- result_weather_data %>% mutate(across(Date, as.Date)) %>%
                                                    mutate(across(Temperature, as.double)) %>%
                                                    mutate(across(WindSpeed, as.double)) %>%
                                                    mutate(across(Humidity, as.double)) %>%
                                                    mutate(across(Conditions, as.character)) %>% 
                                                    mutate(across(Location, str_to_title))
    return(result_weather_data)
}

  get_weather_data_safe <- purrr::possibly(.f = get_weather_data, # so here we want to make sure that the the function is showing error message if any error happens
                                   otherwise = "Something went wrong: possibly maximum number of request has been reached")

chch_data<-get_weather_data_safe("christchurch","GMTLW3T9CX22FK8GH662Q9GPZ")
chch_data
                                                           
chch_data %<>% write.csv("new_weather_chch_data.csv")
chch_data
                                                           
akl_data<-get_weather_data_safe("auckland","GMTLW3T9CX22FK8GH662Q9GPZ")
akl_data

akl_data %<>% write.csv("new_weather_akl_data.csv")
akl_data
                                                          
welly_data<-get_weather_data_safe("wellington","GMTLW3T9CX22FK8GH662Q9GPZ")
welly_data
                                                           
welly_data %<>% write.csv("new_weather_welly_data.csv")
welly_data

Date,Temperature,WindSpeed,Humidity,Conditions,Location
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
2022-10-20,11.8,38.0,61.4,Clear,Christchurch
2022-10-21,9.6,38.4,56.7,"Rain, Partially cloudy",Christchurch
2022-10-22,7.4,22.3,73.7,Clear,Christchurch
2022-10-23,15.7,29.5,61.9,Clear,Christchurch
2022-10-24,12.0,51.8,62.9,"Rain, Partially cloudy",Christchurch
2022-10-25,8.5,29.5,75.3,"Rain, Partially cloudy",Christchurch
2022-10-26,11.6,15.5,77.7,Partially cloudy,Christchurch


NULL

Date,Temperature,WindSpeed,Humidity,Conditions,Location
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
2022-10-20,15.1,35.9,69.7,Partially cloudy,Auckland
2022-10-21,15.3,40.0,74.2,"Rain, Partially cloudy",Auckland
2022-10-22,14.5,15.9,70.2,"Rain, Partially cloudy",Auckland
2022-10-23,14.3,25.9,76.7,Clear,Auckland
2022-10-24,15.5,33.5,81.4,Partially cloudy,Auckland
2022-10-25,15.9,27.7,75.3,"Rain, Partially cloudy",Auckland
2022-10-26,15.4,20.5,76.5,"Rain, Partially cloudy",Auckland


NULL

Date,Temperature,WindSpeed,Humidity,Conditions,Location
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
2022-10-20,14.1,37.6,65.8,Clear,Wellington
2022-10-21,11.3,45.1,62.5,"Rain, Partially cloudy",Wellington
2022-10-22,10.6,21.8,64.5,Clear,Wellington
2022-10-23,13.5,44.6,74.6,Partially cloudy,Wellington
2022-10-24,15.5,46.4,66.5,"Rain, Partially cloudy",Wellington
2022-10-25,11.0,37.1,65.0,"Rain, Partially cloudy",Wellington
2022-10-26,12.3,42.5,77.1,Partially cloudy,Wellington


NULL