# Weather Data Preparation

In [1]:
# Start from an empty workspace: remove every object visible in the
# current environment.
rm(list = ls())

# A negative `warn` level makes R discard warnings until the option is
# changed again.
options(warn = -1)

In [2]:
suppressMessages(library(fs))
suppressMessages(library(stringr))
suppressMessages(library(tidyverse))

In [3]:
# Build one daily-mean file per raw weather file.
#
# Every CSV in "data/weather/observed_data_2015_2023/raw" (located next to the
# working directory, i.e. under its parent) is read, rows containing any NA
# are dropped, the remaining observations are averaged per calendar day, and
# the result is written to the sibling "final" directory as
# "<raw file name>_final.csv". Output columns are sorted alphabetically:
# date, pressure_sl_m, temperature_c, windspeed_100m_kmh.
folder <- getwd()
weather_dir <- file.path(dirname(folder), "data/weather/observed_data_2015_2023")
file_list <- dir_ls(file.path(weather_dir, "raw"), regexp = "\\.csv$")

# Create the output directory; stay quiet when it already exists so re-runs
# do not depend on warnings being globally suppressed.
final_dir <- file.path(weather_dir, "final")
dir.create(final_dir, showWarnings = FALSE, recursive = TRUE)

# Names assigned to the measurement columns, in raw-file column order.
# NOTE(review): assumes each raw file holds exactly these three variables in
# this order besides `time` -- confirm against the data source.
measure_names <- c("temperature_c", "pressure_sl_m", "windspeed_100m_kmh")

for (raw_path in file_list) {
    obs <- na.omit(read.csv(raw_path, sep = ","))

    # Fail with the offending file name instead of an opaque downstream error.
    if (!"time" %in% names(obs)) {
        stop("Column 'time' not found in ", raw_path, call. = FALSE)
    }
    if (nrow(obs) == 0) {
        stop("No complete rows to aggregate in ", raw_path, call. = FALSE)
    }

    # Rename the measurement columns only after checking their count, so an
    # unexpected file layout cannot silently produce mislabeled columns.
    measures <- obs[, setdiff(names(obs), "time"), drop = FALSE]
    if (ncol(measures) != length(measure_names)) {
        stop("Expected ", length(measure_names), " measurement columns in ",
             raw_path, ", found ", ncol(measures), call. = FALSE)
    }
    names(measures) <- measure_names

    # The first 10 characters of `time` are the calendar date (YYYY-MM-DD).
    day <- as.Date(str_sub(obs$time, 1, 10))

    # Average each measurement per day; the grouping key becomes `date`.
    daily <- aggregate(measures, by = list(date = day), FUN = mean)
    daily <- daily[, order(names(daily))]

    # Save file
    write.csv(daily,
              file = file.path(final_dir,
                               paste0(tools::file_path_sans_ext(basename(raw_path)), "_final.csv")),
              row.names = FALSE)
}

In [4]:
# Merge the per-city daily files into a single wide data frame keyed by date.
#
# Every CSV directly inside the "final" directory is read and its value
# columns are suffixed with "_<city>", where <city> is the part of the file
# name before the first underscore. The tables are then full-joined on `date`
# and the result is stored in `weather`, sorted chronologically.
file_list <- dir_ls(file.path(dirname(folder), "data/weather/observed_data_2015_2023/final"), regexp = "\\.csv$")
if (length(file_list) == 0) {
    stop("No daily weather files found to merge", call. = FALSE)
}

weather <- lapply(file_list, function(path) {
    daily <- read.csv(path, sep = ",")
    if (!"date" %in% names(daily)) {
        stop("Column 'date' not found in ", path, call. = FALSE)
    }

    # NOTE(review): a city whose file name itself contains "_" is truncated
    # at the first underscore -- confirm the raw file naming scheme.
    city <- strsplit(basename(path), split = "_")[[1]][1]

    # Suffix columns by name rather than by position, so the labels do not
    # depend on the column order of the input file.
    value_cols <- names(daily) != "date"
    names(daily)[value_cols] <- paste0(names(daily)[value_cols], "_", city)
    daily
})

# full_join appends dates missing from earlier tables at the bottom, so sort
# afterwards to keep the merged series in chronological order.
weather <- weather %>%
    reduce(full_join, by = "date") %>%
    arrange(date)

In [5]:
# Write the merged table to "weather.csv" inside the "merged" sub-directory
# of the final data folder, creating that sub-directory first.
merged_dir <- file.path(dirname(folder), "data/weather/observed_data_2015_2023/final/merged")
dir.create(merged_dir)
write.csv(
    weather,
    file = file.path(merged_dir, "weather.csv"),
    row.names = FALSE
)