# Weather Data Preparation

In [1]:
# Start from a clean workspace: remove every (non-hidden) object from the
# current environment. Loaded packages and options are not affected.
rm(list=ls())

In [2]:
library(fs)
library(stringr)
library(tidyverse)

── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ forcats 0.5.2 
✔ readr   2.1.3      
── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


In [3]:
# Read every raw weather CSV, average the observations per calendar day and
# write one "<name>_final.csv" per input file into the "final" folder.
folder <- getwd()
weather_dir <- file.path(dirname(folder), "data/weather/observed_data_2015_2023")
file_list <- dir_ls(file.path(weather_dir, "raw"), regexp = "\\.csv$")

# write.csv() does not create missing folders, so make sure the target exists.
final_dir <- file.path(weather_dir, "final")
dir_create(final_dir)

# Names given (by position) to the measurement columns of each raw file.
# NOTE(review): assumes every raw file holds exactly these three variables,
# in this order, next to `time` -- confirm against the raw data.
value_names <- c("temperature_c", "pressure_msl", "windspeed_100m")

for (i in seq_along(file_list)) {
    file <- file_list[i]
    tmp <- read.csv(file, sep = ",")
    tmp <- na.omit(tmp)

    # Adjust date: keep only the first 10 characters of the timestamp and
    # parse them as a Date, then drop the original timestamp column.
    tmp$date <- as.Date(str_sub(tmp$time, 1, 10))
    tmp$time <- NULL

    # Fail loudly instead of silently writing NA column headers when a file
    # does not have the expected number of measurement columns.
    value_cols <- setdiff(names(tmp), "date")
    if (length(value_cols) != length(value_names)) {
        stop("Expected ", length(value_names), " measurement columns in ",
             basename(file), " but found ", length(value_cols), call. = FALSE)
    }

    # Group on day: average only the measurement columns; the grouping key
    # comes back as the first column, already named "date".
    tmp_agg <- aggregate(tmp[value_cols], by = list(date = tmp$date), FUN = mean)
    names(tmp_agg) <- c("date", value_names)

    # Columns are stored in alphabetical order.
    tmp_agg <- tmp_agg[, order(names(tmp_agg))]

    # Save file
    out_name <- paste0(tools::file_path_sans_ext(basename(file)), "_final.csv")
    write.csv(tmp_agg, file = file.path(final_dir, out_name), row.names = FALSE)
}

In [4]:
# Create unique df: read every per-city daily file, suffix its measurement
# columns with the city name and join all cities on `date`.
file_list <- dir_ls(file.path(dirname(folder), "data/weather/observed_data_2015_2023/final"), regexp = "\\.csv$")

# Preferred left-to-right order of the measurement columns in the output.
value_order <- c("temperature_c", "pressure_msl", "windspeed_100m")

# Read one daily file and return it as `date` plus "<variable>_<city>" columns.
# The city is the part of the file name before the first underscore.
read_city_weather <- function(file) {
    # Keep the header exactly as stored and strip stray whitespace, so a date
    # column saved as " date" is still recognised.
    tmp <- read.csv(file, sep = ",", check.names = FALSE)
    names(tmp) <- trimws(names(tmp))
    if (!"date" %in% names(tmp)) {
        stop("No 'date' column found in ", basename(file), call. = FALSE)
    }

    city <- strsplit(basename(file), split = "_")[[1]][1]

    # Rename by the names stored in the file, not by position: the files are
    # written with alphabetically ordered columns, so a positional rename
    # would label the pressure values as temperature and vice versa.
    value_cols <- setdiff(names(tmp), "date")
    value_cols <- c(intersect(value_order, value_cols),
                    setdiff(value_cols, value_order))
    tmp <- tmp[, c("date", value_cols), drop = FALSE]
    names(tmp) <- c("date", paste0(value_cols, "_", city))
    tmp
}

weather <- lapply(as.character(file_list), read_city_weather)
weather <- weather %>% reduce(full_join, by = "date")

In [5]:
# Write the merged table to final/merged/weather.csv. write.csv() cannot
# create missing folders, so create the target folder first (a no-op when it
# already exists).
merged_dir <- file.path(dirname(folder), "data/weather/observed_data_2015_2023/final/merged")
dir_create(merged_dir)
write.csv(weather, file = file.path(merged_dir, "weather.csv"), row.names = FALSE)