In [1]:
# Run everything from the project root so that the relative paths used in this
# notebook ("src/...", "db/...") resolve against it.
setwd(fs::path_abs("~/Local_Workspace/TesiMag"))

library(arrow, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(tibble, warn.conflicts = FALSE)
library(tidyr, warn.conflicts = FALSE)
library(stringr, warn.conflicts = FALSE)

# Project-local scripts, sourced by relative path (hence the setwd() above).
# NOTE(review): presumably these define the read.BRUN.* and open.dataset
# helpers called further down -- confirm.
source("src/paths/paths.R")
source("src/load/load.R")

# Earliest date kept when transcribing series; passed to trascr_data().
start_date <- as.Date("2000-01-01")


Linking to GEOS 3.12.0, GDAL 3.7.3, PROJ 9.3.0; sf_use_s2() is TRUE

terra 1.7.55



In [2]:
# Every (variable, flavor) pair to process: `variable` varies slowest,
# `flavor` fastest, giving 2 x 3 = 6 rows.
combos <- expand_grid(
    variable = c("T_MAX", "T_MIN"),
    flavor = c("raw", "qc_era5", "qc_homo")
)
# Transcribe one BRUN series (a single variable/flavor pair) to a Feather file.
#
# Reads the series with read.BRUN.series(), keeps the rows dated on or after
# `start_date`, drops the rows whose measurement is missing, attaches a logical
# `dpc` flag taken from the matching metadata, and writes the result to
# "brun_{flavor_}_{variable_}.arrow" in the current working directory.
#
# Arguments:
#   variable_   String naming the measurement column of the series; it is also
#               stored in the output `variable` column.
#   flavor_     String forwarded to the readers and stored in the output
#               `flavor` column.
#   start_date  Inclusive lower bound compared against the `date` column.
#
# Called for its side effect (the file on disk); the value is whatever
# write_feather() returns.
trascr_data <- function(variable_, flavor_, start_date) {
    # Per-station flag: TRUE when the station's `region_` is "dpc".
    dpc_flags <- read.BRUN.metadata(variable_, flavor_) |>
        mutate(dpc = (region_ == "dpc")) |>
        select(identifier, dpc)

    read.BRUN.series(variable_, flavor_) |>
        filter(date >= start_date) |>
        # `variable_` is a column name held in a string, so it is selected
        # with all_of(). Embracing it ({{ }}) would capture the caller's
        # expression instead, making the selection depend on how the caller
        # happened to spell the argument.
        drop_na(all_of(variable_)) |>
        left_join(dpc_flags, by = "identifier") |>
        rename(value = all_of(variable_)) |>
        arrange(dpc, identifier, date) |>
        mutate(variable = variable_, flavor = flavor_) |>
        as_arrow_table(schema = schema(
            identifier = utf8(),
            value = float32(),
            date = date32(),
            dpc = bool(),
            variable = utf8(),
            flavor = utf8()
        )) |>
        write_feather(str_glue("brun_{flavor_}_{variable_}.arrow"))
}


In [7]:
# Transcribe every (variable, flavor) pair, one Feather file per row of
# `combos`, in row order.
for (row_idx in seq_len(nrow(combos))) {
    trascr_data(
        combos[["variable"]][row_idx],
        combos[["flavor"]][row_idx],
        start_date
    )
}


In [3]:
# Open every "*.arrow" file found in the working directory as a single
# Feather-backed dataset.
ds <- fs::dir_ls(glob = "*.arrow") |>
    open_dataset(format = "feather")


In [10]:
# Re-write the combined dataset under db/data/brun, partitioned on disk by
# flavor and then by variable.
write_dataset(
    ds,
    "db/data/brun",
    format = "feather",
    partitioning = c("flavor", "variable")
)


In [15]:
# Load the BRUN metadata for one variable/flavor pair and tag every row with
# the pair it came from (columns `variable` and `flavor`).
ld_meta <- function(variable_, flavor_) {
    metadata <- read.BRUN.metadata(variable_, flavor_)
    mutate(metadata, variable = variable_, flavor = flavor_)
}

# brun <- concat_tables(
#     read_parquet("cache/metadata/BRUN/T_MAX/qc_era5.parquet", as_data_frame = FALSE) |> mutate(variable = "T_MAX", flavor = "qc_era5") |> as_arrow_table(),
#     read_parquet("cache/metadata/BRUN/T_MIN/qc_era5.parquet", as_data_frame = FALSE) |> mutate(variable = "T_MIN", flavor = "qc_era5") |> as_arrow_table(),
#     read_parquet("cache/metadata/BRUN/T_MAX/qc_homo.parquet", as_data_frame = FALSE) |> mutate(variable = "T_MAX", flavor = "qc_homo") |> as_arrow_table(),
#     read_parquet("cache/metadata/BRUN/T_MIN/qc_homo.parquet", as_data_frame = FALSE) |> mutate(variable = "T_MIN", flavor = "qc_homo") |> as_arrow_table(),
#     read_parquet("cache/metadata/BRUN/T_MAX/raw.parquet", as_data_frame = FALSE) |> mutate(variable = "T_MAX", flavor = "raw") |> as_arrow_table(),
#     read_parquet("cache/metadata/BRUN/T_MIN/raw.parquet", as_data_frame = FALSE) |> mutate(variable = "T_MIN", flavor = "raw") |> as_arrow_table(),
# )

# Build the BRUN metadata dataset: for each (variable, flavor) row of `combos`
# load the tagged metadata with ld_meta(), spread the resulting `data` column
# into top-level columns, add the logical `dpc` flag (region_ == "dpc"), and
# write everything under db/metadata/brun as Feather, partitioned by flavor
# and variable.
# NOTE(review): the `variable` / `flavor` columns used for partitioning look
# like they come from ld_meta()'s own mutate(), not from `combos` -- confirm.
combos |>
    rowwise() |>
    reframe(data = ld_meta(variable, flavor)) |>
    unnest_wider(data) |>
    mutate(dpc = (region_ == "dpc")) |>
    write_dataset("db/metadata/brun", format = "feather", partitioning = c("flavor", "variable"))


In [16]:
# Inspect the "DPC" metadata dataset through the project helper open.dataset()
# (not defined in this notebook); the printed schema and filter follow below.
open.dataset("DPC", "metadata")


FileSystemDataset (query)
region_: dictionary<values=string, indices=int8>
country: dictionary<values=string, indices=int8>
anagrafica: string
GSOD: bool
user_code: string
MG: bool
identifier: string
lon: double
lat: double
elevation: double
province: dictionary<values=string, indices=int8>
version: int32
internal_id: int32
state: string
dpc: bool
flavor: string
variable: string

* Filter: dpc
See $.data for the source Arrow object