In [1]:
setwd(fs::path_abs("~/Local_Workspace/TesiMag"))
library(dplyr, warn.conflicts = FALSE)
library(arrow, warn.conflicts = FALSE)

source("notebooks/ds_regionali/procedure/common_steps.R")
source("src/database/query/data.R")
source("src/database/startup.R")
source("src/load/tools.R")

conns <- load_dbs()


Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE



In [2]:
save_kml_orig <- function(data, key, base_path) {
    # extra_meta <- dbReadTable(conns$stations, str_glue("extra_{key$dataset}"))
    extra_meta <- read_parquet(file.path("db", "extra", "metadata", str_glue("{key$dataset}.parquet")))
    data <- data |> left_join(extra_meta |> select(!c(starts_with("url"), starts_with("fk_id"))), by = "sensor_key")
    path <- file.path(base_path, key$reference_state, "original")
    if (!dir.exists(path)) {
        dir.create(path, recursive = TRUE)
    }
    data |> write.csv(file.path(path, paste0(key$dataset, ".csv")), row.names = FALSE)
    data |>
        st_md_to_sf(remove = FALSE) |>
        st_write(file.path(path, paste0(key$dataset, ".kml")), append = FALSE)
}

save_kml_merged <- function(meta, key, base_path) {
    path <- file.path(base_path, key$reference_state, "original")
    if (!dir.exists(path)) {
        dir.create(path, recursive = TRUE)
    }
    meta <- meta |> rowwise() |> mutate(across(starts_with("from_"), ~ paste0(., collapse = ";")))
    meta |> write.csv(file.path(path, "merged.csv"), row.names = FALSE)
    meta |>
        st_md_to_sf(remove = FALSE) |>
        st_write(file.path(path, "merged.kml"), append = FALSE)
}


In [3]:
regional_datasets <- tribble(
    ~reference_state, ~original_dataset, ~merged_dataset,
    "Piemonte" , "ARPAPiemonte", "PIE",
    "Liguria" , "ARPAL", "LIG",
    "Lombardia" , "ARPALombardia", "LOM",
    # "Veneto" , "ARPAV", "VEN",
    # "Trentino-Alto Adige" , "TAA", "TAA2",
    "Friuli-Venezia Giulia" , "ARPAFVG", "FVG",
    "Emilia-Romagna" , "Dext3r", "ER",
    "Toscana" , "SIRToscana", "TOS",
    # "Umbria" , "ARPAUmbria", "UMB",
    "Marche" , "ARPAM", "MAR",
    "Valle D'Aosta", NA_character_, "VDA"
)


In [4]:
query_checkpoint_meta(regional_datasets$merged_dataset, "merged", conns$data) |>
    collect() |>
    left_join(regional_datasets, by = c("dataset" = "merged_dataset")) |>
    group_by(reference_state) |>
    group_walk(~ save_kml_merged(.x, .y, fs::path_abs("~/Local_Workspace/TesiMag/kml_metadata")))

Writing layer `merged' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Emilia-Romagna/original/merged.kml' using driver `KML'
Writing 405 features with 26 fields and geometry type Point.
Writing layer `merged' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Friuli-Venezia Giulia/original/merged.kml' using driver `KML'
Writing 250 features with 26 fields and geometry type Point.
Writing layer `merged' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Liguria/original/merged.kml' using driver `KML'
Writing 236 features with 26 fields and geometry type Point.
Writing layer `merged' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Lombardia/original/merged.kml' using driver `KML'
Writing 352 features with 26 fields and geometry type Point.
Writing layer `merged' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Marche/original/merged.kml' using driver `KML'

In [5]:
query_checkpoint_meta(regional_datasets$original_dataset |> na.omit(), "qc1") |>
    collect() |>
    left_join(regional_datasets, by = c("dataset" = "original_dataset")) |>
    group_by(reference_state, dataset) |>
    group_walk(~ save_kml_orig(.x, .y, fs::path_abs("~/Local_Workspace/TesiMag/kml_metadata")))


Writing layer `Dext3r' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Emilia-Romagna/original/Dext3r.kml' using driver `KML'
Writing 623 features with 43 fields and geometry type Point.
Writing layer `ARPAFVG' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Friuli-Venezia Giulia/original/ARPAFVG.kml' using driver `KML'
Writing 53 features with 27 fields and geometry type Point.
Writing layer `ARPAL' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Liguria/original/ARPAL.kml' using driver `KML'
Writing 188 features with 27 fields and geometry type Point.


"GDAL Error 1: XML parsing of KML file failed : not well-formed (invalid token) at line 30, column 24"


Writing layer `ARPALombardia' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Lombardia/original/ARPALombardia.kml' using driver `KML'
Writing 255 features with 31 fields and geometry type Point.
Writing layer `ARPAM' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Marche/original/ARPAM.kml' using driver `KML'
Writing 157 features with 26 fields and geometry type Point.
Writing layer `ARPAPiemonte' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Piemonte/original/ARPAPiemonte.kml' using driver `KML'
Writing 325 features with 35 fields and geometry type Point.
Writing layer `SIRToscana' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Toscana/original/SIRToscana.kml' using driver `KML'
Writing 441 features with 33 fields and geometry type Point.


In [6]:
query_checkpoint_meta(c("SCIA", "ISAC"), "raw") |>
    collect() |>
    mutate(reference_state = coalesce(state, "Unknown")) |>
    group_by(reference_state, dataset) |>
    group_walk(~ save_kml_orig(.x, .y, fs::path_abs("~/Local_Workspace/TesiMag/kml_metadata")))

Writing layer `ISAC' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Abruzzo/original/ISAC.kml' using driver `KML'
Writing 186 features with 32 fields and geometry type Point.
Writing layer `SCIA' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Abruzzo/original/SCIA.kml' using driver `KML'
Writing 151 features with 28 fields and geometry type Point.
Writing layer `ISAC' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Basilicata/original/ISAC.kml' using driver `KML'
Writing 58 features with 32 fields and geometry type Point.
Writing layer `SCIA' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Basilicata/original/SCIA.kml' using driver `KML'
Writing 62 features with 28 fields and geometry type Point.
Writing layer `ISAC' to data source 
  `/Users/davidenicoli/Local_Workspace/TesiMag/kml_metadata/Calabria/original/ISAC.kml' using driver `KML'
Writing 114 features with 32 fields 

In [10]:
open_dataset(archive_path(c("SCIA", "ISAC"), "metadata", "qc1")) |>
    collect() |>
    filter(is.na(state))

ERROR: [1m[33mError[39m in `open_dataset()`:[22m
[33m![39m IOError: Error creating dataset. Could not read schema from '/Users/davidenicoli/Local_Workspace/TesiMag/db/metadata/qc1/SCIA'. Is this a 'parquet' file?: Cannot open for reading: path '/Users/davidenicoli/Local_Workspace/TesiMag/db/metadata/qc1/SCIA' is a directory
