In [1]:
# Run from the project root so the relative `src/...` paths below resolve.
setwd(fs::path_abs("~/Local_Workspace/TesiMag"))
library(arrow, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)

# Project helper scripts; `source()` evaluates them in the global environment,
# so the order below is kept as-is in case later files rely on earlier ones.
source("src/database/startup.R")
source("src/database/query/spatial.R")
source("src/database/query/data.R")
source("src/database/query/pairing.R")
source("src/database/query/analysis.R")
source("src/pairing/displaying.R")

# Open the database connections; later cells use `conns$stations` and
# `conns$data`.
# NOTE(review): `load_dbs` is presumably defined in one of the files sourced
# above (likely src/database/startup.R) -- confirm.
conns <- load_dbs()


Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE



In [2]:
# Build the list of candidate station pairs inside the SCIA dataset.
#
# The self-join on `station_geo` keeps every pair of SCIA stations that
# `ST_DWithin` reports as being within 2000 of each other and records their
# `ST_Distance`. `sx.id < sy.id` keeps each unordered pair once and drops
# self-pairs.
# NOTE(review): the 2000 threshold is presumably metres (PostGIS geography
# semantics for a column named `geog`) -- confirm.
query <- glue::glue_sql(
    "
        SELECT
            sx.dataset AS dataset_x,
            sx.id AS id_x,
            sy.dataset AS dataset_y,
            sy.id AS id_y,
            ST_Distance(sx.geog, sy.geog) AS distance
        FROM station_geo sx
        JOIN station_geo sy
        ON ST_DWithin(sx.geog, sy.geog, 2000)
        WHERE sx.id < sy.id AND sx.dataset = 'SCIA' AND sy.dataset = 'SCIA'
    ",
    # Quote against the connection that actually executes the query, so any
    # future `{}` interpolation is escaped for the right backend.
    .con = conns$stations
)
close_stations <- dbGetQuery(conns$stations, query)

# Copy the pairs into the data database so later cells can join against them
# there; any existing `close_stations_tmp` table is replaced.
dbWriteTable(
    conns$data,
    "close_stations_tmp",
    close_stations,
    overwrite = TRUE
)


In [3]:
# Cap the memory available to the `conns$data` database at 12.5 GB.
# NOTE(review): `SET memory_limit` looks like DuckDB configuration syntax --
# confirm the backend behind `conns$data`.
dbExecute(conns$data, "SET memory_limit = '12.5GB'")


In [2]:
# Stations belonging to the SCIA dataset (lazy table on the data database).
metadata <- filter(tbl(conns$data, "stations_tmp"), dataset == "SCIA")

# Output of the project helper `valid_data`, restricted to rows whose
# `station_id` matches the `id` of one of those stations.
scia <- semi_join(
    valid_data(conns$data),
    metadata,
    join_by(station_id == id)
)

# Match series across the close-station pairs written to
# `close_stations_tmp`, keep the identifying columns and pull the result
# into R.
matches <- collect(
    select(
        series_matches(
            valid_series(scia),
            tbl(conns$data, "close_stations_tmp"),
            asymmetric = TRUE
        ),
        id_x, id_y, variable, distance
    )
)


In [3]:
# Analyse the matched pairs using the SCIA data and station metadata.
# NOTE(review): `series_matches_analysis` is a project helper, presumably from
# src/database/query/analysis.R -- confirm what columns it returns.
analysis <- series_matches_analysis(matches, scia, metadata)


In [12]:
# Rename two columns for the report (`qc_clim_available` -> `all_filter`,
# `offset` -> `offset_days`) and hand the table to the project's spreadsheet
# writer.
write_xslx_analysis(
    rename(analysis, all_filter = qc_clim_available, offset_days = offset),
    file.path("notebooks", "integrazioni_nazionali", "SCIA", "internal_matches.xlsx")
)
