In [0]:
# Install and load the Global Data Lab client, then open an API session.
# NOTE(review): the GitHub install below replaces the CRAN build installed
# just before it, so one of the two installs is probably redundant -- confirm
# which version this notebook is meant to run against.
install.packages("gdldata")

devtools::install_github("GlobalDataLab/R-data-api")

library(gdldata)

# Read the API token from the notebook widget named "api".
api_key <- dbutils.widgets.get("api")

# Fail fast on a missing token. The token is a secret, so it is deliberately
# never printed: notebook output is stored and shared with the notebook.
if (!is.character(api_key) || length(api_key) != 1L ||
    is.na(api_key) || !nzchar(api_key)) {
  stop("Widget 'api' is empty; provide a Global Data Lab API token.",
       call. = FALSE)
}
message("Global Data Lab API token loaded from widget 'api'.")

sess <- gdl_session(api_key)

In [0]:
# Configure the session created earlier and download the population table.
# Each setter returns an updated session object, which is re-assigned to
# `sess` before the next call.

# Select the "shdi" dataset, which is where the population indicator lives.
sess <- set_dataset(sess, "shdi")

# Restrict the request to Liberia (ISO3 code "LBR") and to the single "pop"
# indicator, so no other countries or indicators come back.
sess <- set_country(sess,  "LBR")    
sess <- set_indicator(sess, "pop")   # Population size in thousands 

# Request aggregation levels 1 and 4.
# NOTE(review): the original comment read "1 = county, 4 = district", but the
# downstream cell keeps only rows with Level == "Subnat", which suggests these
# codes may instead mean national (1) and subnational (4) -- confirm against
# the Global Data Lab API documentation.
sess <- set_levels(sess, c(1, 4))   

# Turn off interpolation and both kinds of extrapolation (0 years each); the
# intent is to receive observed years only.
sess <- set_interpolation(sess, FALSE)           
sess <- set_extrapolation_years_linear(sess, 0)  
sess <- set_extrapolation_years_nearest(sess, 0)

# Execute the request with the settings above and keep the result.
pop_ts <- gdl_request(sess)           

# Inspect the first rows of the response.
head(pop_ts)

In [0]:
# List the column names of the downloaded table before reshaping it.
names(pop_ts)


In [0]:
# Load the reshaping packages without their startup banners.
suppressPackageStartupMessages({
  library(dplyr)
  library(tidyr)
})

# Reshape the wide GDL response into a long table with one row per region and
# year: country_name, adm1_name, year, population, data_sources.
#
# The dplyr/tidyr verbs are namespace-qualified on purpose. SparkR, which is
# attached later in this notebook, exports its own filter(), select() and
# mutate(); once it is attached, the bare names resolve to the SparkR versions
# and this cell fails on a plain data frame when it is re-run.
pop_ts_clean <- pop_ts %>%
  # Keep only the subnational rows.
  dplyr::filter(Level == "Subnat") %>%
  # Year columns carry an "X" prefix; stack them into year/population pairs
  # and drop the years that have no value.
  tidyr::pivot_longer(
    cols           = starts_with("X"),
    names_to       = "year",
    names_prefix   = "X",
    values_to      = "population",
    values_drop_na = TRUE
  ) %>%
  dplyr::mutate(
    country_name = "Liberia",
    adm1_name    = Region,
    year         = as.integer(year),
    # The "pop" indicator is requested in thousands; scale by 1000 to get
    # a head count.
    population   = as.numeric(population) * 1000,
    # Per-year link to the Global Data Lab table, built from the integer
    # year assigned just above.
    data_sources = paste0(
      "https://globaldatalab.org/shdi/table/",
      year,
      "/shdi+lgnic+pop/LBR/?levels=1+4&interpolation=0&extrapolation=0"
    )
  ) %>%
  dplyr::select(country_name, adm1_name, year, population, data_sources)

# Preview the first ten rows of the cleaned table.
head(pop_ts_clean, 10)


In [0]:
# Persist the cleaned population table as a Spark table.
library(SparkR)

# Start a SparkR session, or attach to the one that already exists.
sparkR.session()

# Target location: <database>.<table>.
database_name <- "prd_mega.indicator_intermediate"
table_name    <- "lbr_subnational_population"
full_table    <- paste(c(database_name, table_name), collapse = ".")

# Make sure the target database exists before writing into it.
sql(paste("CREATE DATABASE IF NOT EXISTS", database_name))

# Convert the local R data frame into a Spark DataFrame.
sdf_pop <- createDataFrame(pop_ts_clean)

# Write the table, replacing any previous contents.
saveAsTable(df = sdf_pop, tableName = full_table, mode = "overwrite")