diff --git a/DESCRIPTION b/DESCRIPTION
index 7ee02d5..e2b4cbc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -29,7 +29,7 @@ BugReports: https://github.com/ipeaGIT/censobr/issues
 Depends:
     R (>= 4.0)
 Imports:
-    arrow,
+    arrow (>= 15.0.1),
     checkmate,
     dplyr,
     httr (>= 1.4.1),
diff --git a/data_prep/R/census_tracts_aggreg_2022_prelim.R b/data_prep/R/census_tracts_aggreg_2022_prelim.R
index 98148bf..ef6c0b6 100644
--- a/data_prep/R/census_tracts_aggreg_2022_prelim.R
+++ b/data_prep/R/census_tracts_aggreg_2022_prelim.R
@@ -11,12 +11,10 @@ df[, CD_SETOR := gsub("P","", CD_SETOR)]
 
 head(df)
 
-# make all columns as character
-df <- mutate(df, across(everything(), as.character))
-
+# rename columns
 names(df) <- toupper(names(df))
 
-df2 <- dplyr::rename(df,
+df2 <- dplyr::select(df,
                      code_tract = CD_SETOR,
                      code_muni = CD_MUN,
                      name_muni = NM_MUN,
@@ -38,10 +36,23 @@ df2 <- dplyr::rename(df,
                      name_intermediate = NM_RGINT,
                      code_region = CD_REGIAO,
                      name_region = NM_REGIAO,
+                     V0001 = V0001,
+                     V0002 = V0002,
+                     V0003 = V0003,
+                     V0004 = V0004,
+                     V0005 = V0005,
+                     V0006 = V0006,
+                     V0007 = V0007,
                      area_km2 = AREA_KM2
                      )
 head(df2)
 
+# convert only the columns whose name contains 'code_' or 'name_' to character
+character_cols <- names(df2)[names(df2) %like% 'code_|name_']
+df2 <- mutate(df2, across(all_of(character_cols), as.character))
+
+sapply(df2, class)
+
 # save
 dir.create('./data/tracts/2022/', recursive = T)
 
@@ -49,6 +60,6 @@ dir.create('./data/tracts/2022/', recursive = T)
 dest_file <- paste0('2022_tracts_Preliminares.parquet')
 arrow::write_parquet(df2, paste0('./data/tracts/2022/', dest_file))
 
-arrow::write_parquet(df2, 'd2.parquet')
+