# Origin: data_prep/R/schema_col_classes.R (statements appended after the
# `col_class` preview below).

# Preview of the variable-name / column-class table built earlier in this file.
df[, .(var_name, col_class)] |> print()


# Packages ----

library(arrow)
library(dplyr)
library(censobr)


# Explicit Arrow schema ----
#
# Column-by-column type declaration handed to `open_dataset()` below.
# Field order is kept exactly as originally written.
aaa <- schema(
  code_muni      = int32(),
  code_state     = int32(),
  abbrev_state   = string(),
  name_state     = string(),
  code_region    = int32(),
  name_region    = string(),
  code_weighting = int64(),
  V0001 = int32(),
  V0002 = int32(),
  V0011 = int64(),
  V0300 = double(),
  V0010 = double(),
  V1001 = int32(),
  V1002 = int32(),
  V1003 = int32(),
  V1004 = int32(),
  V1006 = int32(),
  V0502 = int32(),
  V0504 = int32(),
  V0601 = int32(),
  V6033 = int32(),
  V6036 = int32(),
  V6037 = int32(),
  V6040 = int32(),
  V0606 = int32(),
  V0613 = int32(),
  V0614 = int32(),
  V0615 = int32(),
  V0616 = int32(),
  V0617 = int32(),
  V0618 = int32(),
  V0619 = int32(),
  V0620 = int32(),
  V0621 = int32(),
  V0622 = int32(),
  V6222 = int32(),
  V6224 = int32(),
  V0623 = int32(),
  V0624 = int32(),
  V0625 = int32(),
  V6252 = int32(),
  V6254 = int32(),
  V6256 = int32(),
  V0626 = int32(),
  V6262 = int32(),
  V6264 = int32(),
  V6266 = int32(),
  V0627 = int32(),
  V0628 = int32(),
  V0629 = int32(),
  V0630 = int32(),
  V0631 = int32(),
  V0632 = int32(),
  V0633 = int32(),
  V0634 = int32(),
  V0635 = int32(),
  V6400 = int32(),
  V6352 = int32(),
  V6354 = int32(),
  V6356 = int32(),
  V0636 = int32(),
  V6362 = int32(),
  V6364 = int32(),
  V6366 = int32(),
  V0637 = int32(),
  V0638 = int32(),
  V0639 = int32(),
  V0640 = int32(),
  V0641 = int32(),
  V0642 = int32(),
  V0643 = int32(),
  V0644 = int32(),
  V0645 = int32(),
  V6461 = int32(),
  V6471 = int32(),
  V0648 = int32(),
  V0649 = int32(),
  V0650 = int32(),
  V0651 = int32(),
  V6511 = int32(),
  V6513 = int32(),
  V6514 = double(),
  V0652 = int32(),
  V6521 = double(),
  V6524 = double(),
  V6525 = double(),
  V6526 = double(),
  V6527 = double(),
  V6528 = double(),
  V6529 = double(),
  V6530 = double(),
  V6531 = double(),
  V6532 = double(),
  V0653 = double(),
  V0654 = int32(),
  V0655 = int32(),
  V0656 = int32(),
  V0657 = int32(),
  V0658 = int32(),
  V0659 = int32(),
  V6591 = int32(),
  V0660 = int32(),
  V6602 = int32(),
  V6604 = int32(),
  V6606 = int32(),
  V0661 = int32(),
  V0662 = int32(),
  V0663 = int32(),
  V6631 = int32(),
  V6632 = int32(),
  V6633 = int32(),
  V0664 = int32(),
  V6641 = double(),
  V6642 = int32(),
  V6643 = int32(),
  V0665 = int32(),
  V6660 = int32(),
  V6664 = int32(),
  V0667 = int32(),
  V0668 = int32(),
  V6681 = int32(),
  V6682 = int32(),
  V0669 = int32(),
  V6691 = int32(),
  V6692 = int32(),
  V6693 = int32(),
  V6800 = int32(),
  V0670 = int32(),
  V0671 = int32(),
  V6900 = int32(),
  V6910 = int32(),
  V6920 = int32(),
  V6930 = int32(),
  V6940 = int32(),
  V6121 = int32(),
  V0604 = int32(),
  V0605 = int32(),
  V5020 = int32(),
  V5060 = int32(),
  V5070 = double(),
  V5080 = double(),
  V6462 = int32(),
  V6472 = int32(),
  V5110 = int32(),
  V5120 = int32(),
  V5030 = int32(),
  V5040 = int32(),
  V5090 = int32(),
  V5100 = int32(),
  V5130 = int32(),
  M0502 = int32(),
  M0601 = int32(),
  M6033 = int32(),
  M0606 = int32(),
  M0613 = int32(),
  M0614 = int32(),
  M0615 = int32(),
  M0616 = int32(),
  M0617 = int32(),
  M0618 = int32(),
  M0619 = int32(),
  M0620 = int32(),
  M0621 = int32(),
  M0622 = int32(),
  M6222 = int32(),
  M6224 = int32(),
  M0623 = int32(),
  M0624 = int32(),
  M0625 = int32(),
  M6252 = int32(),
  M6254 = int32(),
  M6256 = int32(),
  M0626 = int32(),
  M6262 = int32(),
  M6264 = int32(),
  M6266 = int32(),
  M0627 = int32(),
  M0628 = int32(),
  M0629 = int32(),
  M0630 = int32(),
  M0631 = int32(),
  M0632 = int32(),
  M0633 = int32(),
  M0634 = int32(),
  M0635 = int32(),
  M6352 = int32(),
  M6354 = int32(),
  M6356 = int32(),
  M0636 = int32(),
  M6362 = int32(),
  M6364 = int32(),
  M6366 = int32(),
  M0637 = int32(),
  M0638 = int32(),
  M0639 = int32(),
  M0640 = int32(),
  M0641 = int32(),
  M0642 = int32(),
  M0643 = int32(),
  M0644 = int32(),
  M0645 = int32(),
  M6461 = int32(),
  M6471 = int32(),
  M0648 = int32(),
  M0649 = int32(),
  M0650 = int32(),
  M0651 = int32(),
  M6511 = int32(),
  M0652 = int32(),
  M6521 = int32(),
  M0653 = int32(),
  M0654 = int32(),
  M0655 = int32(),
  M0656 = int32(),
  M0657 = int32(),
  M0658 = int32(),
  M0659 = int32(),
  M6591 = int32(),
  M0660 = int32(),
  M6602 = int32(),
  M6604 = int32(),
  M6606 = int32(),
  M0661 = int32(),
  M0662 = int32(),
  M0663 = int32(),
  M6631 = int32(),
  M6632 = int32(),
  M6633 = int32(),
  M0664 = int32(),
  M6641 = int32(),
  M6642 = int32(),
  M6643 = int32(),
  M0665 = int32(),
  M6660 = int32(),
  M0667 = int32(),
  M0668 = int32(),
  M6681 = int32(),
  M6682 = int32(),
  M0669 = int32(),
  M6691 = int32(),
  M6692 = int32(),
  M6693 = int32(),
  M0670 = int32(),
  M0671 = int32(),
  M6800 = int32(),
  M6121 = int32(),
  M0604 = int32(),
  M0605 = int32(),
  M6462 = int32(),
  M6472 = int32(),
  V1005 = int32()
)


# Re-read the parquet file with the explicit schema and write it back ----

# Open the existing parquet file, imposing the column types declared in `aaa`.
df2 <- arrow::open_dataset(
  "2010_population_v0.3.0.parquet",
  schema = aaa
)

# NOTE(review): this is derived from `df` (the object previewed at the top of
# this section), not from `df2`, and `df_new` is not used anywhere in the
# visible code — confirm whether `df2` was intended.
df_new <- dplyr::select(df, -ends_with("linear"))

# Save the re-typed data under a new file name.
# NOTE(review): `df2` is whatever `open_dataset()` returned, passed straight to
# `write_parquet()` — confirm the installed arrow version accepts that input.
arrow::write_parquet(df2, "pop_2010_integer.parquet")