Skip to content

Commit

Permalink
schema_col_classes
Browse files Browse the repository at this point in the history
  • Loading branch information
rafapereirabr committed May 23, 2024
1 parent c92b3be commit 23748d2
Showing 1 changed file with 270 additions and 0 deletions.
270 changes: 270 additions & 0 deletions data_prep/R/schema_col_classes.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,273 @@ df[, col_class := fcase(decimal_places>0, '= numeric(),',


# Show each variable name next to its generated `col_class` snippet
# (presumably so the pairs can be pasted into the schema() call below).
print(df[, .(var_name, col_class)])



library(arrow)
library(dplyr)
library(censobr)

# Explicit Arrow schema passed to open_dataset() below when reading
# '2010_population_v0.3.0.parquet'.
#
# Most columns are declared int32. The exceptions are three string columns
# (abbrev_state, name_state, name_region), two int64 columns (code_weighting,
# V0011) and a handful of double columns. Field order is kept exactly as
# originally written.
#
# The last field has no trailing comma, so the call does not rely on the callee
# tolerating an empty final argument.
aaa <- schema(
  code_muni = int32(),
  code_state = int32(),
  abbrev_state = string(),
  name_state = string(),
  code_region = int32(),
  name_region = string(),
  code_weighting = int64(),
  # V* columns
  V0001 = int32(),
  V0002 = int32(),
  V0011 = int64(),
  V0300 = double(),
  V0010 = double(),
  V1001 = int32(),
  V1002 = int32(),
  V1003 = int32(),
  V1004 = int32(),
  V1006 = int32(),
  V0502 = int32(),
  V0504 = int32(),
  V0601 = int32(),
  V6033 = int32(),
  V6036 = int32(),
  V6037 = int32(),
  V6040 = int32(),
  V0606 = int32(),
  V0613 = int32(),
  V0614 = int32(),
  V0615 = int32(),
  V0616 = int32(),
  V0617 = int32(),
  V0618 = int32(),
  V0619 = int32(),
  V0620 = int32(),
  V0621 = int32(),
  V0622 = int32(),
  V6222 = int32(),
  V6224 = int32(),
  V0623 = int32(),
  V0624 = int32(),
  V0625 = int32(),
  V6252 = int32(),
  V6254 = int32(),
  V6256 = int32(),
  V0626 = int32(),
  V6262 = int32(),
  V6264 = int32(),
  V6266 = int32(),
  V0627 = int32(),
  V0628 = int32(),
  V0629 = int32(),
  V0630 = int32(),
  V0631 = int32(),
  V0632 = int32(),
  V0633 = int32(),
  V0634 = int32(),
  V0635 = int32(),
  V6400 = int32(),
  V6352 = int32(),
  V6354 = int32(),
  V6356 = int32(),
  V0636 = int32(),
  V6362 = int32(),
  V6364 = int32(),
  V6366 = int32(),
  V0637 = int32(),
  V0638 = int32(),
  V0639 = int32(),
  V0640 = int32(),
  V0641 = int32(),
  V0642 = int32(),
  V0643 = int32(),
  V0644 = int32(),
  V0645 = int32(),
  V6461 = int32(),
  V6471 = int32(),
  V0648 = int32(),
  V0649 = int32(),
  V0650 = int32(),
  V0651 = int32(),
  V6511 = int32(),
  V6513 = int32(),
  V6514 = double(),
  V0652 = int32(),
  V6521 = double(),
  V6524 = double(),
  V6525 = double(),
  V6526 = double(),
  V6527 = double(),
  V6528 = double(),
  V6529 = double(),
  V6530 = double(),
  V6531 = double(),
  V6532 = double(),
  V0653 = double(),
  V0654 = int32(),
  V0655 = int32(),
  V0656 = int32(),
  V0657 = int32(),
  V0658 = int32(),
  V0659 = int32(),
  V6591 = int32(),
  V0660 = int32(),
  V6602 = int32(),
  V6604 = int32(),
  V6606 = int32(),
  V0661 = int32(),
  V0662 = int32(),
  V0663 = int32(),
  V6631 = int32(),
  V6632 = int32(),
  V6633 = int32(),
  V0664 = int32(),
  V6641 = double(),
  V6642 = int32(),
  V6643 = int32(),
  V0665 = int32(),
  V6660 = int32(),
  V6664 = int32(),
  V0667 = int32(),
  V0668 = int32(),
  V6681 = int32(),
  V6682 = int32(),
  V0669 = int32(),
  V6691 = int32(),
  V6692 = int32(),
  V6693 = int32(),
  V6800 = int32(),
  V0670 = int32(),
  V0671 = int32(),
  V6900 = int32(),
  V6910 = int32(),
  V6920 = int32(),
  V6930 = int32(),
  V6940 = int32(),
  V6121 = int32(),
  V0604 = int32(),
  V0605 = int32(),
  V5020 = int32(),
  V5060 = int32(),
  V5070 = double(),
  V5080 = double(),
  V6462 = int32(),
  V6472 = int32(),
  V5110 = int32(),
  V5120 = int32(),
  V5030 = int32(),
  V5040 = int32(),
  V5090 = int32(),
  V5100 = int32(),
  V5130 = int32(),
  # M* columns (all int32) — presumably flags paired with the same-numbered
  # V* variables; TODO confirm against the data dictionary.
  M0502 = int32(),
  M0601 = int32(),
  M6033 = int32(),
  M0606 = int32(),
  M0613 = int32(),
  M0614 = int32(),
  M0615 = int32(),
  M0616 = int32(),
  M0617 = int32(),
  M0618 = int32(),
  M0619 = int32(),
  M0620 = int32(),
  M0621 = int32(),
  M0622 = int32(),
  M6222 = int32(),
  M6224 = int32(),
  M0623 = int32(),
  M0624 = int32(),
  M0625 = int32(),
  M6252 = int32(),
  M6254 = int32(),
  M6256 = int32(),
  M0626 = int32(),
  M6262 = int32(),
  M6264 = int32(),
  M6266 = int32(),
  M0627 = int32(),
  M0628 = int32(),
  M0629 = int32(),
  M0630 = int32(),
  M0631 = int32(),
  M0632 = int32(),
  M0633 = int32(),
  M0634 = int32(),
  M0635 = int32(),
  M6352 = int32(),
  M6354 = int32(),
  M6356 = int32(),
  M0636 = int32(),
  M6362 = int32(),
  M6364 = int32(),
  M6366 = int32(),
  M0637 = int32(),
  M0638 = int32(),
  M0639 = int32(),
  M0640 = int32(),
  M0641 = int32(),
  M0642 = int32(),
  M0643 = int32(),
  M0644 = int32(),
  M0645 = int32(),
  M6461 = int32(),
  M6471 = int32(),
  M0648 = int32(),
  M0649 = int32(),
  M0650 = int32(),
  M0651 = int32(),
  M6511 = int32(),
  M0652 = int32(),
  M6521 = int32(),
  M0653 = int32(),
  M0654 = int32(),
  M0655 = int32(),
  M0656 = int32(),
  M0657 = int32(),
  M0658 = int32(),
  M0659 = int32(),
  M6591 = int32(),
  M0660 = int32(),
  M6602 = int32(),
  M6604 = int32(),
  M6606 = int32(),
  M0661 = int32(),
  M0662 = int32(),
  M0663 = int32(),
  M6631 = int32(),
  M6632 = int32(),
  M6633 = int32(),
  M0664 = int32(),
  M6641 = int32(),
  M6642 = int32(),
  M6643 = int32(),
  M0665 = int32(),
  M6660 = int32(),
  M0667 = int32(),
  M0668 = int32(),
  M6681 = int32(),
  M6682 = int32(),
  M0669 = int32(),
  M6691 = int32(),
  M6692 = int32(),
  M6693 = int32(),
  M0670 = int32(),
  M0671 = int32(),
  M6800 = int32(),
  M6121 = int32(),
  M0604 = int32(),
  M0605 = int32(),
  M6462 = int32(),
  M6472 = int32(),
  V1005 = int32()
)




# Open the 2010 population parquet file using the explicit schema `aaa`.
df2 <- arrow::open_dataset("2010_population_v0.3.0.parquet", schema = aaa)

# Drop any column whose name ends in "linear" before writing.
# NOTE(review): the original piped `df` (the variable-name table printed
# earlier in this script) into select() and never used the result, so the
# output was written from `df2` unfiltered. Piping `df2` and writing `df_new`
# is the presumed intent — confirm.
df_new <- df2 |>
  select(-ends_with("linear"))

# Write the re-typed dataset to a new parquet file.
arrow::write_parquet(df_new, "pop_2010_integer.parquet")

0 comments on commit 23748d2

Please sign in to comment.