Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

migration to MonetDB.R

  • Loading branch information...
commit 9bc60a4f5641515ff79d209f5b19de9c3fd3d106 1 parent 8dff815
Anthony Damico authored
187 American Community Survey/2005-2011 - download all microdata.R
@@ -50,9 +50,8 @@
50 50 # it's running. don't believe me? check the working directory (set below) for a new r data file (.rda) every few hours.
51 51
52 52
53   -require(downloader) # downloads and then runs the source() function on scripts from github
54 53 require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
55   -require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
  54 +require(MonetDB.R) # load the MonetDB.R package (connects r to a monet database)
56 55
57 56
58 57 # set your ACS data directory
@@ -64,69 +63,54 @@ require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
64 63 setwd( "C:/My Directory/ACS/" )
65 64
66 65
67   -# load the windows.monetdb.configuration() function,
68   -# which allows the easy creation of an executable (.bat) file
69   -# to run the monetdb server specific to this data
70   -source_url( "https://raw.github.com/ajdamico/usgsd/master/MonetDB/windows.monetdb.configuration.R" )
71   -
72   -
73   -# create a folder "MonetDB" in your current working directory.
74   -# so, for example, if you set your current working directory to C:\My Directory\ACS\ above,
75   -# create a new folder C:\My Directory\ACS\MonetDB right now.
76   -
77   -
78   -# if the MonetDB folder doesn't exist in your current working directory,
79   -# this line will create an error.
80   -stopifnot( file.exists( paste0( getwd() , "/MonetDB" ) ) )
81   -
82   -
83 66 # configure a monetdb database for the acs on windows #
84 67
85 68 # note: only run this command once. this creates an executable (.bat) file
86 69 # in the appropriate directory on your local disk.
87 70 # when adding new files or adding a new year of data, this script does not need to be re-run.
88 71
89   -# create a monetdb executable (.bat) file for the medicare basic stand alone public use file
90   -windows.monetdb.configuration(
91   -
92   - # choose a location to store the file that will run the monetdb server on your local computer
93   - # this can be stored anywhere, but why not put it in the monetdb directory
94   - bat.file.location = paste0( getwd() , "\\MonetDB\\monetdb.bat" ) ,
95   -
96   - # figure out the file path of the MonetDB software on your local machine.
97   - # on my windows machine, monetdb version 5.0 defaulted to this directory, but double-check yours:
98   - monetdb.program.path = "C:\\Program Files\\MonetDB\\MonetDB5\\" ,
99   -
100   - # assign the directory where the database will be stored.
101   - # this setting will store the database within the MonetDB folder of the current working directory
102   - database.directory = paste0( getwd() , "\\MonetDB\\" ) ,
103   -
104   - # create a server name for the dataset you want to save into monetdb.
105   - # this will change for different datasets -- for the basic stand alone public use file, just use acs
106   - dbname = "acs" ,
107   -
108   - # choose which port
109   - dbport = 50001
  72 +# create a monetdb executable (.bat) file for the american community survey
  73 +batfile <-
  74 + monetdb.server.setup(
  75 +
  76 + # set the path to the directory where the initialization batch file and all data will be stored
  77 + database.directory = paste0( getwd() , "/MonetDB" ) ,
  78 + # must be empty or not exist
  79 +
  80 + # find the main path to the monetdb installation program
  81 + monetdb.program.path = "C:/Program Files/MonetDB/MonetDB5" ,
  82 +
  83 + # choose a database name
  84 + dbname = "acs" ,
  85 +
  86 + # choose a database port
  87 + # this port should not conflict with other monetdb databases
  88 + # on your local computer. two databases with the same port number
  89 + # cannot be accessed at the same time
  90 + dbport = 50001
110 91 )
111 92
112   -
  93 +
113 94 # this next step is so very important.
114 95
115 96 # store a line of code that will make it easy to open up the monetdb server in the future.
116   -# this should contain the same file path as the "bat.file.location" parameter above,
117   -# but don't simply copy the "paste0( getwd() , "\\MonetDB\\monetdb.bat" )" here,
118   -# because if your current working directory changes at other points, you don't want this line to change.
  97 +# this should contain the same file path as the batfile created above,
119 98 # your best bet is to actually look at your local disk to find the full filepath of the executable (.bat) file.
120   -# if it's stored in C:\My Directory\ACS\MonetDB\monetdb.bat
121   -# then your shell.exec line should be:
  99 +# if you ran this script without changes, the batfile will get stored in C:\My Directory\ACS\MonetDB\acs.bat
122 100
  101 +# here's the batfile location:
  102 +batfile
123 103
124   -shell.exec( "C:/My Directory/ACS/MonetDB/monetdb.bat" )
  104 +# note that since you only run the `monetdb.server.setup()` function the first time this script is run,
  105 +# you will need to note the location of the batfile for future MonetDB analyses!
125 106
  107 +# in future R sessions, you can create the batfile variable with a line like..
  108 +# batfile <- "C:/My Directory/ACS/MonetDB/acs.bat"
  109 +# obviously, without the `#` comment character
126 110
127 111 # hold on to that line for future scripts.
128 112 # you need to run this line *every time* you access
129   -# the basic stand alone public use files with monetdb.
  113 +# the american community survey files with monetdb.
130 114 # this is the monetdb server.
131 115
132 116 # two other things you need: the database name and the database port.
@@ -134,46 +118,38 @@ shell.exec( "C:/My Directory/ACS/MonetDB/monetdb.bat" )
134 118 dbname <- "acs"
135 119 dbport <- 50001
136 120
  121 +# now the local windows machine contains a new executable program at "c:\my directory\acs\monetdb\acs.bat"
137 122
138   -# hey try running it now! a shell window should pop up.
139   -# when it runs, my computer shows:
140 123
141   -# MonetDB 5 server v11.13.5 "Oct2012-SP1"
142   -# Serving database 'acs', using 2 threads
143   -# Compiled for x86_64-pc-winnt/64bit with 64bit OIDs dynamically linked
144   -# Found 15.873 GiB available main-memory.
145   -# Copyright (c) 1993-July 2008 CWI.
146   -# Copyright (c) August 2008-2012 MonetDB B.V., all rights reserved
147   -# Visit http://www.monetdb.org/ for further information
148   -# Listening for connection requests on mapi:monetdb://127.0.0.1:50001/
149   -# MonetDB/JAQL module loaded
150   -# MonetDB/SQL module loaded
151 124
152   -# if that shell window is not open, monetdb commands will not work. period.
153 125
  126 +# it's recommended that after you've _created_ the monetdb server,
  127 +# you create a block of code like the one below to _access_ the monetdb server
154 128
155   -# give the shell window ten seconds to load.
156   -Sys.sleep( 10 )
157 129
  130 +####################################################################
  131 +# lines of code to hold on to for all other `acs` monetdb analyses #
158 132
159   -# end of monetdb database configuration #
  133 +# first: specify your batfile. again, mine looks like this:
  134 +batfile <- "C:/My Directory/ACS/MonetDB/acs.bat"
160 135
  136 +# second: run the MonetDB server
  137 +pid <- monetdb.server.start( batfile )
161 138
162   -# the monetdb installation instructions asked you to note the filepath of the monetdb java (.jar) file
163   -# you need it now. create a new 'monetdriver' object containing a character string
164   -# with the filepath of the java database connection file
165   -monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
  139 +# third: add a ten second system sleep in between the monetdb.server.start() function
  140 +# and the database connection lines. this gives your local computer a chance
  141 +# to get monetdb up and running.
  142 +Sys.sleep( 10 )
166 143
167   -# convert the driver to a monetdb driver
168   -drv <- MonetDB( classPath = monetdriver )
  144 +# fourth: your five lines to make a monet database connection.
  145 +# just like above, mine look like this:
  146 +dbname <- "acs"
  147 +dbport <- 50001
169 148
170   -# notice the dbname and dbport (assigned above during the monetdb configuration)
171   -# get used in this line
172   -monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
  149 +drv <- dbDriver("MonetDB")
  150 +monet.url <- paste0( "monetdb://localhost:" , dbport , "/" , dbname )
  151 +db <- dbConnect( drv , monet.url , "monetdb" , "monetdb" )
173 152
174   -# now put everything together and create a connection to the monetdb server.
175   -db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
176   -# from now on, the 'db' object will be used for r to connect with the monetdb server
177 153
178 154
179 155
@@ -187,7 +163,7 @@ single.year.datasets.to.download <- 2005:2011
187 163 three.year.datasets.to.download <- 2007:2011
188 164
189 165 # five-year datasets are available back to 2009
190   -five.year.datasets.to.download <- 2009:2010
  166 +five.year.datasets.to.download <- 2009:2011
191 167
192 168 # # # # # # # # # # # # # #
193 169 # other download examples #
@@ -516,11 +492,6 @@ for ( year in 2050:2005 ){
516 492 )
517 493
518 494
519   - # disconnect from..
520   - dbDisconnect( db )
521   - # ..and reconnect to the database
522   - db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
523   -
524 495 # create the merged table
525 496 dbSendUpdate( db , i.j )
526 497
@@ -553,11 +524,6 @@ for ( year in 2050:2005 ){
553 524 dbGetQuery( db , paste0( "select count(*) as count from " , k , "_m" ) )
554 525 )
555 526
556   - # disconnect from..
557   - dbDisconnect( db )
558   - # ..and reconnect to the database
559   - db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
560   -
561 527
562 528 # create a sqlrepsurvey complex sample design object
563 529 # using the merged (household+person) table
@@ -622,32 +588,57 @@ for ( year in 2050:2005 ){
622 588 dbListTables( db ) # print the tables stored in the current monet database to the screen
623 589
624 590
  591 +
  592 +
  593 +
  594 +
625 595 # disconnect from the current monet database
626 596 dbDisconnect( db )
627 597
  598 +# and close it using the `pid`
  599 +monetdb.server.stop( pid )
  600 +
  601 +# end of lines of code to hold on to for all other `acs` monetdb analyses #
  602 +###########################################################################
  603 +
  604 +
628 605
629   -##################################################################
630   -# lines of code to hold on to for all other acs monetdb analyses #
631 606
632   -# first: your shell.exec() function. again, mine looks like this:
633   -shell.exec( "C:/My Directory/ACS/MonetDB/monetdb.bat" )
  607 +####################################################################
  608 +# lines of code to hold on to for all other `acs` monetdb analyses #
634 609
635   -# second: add a ten second system sleep in between the shell.exec() function
  610 +# first: specify your batfile. again, mine looks like this:
  611 +batfile <- "C:/My Directory/ACS/MonetDB/acs.bat"
  612 +
  613 +# second: run the MonetDB server
  614 +pid <- monetdb.server.start( batfile )
  615 +
  616 +# third: add a ten second system sleep in between the monetdb.server.start() function
636 617 # and the database connection lines. this gives your local computer a chance
637 618 # to get monetdb up and running.
638 619 Sys.sleep( 10 )
639 620
640   -# third: your six lines to make a monet database connection.
  621 +# fourth: your five lines to make a monet database connection.
641 622 # just like above, mine look like this:
642 623 dbname <- "acs"
643 624 dbport <- 50001
644   -monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
645   -drv <- MonetDB( classPath = monetdriver )
646   -monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
647   -db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
648 625
649   -# end of lines of code to hold on to for all other acs monetdb analyses #
650   -#########################################################################
  626 +drv <- dbDriver("MonetDB")
  627 +monet.url <- paste0( "monetdb://localhost:" , dbport , "/" , dbname )
  628 +db <- dbConnect( drv , monet.url , "monetdb" , "monetdb" )
  629 +
  630 +
  631 +# # # # run your analysis commands # # # #
  632 +
  633 +
  634 +# disconnect from the current monet database
  635 +dbDisconnect( db )
  636 +
  637 +# and close it using the `pid`
  638 +monetdb.server.stop( pid )
  639 +
  640 +# end of lines of code to hold on to for all other `acs` monetdb analyses #
  641 +###########################################################################
651 642
652 643
653 644 # unlike most post-importation scripts, the monetdb directory cannot be set to read-only #
40 American Community Survey/2011 single-year - analysis examples.R
@@ -35,7 +35,7 @@
35 35
36 36
37 37 require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
38   -require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
  38 +require(MonetDB.R) # load the MonetDB.R package (connects r to a monet database)
39 39
40 40
41 41 # after running the r script above, users should have handy a few lines
@@ -43,28 +43,31 @@ require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
43 43 # run them now. mine look like this:
44 44
45 45
46   -##################################################################
47   -# lines of code to hold on to for all other acs monetdb analyses #
  46 +####################################################################
  47 +# lines of code to hold on to for all other `acs` monetdb analyses #
48 48
49   -# first: your shell.exec() function. again, mine looks like this:
50   -shell.exec( "C:/My Directory/ACS/MonetDB/monetdb.bat" )
  49 +# first: specify your batfile. again, mine looks like this:
  50 +batfile <- "C:/My Directory/ACS/MonetDB/acs.bat"
51 51
52   -# second: add a twenty second system sleep in between the shell.exec() function
  52 +# second: run the MonetDB server
  53 +pid <- monetdb.server.start( batfile )
  54 +
  55 +# third: add a ten second system sleep in between the monetdb.server.start() function
53 56 # and the database connection lines. this gives your local computer a chance
54 57 # to get monetdb up and running.
55   -Sys.sleep( 20 )
  58 +Sys.sleep( 10 )
56 59
57   -# third: your six lines to make a monet database connection.
  60 +# fourth: your five lines to make a monet database connection.
58 61 # just like above, mine look like this:
59 62 dbname <- "acs"
60 63 dbport <- 50001
61   -monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
62   -drv <- MonetDB( classPath = monetdriver )
63   -monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
64   -db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
65 64
66   -# end of lines of code to hold on to for all other acs monetdb analyses #
67   -#########################################################################
  65 +drv <- dbDriver("MonetDB")
  66 +monet.url <- paste0( "monetdb://localhost:" , dbport , "/" , dbname )
  67 +db <- dbConnect( drv , monet.url , "monetdb" , "monetdb" )
  68 +
  69 +
  70 +# # # # run your analysis commands # # # #
68 71
69 72
70 73 # the american community survey download and importation script
@@ -286,9 +289,16 @@ barplot(
286 289 close( acs.m )
287 290 close( acs.h )
288 291
289   -# close the connection to the monet database
  292 +
  293 +# disconnect from the current monet database
290 294 dbDisconnect( db )
291 295
  296 +# and close it using the `pid`
  297 +monetdb.server.stop( pid )
  298 +
  299 +# end of lines of code to hold on to for all other `acs` monetdb analyses #
  300 +###########################################################################
  301 +
292 302
293 303 # for more details on how to work with data in r
294 304 # check out my two minute tutorial video site
42 American Community Survey/2011 single-year - variable recode example.R
@@ -39,7 +39,7 @@
39 39
40 40
41 41 require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
42   -require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
  42 +require(MonetDB.R) # load the MonetDB.R package (connects r to a monet database)
43 43 require(stringr) # load stringr package (manipulates character strings easily)
44 44
45 45
@@ -48,28 +48,31 @@ require(stringr) # load stringr package (manipulates character strings easily)
48 48 # run them now. mine look like this:
49 49
50 50
51   -##################################################################
52   -# lines of code to hold on to for all other acs monetdb analyses #
  51 +####################################################################
  52 +# lines of code to hold on to for all other `acs` monetdb analyses #
  53 +
  54 +# first: specify your batfile. again, mine looks like this:
  55 +batfile <- "C:/My Directory/ACS/MonetDB/acs.bat"
53 56
54   -# first: your shell.exec() function. again, mine looks like this:
55   -shell.exec( "C:/My Directory/ACS/MonetDB/monetdb.bat" )
  57 +# second: run the MonetDB server
  58 +pid <- monetdb.server.start( batfile )
56 59
57   -# second: add a twenty second system sleep in between the shell.exec() function
  60 +# third: add a ten second system sleep in between the monetdb.server.start() function
58 61 # and the database connection lines. this gives your local computer a chance
59 62 # to get monetdb up and running.
60   -Sys.sleep( 20 )
  63 +Sys.sleep( 10 )
61 64
62   -# third: your six lines to make a monet database connection.
  65 +# fourth: your five lines to make a monet database connection.
63 66 # just like above, mine look like this:
64 67 dbname <- "acs"
65 68 dbport <- 50001
66   -monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
67   -drv <- MonetDB( classPath = monetdriver )
68   -monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
69   -db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
70 69
71   -# end of lines of code to hold on to for all other acs monetdb analyses #
72   -#########################################################################
  70 +drv <- dbDriver("MonetDB")
  71 +monet.url <- paste0( "monetdb://localhost:" , dbport , "/" , dbname )
  72 +db <- dbConnect( drv , monet.url , "monetdb" , "monetdb" )
  73 +
  74 +
  75 +# # # # run your analysis commands # # # #
73 76
74 77
75 78 # the american community survey download and importation script
@@ -240,7 +243,7 @@ save( acs.m.recoded.design , file = "C:/My Directory/ACS/recoded_acs2011_1yr.rda
240 243 # open r back up
241 244
242 245 require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
243   -require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
  246 +require(MonetDB.R) # load the MonetDB.R package (connects r to a monet database)
244 247
245 248 # run your..
246 249 # lines of code to hold on to for all other acs monetdb analyses #
@@ -266,9 +269,16 @@ svytotal( ~one , acs.r , byvar = ~agecat )
266 269 # close the connection to the recoded sqlrepsurvey design object
267 270 close( acs.r )
268 271
269   -# close the connection to the monet database
  272 +
  273 +# disconnect from the current monet database
270 274 dbDisconnect( db )
271 275
  276 +# and close it using the `pid`
  277 +monetdb.server.stop( pid )
  278 +
  279 +# end of lines of code to hold on to for all other `acs` monetdb analyses #
  280 +###########################################################################
  281 +
272 282
273 283 # for more details on how to work with data in r
274 284 # check out my two minute tutorial video site
40 American Community Survey/replicate census estimates - 2011.R
@@ -41,35 +41,38 @@
41 41
42 42
43 43 require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
44   -require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
  44 +require(MonetDB.R) # load the MonetDB.R package (connects r to a monet database)
45 45
46 46 # after running the r script above, users should have handy a few lines
47 47 # to initiate and connect to the monet database containing all american community survey tables
48 48 # run them now. mine look like this:
49 49
50 50
51   -##################################################################
52   -# lines of code to hold on to for all other acs monetdb analyses #
  51 +####################################################################
  52 +# lines of code to hold on to for all other `acs` monetdb analyses #
53 53
54   -# first: your shell.exec() function. again, mine looks like this:
55   -shell.exec( "C:/My Directory/ACS/MonetDB/monetdb.bat" )
  54 +# first: specify your batfile. again, mine looks like this:
  55 +batfile <- "C:/My Directory/ACS/MonetDB/acs.bat"
56 56
57   -# second: add a twenty second system sleep in between the shell.exec() function
  57 +# second: run the MonetDB server
  58 +pid <- monetdb.server.start( batfile )
  59 +
  60 +# third: add a ten second system sleep in between the monetdb.server.start() function
58 61 # and the database connection lines. this gives your local computer a chance
59 62 # to get monetdb up and running.
60   -Sys.sleep( 20 )
  63 +Sys.sleep( 10 )
61 64
62   -# third: your six lines to make a monet database connection.
  65 +# fourth: your five lines to make a monet database connection.
63 66 # just like above, mine look like this:
64 67 dbname <- "acs"
65 68 dbport <- 50001
66   -monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
67   -drv <- MonetDB( classPath = monetdriver )
68   -monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
69   -db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
70 69
71   -# end of lines of code to hold on to for all other acs monetdb analyses #
72   -#########################################################################
  70 +drv <- dbDriver("MonetDB")
  71 +monet.url <- paste0( "monetdb://localhost:" , dbport , "/" , dbname )
  72 +db <- dbConnect( drv , monet.url , "monetdb" , "monetdb" )
  73 +
  74 +
  75 +# # # # run your analysis commands # # # #
73 76
74 77
75 78 # the american community survey download and importation script
@@ -187,9 +190,16 @@ svytotal( ~I( vacs %in% c( 2, 4 , 5 , 6 , 7 ) ) , acs.h ) # all other vacant
187 190 close( acs.m )
188 191 close( acs.h )
189 192
190   -# close the connection to the monet database
  193 +
  194 +# disconnect from the current monet database
191 195 dbDisconnect( db )
192 196
  197 +# and close it using the `pid`
  198 +monetdb.server.stop( pid )
  199 +
  200 +# end of lines of code to hold on to for all other `acs` monetdb analyses #
  201 +###########################################################################
  202 +
193 203
194 204 # for more details on how to work with data in r
195 205 # check out my two minute tutorial video site
131 Basic Stand Alone Medicare Claims Public Use Files/2008 - import all csv files into monetdb.R
@@ -36,12 +36,11 @@
36 36
37 37
38 38 # remove the # in order to run this install.packages line only once
39   -# install.packages( c( "downloader" , "R.utils" ) )
  39 +# install.packages( "R.utils" )
40 40
41 41
42   -require(downloader) # downloads and then runs the source() function on scripts from github
43 42 require(R.utils) # load the R.utils package (counts the number of lines in a file quickly)
44   -require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
  43 +require(MonetDB.R) # load the MonetDB.R package (connects r to a monet database)
45 44
46 45
47 46 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@@ -67,22 +66,6 @@ setwd( "C:/My Directory/BSAPUF/" )
67 66 year <- 2008
68 67
69 68
70   -# load the windows.monetdb.configuration() function,
71   -# which allows the easy creation of an executable (.bat) file
72   -# to run the monetdb server specific to this data
73   -source_url( "https://raw.github.com/ajdamico/usgsd/master/MonetDB/windows.monetdb.configuration.R" )
74   -
75   -
76   -# create a folder "MonetDB" in your current working directory.
77   -# so, for example, if you set your current working directory to C:\My Directory\BSAPUF\ above,
78   -# create a new folder C:\My Directory\BSAPUF\MonetDB right now.
79   -
80   -
81   -# if the MonetDB folder doesn't exist in your current working directory,
82   -# this line will create an error.
83   -stopifnot( file.exists( paste0( getwd() , "/MonetDB" ) ) )
84   -
85   -
86 69 # note: the MonetDB folder should *not* be within a year-specific directory.
87 70 # multiple bsa puf years will all be stored into the same monet database,
88 71 # in order to allow multi-year analyses.
@@ -97,42 +80,43 @@ stopifnot( file.exists( paste0( getwd() , "/MonetDB" ) ) )
97 80 # when adding new files or adding a new year of data, this script does not need to be re-run.
98 81
99 82 # create a monetdb executable (.bat) file for the medicare basic stand alone public use file
100   -windows.monetdb.configuration(
101   -
102   - # choose a location to store the file that will run the monetdb server on your local computer
103   - # this can be stored anywhere, but why not put it in the monetdb directory
104   - bat.file.location = paste0( getwd() , "\\MonetDB\\monetdb.bat" ) ,
105   -
106   - # figure out the file path of the MonetDB software on your local machine.
107   - # on my windows machine, monetdb version 5.0 defaulted to this directory, but double-check yours:
108   - monetdb.program.path = "C:\\Program Files\\MonetDB\\MonetDB5\\" ,
109   -
110   - # assign the directory where the database will be stored.
111   - # this setting will store the database within the MonetDB folder of the current working directory
112   - database.directory = paste0( getwd() , "\\MonetDB\\" ) ,
113   -
114   - # create a server name for the dataset you want to save into monetdb.
115   - # this will change for different datasets -- for the basic stand alone public use file, just use bsapuf
116   - dbname = "bsapuf" ,
117   -
118   - # choose which port
119   - dbport = 50003
  83 +batfile <-
  84 + monetdb.server.setup(
  85 +
  86 + # set the path to the directory where the initialization batch file and all data will be stored
  87 + database.directory = "C:/My Directory/BSAPUF/MonetDB" ,
  88 + # must be empty or not exist
  89 +
  90 + # find the main path to the monetdb installation program
  91 + monetdb.program.path = "C:/Program Files/MonetDB/MonetDB5" ,
  92 +
  93 + # choose a database name
  94 + dbname = "bsapuf" ,
  95 +
  96 + # choose a database port
  97 + # this port should not conflict with other monetdb databases
  98 + # on your local computer. two databases with the same port number
  99 + # cannot be accessed at the same time
  100 + dbport = 50003
120 101 )
121 102
122   -
  103 +
123 104 # this next step is so very important.
124 105
125 106 # store a line of code that will make it easy to open up the monetdb server in the future.
126   -# this should contain the same file path as the "bat.file.location" parameter above,
127   -# but don't simply copy the "paste0( getwd() , "\\MonetDB\\monetdb.bat" )" here,
128   -# because if your current working directory changes at other points, you don't want this line to change.
  107 +# this should contain the same file path as the batfile created above,
129 108 # your best bet is to actually look at your local disk to find the full filepath of the executable (.bat) file.
130   -# if it's stored in C:\My Directory\BSAPUF\MonetDB\monetdb.bat
131   -# then your shell.exec line should be:
  109 +# if you ran this script without changes, the batfile will get stored in C:\My Directory\BSAPUF\MonetDB\bsapuf.bat
132 110
  111 +# here's the batfile location:
  112 +batfile
133 113
134   -shell.exec( "C:/My Directory/BSAPUF/MonetDB/monetdb.bat" )
  114 +# note that since you only run the `monetdb.server.setup()` function the first time this script is run,
  115 +# you will need to note the location of the batfile for future MonetDB analyses!
135 116
  117 +# in future R sessions, you can create the batfile variable with a line like..
  118 +# batfile <- "C:/My Directory/BSAPUF/MonetDB/bsapuf.bat"
  119 +# obviously, without the `#` comment character
136 120
137 121 # hold on to that line for future scripts.
138 122 # you need to run this line *every time* you access
@@ -146,14 +130,17 @@ dbport <- 50003
146 130
147 131
148 132 # hey try running it now! a shell window should pop up.
149   -# when it runs, my computer shows:
  133 +pid <- monetdb.server.start( batfile )
  134 +# store the result into another variable, which stands for process id
  135 +# this `pid` variable will allow the MonetDB server to be terminated from within R automagically.
150 136
151   -# MonetDB 5 server v11.13.5 "Oct2012-SP1"
152   -# Serving database 'bsapuf', using 2 threads
  137 +# when the monetdb server runs, my computer shows:
  138 +# MonetDB 5 server v11.15.1 "Feb2013"
  139 +# Serving database 'bsapuf', using 8 threads
153 140 # Compiled for x86_64-pc-winnt/64bit with 64bit OIDs dynamically linked
154   -# Found 15.873 GiB available main-memory.
  141 +# Found 7.860 GiB available main-memory.
155 142 # Copyright (c) 1993-July 2008 CWI.
156   -# Copyright (c) August 2008-2012 MonetDB B.V., all rights reserved
  143 +# Copyright (c) August 2008-2013 MonetDB B.V., all rights reserved
157 144 # Visit http://www.monetdb.org/ for further information
158 145 # Listening for connection requests on mapi:monetdb://127.0.0.1:50003/
159 146 # MonetDB/JAQL module loaded
@@ -206,20 +193,16 @@ ipbs <- paste0( "./" , year , "/" , year , "_IPBS_PUF.csv" )
206 193 # end of files to download #
207 194
208 195
209   -# the monetdb installation instructions asked you to note the filepath of the monetdb java (.jar) file
210   -# you need it now. create a new 'monetdriver' object containing a character string
211   -# with the filepath of the java database connection file
212   -monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
213 196
214 197 # convert the driver to a monetdb driver
215   -drv <- MonetDB( classPath = monetdriver )
  198 +drv <- dbDriver("MonetDB")
216 199
217 200 # notice the dbname and dbport (assigned above during the monetdb configuration)
218 201 # get used in this line
219   -monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
  202 +monet.url <- paste0( "monetdb://localhost:" , dbport , "/" , dbname )
220 203
221 204 # now put everything together and create a connection to the monetdb server.
222   -db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
  205 +db <- dbConnect( drv , monet.url , "monetdb", "monetdb")
223 206 # from now on, the 'db' object will be used for r to connect with the monetdb server
224 207
225 208
@@ -341,26 +324,42 @@ dbGetQuery( db , "select * from pde08 limit 6" )
341 324 # disconnect from the current monet database
342 325 dbDisconnect( db )
343 326
  327 +# and close it using the `pid`
  328 +monetdb.server.stop( pid )
  329 +
344 330
345 331 ######################################################################
346 332 # lines of code to hold on to for all other bsa puf monetdb analyses #
347 333
348   -# first: your shell.exec() function. again, mine looks like this:
349   -shell.exec( "C:/My Directory/BSAPUF/MonetDB/monetdb.bat" )
  334 +# first: specify your batfile. again, mine looks like this:
  335 +batfile <- "C:/My Directory/BSAPUF/MonetDB/bsapuf.bat"
  336 +
  337 +# second: run the MonetDB server
  338 +pid <- monetdb.server.start( batfile )
350 339
351   -# second: add a ten second system sleep in between the shell.exec() function
  340 +# third: add a ten second system sleep in between the monetdb.server.start() call
352 341 # and the database connection lines. this gives your local computer a chance
353 342 # to get monetdb up and running.
354 343 Sys.sleep( 10 )
355 344
356   -# third: your six lines to make a monet database connection.
  345 +# fourth: your five lines to make a monet database connection.
357 346 # just like above, mine look like this:
358 347 dbname <- "bsapuf"
359 348 dbport <- 50003
360   -monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
361   -drv <- MonetDB( classPath = monetdriver )
362   -monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
363   -db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
  349 +
  350 +drv <- dbDriver("MonetDB")
  351 +monet.url <- paste0( "monetdb://localhost:" , dbport , "/" , dbname )
  352 +db <- dbConnect( drv , monet.url , "monetdb" , "monetdb" )
  353 +
  354 +
  355 +# # # # run your analysis commands # # # #
  356 +
  357 +
  358 +# disconnect from the current monet database
  359 +dbDisconnect( db )
  360 +
  361 +# and close it using the `pid`
  362 +monetdb.server.stop( pid )
364 363
365 364 # end of lines of code to hold on to for all other bsa puf monetdb analyses #
366 365 #############################################################################
104 Basic Stand Alone Medicare Claims Public Use Files/2008 - replicate cms publications.R
@@ -36,7 +36,7 @@
36 36 # # # # # # # # # # # # # # #
37 37
38 38
39   -require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
  39 +require(MonetDB.R) # load the MonetDB.R package (connects r to a monet database)
40 40
41 41
42 42 # after running the r script above, users should have handy a few lines
@@ -44,25 +44,29 @@ require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
44 44 # basic stand alone public use files. run them now. mine look like this:
45 45
46 46
47   -######################################################################
48   -# lines of code to hold on to for all other bsa puf monetdb analyses #
49 47
50   -# first: your shell.exec() function. again, mine looks like this:
51   -shell.exec( "C:/My Directory/BSAPUF/MonetDB/monetdb.bat" )
  48 +###################################################################################
  49 +# lines of code to hold on to for the start of all other bsa puf monetdb analyses #
52 50
53   -# second: add a twenty second system sleep in between the shell.exec() function
  51 +# first: specify your batfile. again, mine looks like this:
  52 +batfile <- "C:/My Directory/BSAPUF/MonetDB/bsapuf.bat"
  53 +
  54 +# second: run the MonetDB server
  55 +pid <- monetdb.server.start( batfile )
  56 +
  57 +# third: add a ten second system sleep in between the monetdb.server.start() call
54 58 # and the database connection lines. this gives your local computer a chance
55 59 # to get monetdb up and running.
56   -Sys.sleep( 20 )
  60 +Sys.sleep( 10 )
57 61
58   -# third: your six lines to make a monet database connection.
  62 +# fourth: your five lines to make a monet database connection.
59 63 # just like above, mine look like this:
60 64 dbname <- "bsapuf"
61 65 dbport <- 50003
62   -monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
63   -drv <- MonetDB( classPath = monetdriver )
64   -monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
65   -db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
  66 +
  67 +drv <- dbDriver("MonetDB")
  68 +monet.url <- paste0( "monetdb://localhost:" , dbport , "/" , dbname )
  69 +db <- dbConnect( drv , monet.url , "monetdb" , "monetdb" )
66 70
67 71 # end of lines of code to hold on to for all other bsa puf monetdb analyses #
68 72 #############################################################################
@@ -94,12 +98,27 @@ for ( i in pufs ){
94 98
95 99 # print the name of the current table
96 100 print( i )
  101 +
  102 +
  103 + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  104 + # create a monet.frame object (experimental, but designed to behave like an R data frame) #
  105 + # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
  106 +
  107 + assign( i , monet.frame( db , i ) )
97 108
98 109 # print the number of records stored in that table
99   - print( dbGetQuery( db , paste( "select count(*) from" , i ) ) )
  110 + print(
  111 + paste(
  112 + "table" ,
  113 + i ,
  114 + "contains" ,
  115 + nrow( get( i ) ) ,
  116 + "rows"
  117 + )
  118 + )
100 119 }
101 120
102   -
  121 +# and now you can access each of those objects as if they were an R data frame #
103 122
104 123
105 124 # # # # # # # # # # # # # #
@@ -126,8 +145,10 @@ benes <-
126 145 # print this matrix to the screen
127 146 benes
128 147
129   -# examine the first six records of the home health agency (hha) table
130   -dbGetQuery( db , "select * from hha08 limit 6" )
  148 +# examine the first and last six records of the home health agency (hha) table
  149 +head( hha08 )
  150 +
  151 +tail( hha08 )
131 152
132 153 # create an 'hhusers' data frame, constructed by querying the monet database
133 154 hhusers <-
@@ -183,9 +204,13 @@ round( hhusers / benes , 4 )
183 204 # the following code will precisely match the 'total' (bottom) row in tables 5 and 6 of the inpatient documentation (pdf page 20)
184 205 # http://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/BSAPUFS/Downloads/2008_BSA_Inpatient_Claims_PUF_GenDoc.pdf
185 206
186   -# examine the first six records of the 2008 inpatient claims (inpatient08) table
  207 +# examine the first six records of the 2008 inpatient claims (inpatient08) table using SQL..
187 208 dbGetQuery( db , "select * from inpatient08 limit 6" )
188 209
  210 +# ..or access the monet.frame object
  211 +head( inpatient08 )
  212 +
  213 +
189 214 # run a simple sql query on the inpatient claims table in the 2008 monet database
190 215 dbGetQuery(
191 216 db ,
@@ -213,8 +238,11 @@ dbGetQuery(
213 238 # http://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/BSAPUFS/Downloads/2008_BSA_Inpatient_Claims_PUF_GenDoc.pdf
214 239
215 240
216   -# count the total number of claims in the monet data table
217   -total.claims <- dbGetQuery( db , "select count(*) from inpatient08" )
  241 +# count the total number of claims in the monet data table using SQL..
  242 +( total.claims <- dbGetQuery( db , "select count(*) from inpatient08" ) )
  243 +
  244 +# ..or as a monet.frame
  245 +nrow( inpatient08 )
218 246
219 247 # print the distinct values of the 'ip_clm_days_cd' column to the screen
220 248 dbGetQuery( db , "select distinct ip_clm_days_cd from inpatient08" )
@@ -258,11 +286,18 @@ table12
258 286 # the following code will precisely match the puf (rightmost) column in table 4 of the hospice documentation (pdf page 7)
259 287 # http://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/BSAPUFS/Downloads/2008_BSA_Hospice_Bene_PUF_GenDoc.pdf
260 288
261   -# examine the first six records of the 2008 hospice enrollee (hospice08) table
  289 +# examine the first six records of the 2008 hospice enrollee (hospice08) table using SQL..
262 290 dbGetQuery( db , "select * from hospice08 limit 6" )
263 291
  292 +# ..or access it as a monet.frame
  293 +head( hospice08 )
  294 +
  295 +
264 296 # store the number of beneficiaries in hospice (remember this is about 5% of the total population)
265   -total.benes <- dbGetQuery( db , "select count(*) from hospice08" )
  297 +( total.benes <- dbGetQuery( db , "select count(*) from hospice08" ) )
  298 +
  299 +# or count the same rows through the monet.frame object
  300 +nrow( hospice08 )
266 301
267 302 # store the number of beneficiaries in hospice - in each sex category - into a data frame called 'table4'
268 303 table4 <-
@@ -322,9 +357,12 @@ snf.users.by.admissions$count / sum( snf.users.by.admissions$count )
322 357 # http://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/BSAPUFS/Downloads/2008_BSA_Carrier_Line_Items_PUF_GenDoc.pdf
323 358
324 359
325   -# examine the first six records of the 2008 carrier line item (carrier08) table
  360 +# examine the first six records of the 2008 carrier line item (carrier08) table using SQL..
326 361 dbGetQuery( db , "select * from carrier08 limit 6" )
327 362
  363 +# ..or as a monet.frame
  364 +head( carrier08 )
  365 +
328 366 # count the total number of line items
329 367 # note: the 'medicare payments' also comes close to the 'medicare payments' column
330 368 # however doesn't match exactly, because the cms published number is pre-rounded (see table 2 on pdf page 7 for details)
@@ -340,9 +378,12 @@ dbGetQuery( db , "select count(*) as number_of_line_items , sum( car_hcpcs_pmt_a
340 378 # the following code will precisely match the distribution in table 10 of the prescription drug events documentation (pdf page 9)
341 379 # http://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/BSAPUFS/Downloads/2008_BSA_PD_Events_PUF_GenDoc.pdf
342 380
343   -# examine the first six records of the 2008 prescription drug events (pde08) table
  381 +# examine the first six records of the 2008 prescription drug events (pde08) table with SQL..
344 382 dbGetQuery( db , "select * from pde08 limit 6" )
345 383
  384 +# ..or with monet.frame
  385 +head( pde08 )
  386 +
346 387 # count the number of events shown in table 1 (on pdf page 2) and get close (but not perfect) to the total drug cost, due to rounding
347 388 table1 <- dbGetQuery( db , "select count(*) as num_events , sum( pde_drug_cost ) as drug_cost_sum from pde08" )
348 389
@@ -373,9 +414,12 @@ as.numeric( patient.payment.dist$L1 ) / as.numeric( table1[1] )
373 414 # the following code will precisely match the counts in table 5 of the chronic conditions puf general documentation (pdf page 13)
374 415 # https://www.cms.gov/Research-Statistics-Data-and-Systems/Statistics-Trends-and-Reports/BSAPUFS/Downloads/2008_Chronic_Conditions_PUF_GenDoc.pdf
375 416
376   -# examine the first six records of the 2008 chronic conditions (cc08) table
  417 +# examine the first six records of the 2008 chronic conditions (cc08) table using SQL..
377 418 dbGetQuery( db , "select * from cc08 limit 6" )
378 419
  420 +# ..or with monet.frame
  421 +head( cc08 )
  422 +
379 423 # create a character vector containing each of the data columns matching the enrollee columns in table 5
380 424 count.columns <-
381 425 c(
@@ -399,6 +443,18 @@ dbGetQuery( db , paste( "select bene_sex_ident_cd, " , sum.strings , "from cc08
399 443 dbGetQuery( db , paste( "select bene_sex_ident_cd, bene_age_cat_cd, " , sum.strings , "from cc08 group by bene_sex_ident_cd, bene_age_cat_cd" ) )
400 444
401 445
  446 +#################################################################################
  447 +# lines of code to hold on to for the end of all other bsa puf monetdb analyses #
  448 +
  449 +# disconnect from the current monet database
  450 +dbDisconnect( db )
  451 +
  452 +# and close it using the `pid`
  453 +monetdb.server.stop( pid )
  454 +
  455 +# end of lines of code to hold on to for all other bsa puf monetdb analyses #
  456 +#############################################################################
  457 +
402 458 # for more details on how to work with data in r
403 459 # check out my two minute tutorial video site
404 460 # http://www.twotorials.com/
172 Behavioral Risk Factor Surveillance System/1984 - 2011 download all microdata.R
@@ -52,7 +52,7 @@
52 52
53 53
54 54 require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
55   -require(RMonetDB) # load the RMonetDB package (connects r to a monet database)
  55 +require(MonetDB.R) # load the MonetDB.R package (connects r to a monet database)
56 56 require(foreign) # load foreign package (converts data files into R)
57 57 require(downloader) # downloads and then runs the source() function on scripts from github
58 58
@@ -65,75 +65,60 @@ require(downloader) # downloads and then runs the source() function on scripts
65 65 setwd( "C:/My Directory/BRFSS/" )
66 66
67 67
68   -
69   -# load the windows.monetdb.configuration() function,
70   -# which allows the easy creation of an executable (.bat) file
71   -# to run the monetdb server specific to this data
72   -source_url( "https://raw.github.com/ajdamico/usgsd/master/MonetDB/windows.monetdb.configuration.R" )
73   -
74 68 # load the read.SAScii.monetdb() function,
75 69 # which imports ASCII (fixed-width) data files directly into a monet database
76 70 # using only a SAS importation script
77 71 source_url( "https://raw.github.com/ajdamico/usgsd/master/MonetDB/read.SAScii.monetdb.R" )
78 72
79 73
80   -# create a folder "MonetDB" in your current working directory.
81   -# so, for example, if you set your current working directory to C:\My Directory\BRFSS\ above,
82   -# create a new folder C:\My Directory\BRFSS\MonetDB right now.
83   -
84   -
85   -# if the MonetDB folder doesn't exist in your current working directory,
86   -# this line will create an error.
87   -stopifnot( file.exists( paste0( getwd() , "/MonetDB" ) ) )
88   -
89   -
90 74 # configure a monetdb database for the brfss on windows #
91 75
92 76 # note: only run this command once. this creates an executable (.bat) file
93 77 # in the appropriate directory on your local disk.