
preliminary brfss update

commit b54cbe5dc05b9820b31fc38d525d2702a8cc2124 1 parent 19ec044
@ajdamico authored
247 Behavioral Risk Factor Surveillance System/1984 - 2011 download all microdata.R
@@ -0,0 +1,247 @@
+
+require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
+
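+# set the working directory on the local disk.  the MonetDB folder and the saved .rda survey design files will be stored here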
+setwd( "C:/My Directory/BRFSS/" )
+
+
+source_https <- function(url, ...) {
+ # load package
+ require(RCurl)
+
+ # parse and evaluate each .R script
+ sapply(c(url, ...), function(u) {
+ eval(parse(text = getURL(u, followlocation = TRUE, cainfo = system.file("CurlSSL", "cacert.pem", package = "RCurl"))), envir = .GlobalEnv)
+ })
+}
+source_https( "https://raw.github.com/ajdamico/usgsd/master/MonetDB/windows.monetdb.configuration.R" )
+source_https( "https://raw.github.com/ajdamico/usgsd/master/MonetDB/read.SAScii.monetdb.R" )
+
+
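+# confirm that a MonetDB folder exists within the current working directory before configuring the database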
+stopifnot( file.exists( paste0( getwd() , "/MonetDB" ) ) )
+
+windows.monetdb.configuration(
+ bat.file.location = paste0( getwd() , "\\MonetDB\\monetdb.bat" ) ,
+ monetdb.program.path = "C:\\Program Files\\MonetDB\\MonetDB5\\" ,
+ database.directory = paste0( getwd() , "\\MonetDB\\" ) ,
+ dbname = "brfss" ,
+ dbport = 50003
+ )
+
+shell.exec( "C:/My Directory/BRFSS/MonetDB/monetdb.bat" )
+
+
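+# wait for monetdb to finish starting, then connect to the 'brfss' database over jdbc,
+# using the same database name and port specified in the configuration above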
+dbname <- "brfss"
+dbport <- 50003
+
+Sys.sleep( 20 )
+
+monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
+drv <- MonetDB( classPath = monetdriver )
+monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
+db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
+
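+# choose which years of brfss microdata to download and import (1984 through 2011)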
+years.to.download <- 1984:2011
+
+
+
+
+
+
+require(foreign) # load foreign package (converts data files into R)
+
+
+
+tf <- tempfile()
+td <- tempdir()
+
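+# loop through the 1984 - 2001 files, which are distributed as zipped sas transport (.xpt) files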
+for ( year in intersect( years.to.download , 1984:2001 ) ){
+
+ tablename <- paste0( 'b' , year )
+
+ fn <-
+ ifelse( year < 1990 ,
+ paste0( "ftp://ftp.cdc.gov/pub/data/Brfss/CDBRFS" , substr( year , 3 , 4 ) , "_XPT.zip" ) ,
+ paste0( "ftp://ftp.cdc.gov/pub/data/Brfss/CDBRFS" , substr( year , 3 , 4 ) , "XPT.zip" )
+ )
+
+
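+ # download the zipped transport file, unzip it into the temporary directory,
+ # read it into r, convert all column names to lowercase,
+ # then write the table back out as a csv file for importation into monetdb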
+ download.file( fn , tf , mode = 'wb' )
+ local.fn <- unzip( tf , exdir = td )
+
+ x <- read.xport( local.fn )
+ names( x ) <- tolower( names( x ) )
+ write.csv( x , tf , row.names = FALSE )
+
+ # rows to check then read
+ rtctr <- nrow( x )
+
+ # reset all try-error objects
+ first.attempt <- second.attempt <- NULL
+
+ # first try it with NAs for NA strings
+ first.attempt <- try( monet.read.csv( db , tf , tablename , nrows = rtctr , na.strings = "NA" , nrow.check = rtctr ) , silent = TRUE )
+
+ # then try it with "" for NA strings
+ if( class( first.attempt ) == "try-error" ) {
+ write.csv( x , tf , row.names = FALSE , na = "" )
+
+ try( dbRemoveTable( db , tablename ) , silent = TRUE )
+
+ second.attempt <-
+ try( monet.read.csv( db , tf , tablename , nrows = rtctr , na.strings = "" , nrow.check = rtctr ) , silent = TRUE )
+ }
+
+ # if that still doesn't work, import the table manually
+ if( class( second.attempt ) == "try-error" ) {
+
+ try( dbRemoveTable( db , tablename ) , silent = TRUE )
+
+ colTypes <-
+ ifelse(
+ sapply( x , class ) == 'numeric' ,
+ 'DOUBLE PRECISION' ,
+ 'VARCHAR(255)'
+ )
+
+
+ colDecl <- paste( names( x ) , colTypes )
+
+ sql.create <-
+ sprintf(
+ paste(
+ "CREATE TABLE" ,
+ tablename ,
+ "(%s)"
+ ) ,
+ paste(
+ colDecl ,
+ collapse = ", "
+ )
+ )
+
+ # create the table in the database
+ dbSendUpdate( db , sql.create )
+
+ sql.update <-
+ paste0(
+ "copy " ,
+ rtctr ,
+ " offset 2 records into " ,
+ tablename ,
+ " from '" ,
+ tf ,
+ "' using delimiters ',' null as ''"
+ )
+
+ dbSendUpdate( db , sql.update )
+
+ }
+
+
+ file.remove ( local.fn )
+
+ rm( x )
+
+ gc()
+
+}
+
+
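+# loop through the 2002 - 2011 files, which are distributed as zipped fixed-width (.asc) files
+# accompanied by sas importation scripts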
+for ( year in intersect( years.to.download , 2002:2011 ) ){
+
+ file.remove( tf )
+
+ if (year == 2011){
+
+ fn <- "ftp://ftp.cdc.gov/pub/data/brfss/LLCP2011ASC.ZIP"
+ sas_ri <- "http://www.cdc.gov/brfss/technical_infodata/surveydata/2011/SASOUT11_LLCP.SAS"
+
+ } else if ( year == 2002 ){
+
+ fn <- paste0( "ftp://ftp.cdc.gov/pub/data/brfss/CDBRFS" , year , "ASC.ZIP" )
+ sas_ri <- paste0( "http://www.cdc.gov/brfss/technical_infodata/surveydata/" , year , "/SASOUT" , substr( year , 3 , 4 ) , ".SAS" )
+
+ } else {
+
+ fn <- paste0( "ftp://ftp.cdc.gov/pub/data/brfss/CDBRFS" , substr( year , 3 , 4 ) , "ASC.ZIP" )
+ sas_ri <- paste0( "http://www.cdc.gov/brfss/technical_infodata/surveydata/" , year , "/SASOUT" , substr( year , 3 , 4 ) , ".SAS" )
+
+ }
+
+
+ z <- readLines( sas_ri )
+
+ if ( year == 2009 ) z <- z[ -159:-168 ]
+ if ( year == 2011 ) z <- z[ !grepl( "CHILDAGE" , z ) ]
+
+
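+ # clean up the sas importation script so it can be processed:
+ # replace underscores in variable names with the letter x,
+ # remove the seqno, idate, and phonenum fields,
+ # and strip out tab and form-feed characters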
+ z <- gsub( "_" , "x" , z , fixed = TRUE )
+ z <- z[ !grepl( "SEQNO" , z ) ]
+ z <- z[ !grepl( "IDATE" , z ) ]
+ z <- z[ !grepl( "PHONENUM" , z ) ]
+ z <- gsub( "\t" , " " , z , fixed = TRUE )
+ z <- gsub( "\f" , " " , z , fixed = TRUE )
+
+ writeLines( z , tf )
+
+ read.SAScii.monetdb (
+ fn ,
+ tf ,
+ beginline = 70 ,
+ zipped = T ,
+ tl = TRUE ,
+ tablename = paste0( 'b' , year ) ,
+ connection = db
+ )
+
+}
+
+
+
+
+# designate weight, psu, and stratification variables
+survey.vars <-
+ data.frame(
+ year = 1984:2011 ,
+ weight = c( rep( 'x_finalwt' , 10 ) , rep( 'xfinalwt' , 17 ) , 'xllcpwt' ) ,
+ psu = c( rep( 'x_psu' , 10 ) , rep( 'xpsu' , 18 ) ) ,
+ strata = c( rep( 'x_ststr' , 10 ) , rep( 'xststr' , 18 ) )
+ )
+
+# convert all columns in the survey.vars table to character strings,
+# except the first
+survey.vars[ , -1 ] <- sapply( survey.vars[ , -1 ] , as.character )
+
+
+
+for ( year in years.to.download ){
+
+ tablename <- paste0( "b" , year )
+ strata <- survey.vars[ survey.vars$year == year , 'strata' ]
+ psu <- survey.vars[ survey.vars$year == year , 'psu' ]
+ weight <- survey.vars[ survey.vars$year == year , 'weight' ]
+
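+ # add a column of all ones (convenient for weighted counts)
+ # and an auto-incrementing unique key, which the sqlsurvey() call below requires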
+ dbSendUpdate( db , paste0( 'alter table ' , tablename , ' add column one int' ) )
+ dbSendUpdate( db , paste0( 'UPDATE ' , tablename , ' SET one = 1' ) )
+ dbSendUpdate( db , paste0( 'alter table ' , tablename , ' add column idkey int auto_increment' ) )
+
+ brfss.design <-
+ sqlsurvey(
+ weight = weight ,
+ nest = TRUE ,
+ strata = strata ,
+ id = psu ,
+ table.name = tablename ,
+ key = "idkey" ,
+ # check.factors = 10 , # defaults to ten
+ database = monet.url ,
+ driver = drv ,
+ user = "monetdb" ,
+ password = "monetdb"
+ )
+
+
+ save( brfss.design , file = paste( tablename , 'design.rda' ) )
+
+}
+
273 Behavioral Risk Factor Surveillance System/2010 single-year - variable recode example.R
@@ -0,0 +1,273 @@
+# analyze us government survey data with the r language
+# behavioral risk factor surveillance system
+# 2010 file
+
+# if you have never used the r language before,
+# watch this two minute video i made outlining
+# how to run this script from start to finish
+# http://www.screenr.com/Zpd8
+
+# anthony joseph damico
+# ajdamico@gmail.com
+
+# if you use this script for a project, please send me a note
+# it's always nice to hear about how people are using this stuff
+
+# for further reading on cross-package comparisons, see:
+# http://journal.r-project.org/archive/2009-2/RJournal_2009-2_Damico.pdf
+
+
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+#####################################################################################################################################
+# prior to running this analysis script, the brfss 2010 single-year file must be loaded as a monet database-backed sqlsurvey object #
+# on the local machine. running the 1984-2011 download and create database script will create a monet database containing this file #
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+# https://github.com/ajdamico/usgsd/blob/master/American%20Community%20Survey/2005-2011%20-%20download%20all%20microdata.R #
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+# that script will create a file "acs2011_1yr.rda" in C:/My Directory/ACS or wherever the working directory was set for the program #
+#####################################################################################################################################
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+
+# # # # # # # # # # # # # # #
+# warning: monetdb required #
+# # # # # # # # # # # # # # #
+
+
+# remove the # in order to run this install.packages line only once
+# install.packages( "stringr" )
+
+
+require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
+require(stringr) # load stringr package (manipulates character strings easily)
+
+
+# after running the r script above, users should have handy a few lines
+# to initiate and connect to the monet database containing all american community survey tables
+# run them now. mine look like this:
+
+
+##################################################################
+# lines of code to hold on to for all other acs monetdb analyses #
+
+# first: your shell.exec() function. again, mine looks like this:
+shell.exec( "C:/My Directory/ACS/MonetDB/monetdb.bat" )
+
+# second: add a twenty second system sleep in between the shell.exec() function
+# and the database connection lines. this gives your local computer a chance
+# to get monetdb up and running.
+Sys.sleep( 20 )
+
+# third: your six lines to make a monet database connection.
+# just like above, mine look like this:
+dbname <- "acs"
+dbport <- 50001
+monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
+drv <- MonetDB( classPath = monetdriver )
+monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
+db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
+
+# end of lines of code to hold on to for all other acs monetdb analyses #
+#########################################################################
+
+
+# the american community survey download and importation script
+# has already created a monet database-backed survey design object
+# connected to the 2011 single-year table
+
+# however, making any changes to the data table downloaded directly from the census bureau
+# currently requires directly accessing the table using dbSendUpdate() to run sql commands
+
+
+# note: recoding (writing) variables in monetdb often takes much longer
+# than querying (reading) variables in monetdb. therefore, it might be wise to
+# run all recodes at once, and leave your computer running overnight.
+
+
+# variable recodes on monet database-backed survey objects might be
+# more complicated than you'd expect, but it's far from impossible
+# three steps:
+
+
+
+##############################################################
+# step 1: connect to the acs data table you'd like to recode #
+# then make a copy so you don't lose the pristine original. #
+
+# the command above
+# db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
+# has already connected the current instance of r to the monet database
+
+# now simply copy the table you'd like to recode into a new table
+dbSendUpdate( db , "CREATE TABLE recoded_acs2011_1yr_m AS SELECT * FROM acs2011_1yr_m WITH DATA" )
+# this action protects the original 'acs2011_1yr_m' table from any accidental errors.
+# at any point, we can delete this recoded copy of the data table using the command..
+# dbRemoveTable( db , "recoded_acs2011_1yr_m" )
+# ..and start fresh by re-copying the pristine file from acs2011_1yr_m
+
+
+
+############################################
+# step 2: make all of your recodes at once #
+
+# from this point forward, all commands will only touch the
+# 'recoded_acs2011_1yr_m' table. the 'acs2011_1yr_m' is now off-limits.
+
+# add a new column. call it, oh i don't know, agecat?
+# since it's actually a categorical variable, make it VARCHAR( 255 )
+dbSendUpdate( db , "ALTER TABLE recoded_acs2011_1yr_m ADD COLUMN agecat VARCHAR( 255 )" )
+
+# if you wanted to create a numeric variable, substitute VARCHAR( 255 ) with DOUBLE PRECISION like this:
+# dbSendUpdate( db , "ALTER TABLE recoded_acs2011_1yr_m ADD COLUMN agecatx DOUBLE PRECISION" )
+# ..but then the column's values would have to be numbers (1 - 13) instead of the strings shown below ('01' - '13')
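+# for instance -- purely an illustrative sketch re-using the hypothetical agecatx column from the commented line above --
+# dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecatx = 1 WHERE agep >= 0 AND agep < 5" )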
+
+
+# by hand, you could set the values of the agecat column anywhere between '01' and '13'
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '01' WHERE agep >= 0 AND agep < 5" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '02' WHERE agep >= 5 AND agep < 10" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '03' WHERE agep >= 10 AND agep < 15" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '04' WHERE agep >= 15 AND agep < 20" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '05' WHERE agep >= 20 AND agep < 25" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '06' WHERE agep >= 25 AND agep < 35" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '07' WHERE agep >= 35 AND agep < 45" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '08' WHERE agep >= 45 AND agep < 55" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '09' WHERE agep >= 55 AND agep < 60" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '10' WHERE agep >= 60 AND agep < 65" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '11' WHERE agep >= 65 AND agep < 75" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '12' WHERE agep >= 75 AND agep < 85" )
+dbSendUpdate( db , "UPDATE recoded_acs2011_1yr_m SET agecat = '13' WHERE agep >= 85 AND agep < 101" )
+
+
+# quickly check your work by running a simple SELECT COUNT(*) command with sql
+dbGetQuery( db , "SELECT agecat , agep , COUNT(*) as number_of_records from recoded_acs2011_1yr_m GROUP BY agecat , agep ORDER BY agep" )
+# and notice that each value of agep has been deposited in the appropriate age category
+
+
+# but all of that takes a while to write out.
+
+
+# since there's so much repeated text in the commands above,
+# let's create the same agecat variable (agecat2 this time)
+# with code you'll be able to modify a lot faster
+
+# remember, since it's actually a categorical variable, make the column type VARCHAR( 255 )
+dbSendUpdate( db , "ALTER TABLE recoded_acs2011_1yr_m ADD COLUMN agecat2 VARCHAR( 255 )" )
+
+
+# to automate things, just create a vector of each age bound
+agebounds <- c( 0 , 5 , 10 , 15 , 20 , 25 , 35 , 45 , 55 , 60 , 65 , 75 , 85 , 101 )
+# and loop through each interval, plugging in a new agecat for each value
+
+# the loop's lower bounds start at the value 0 and end at the value 85 -- the final value of 101 serves only as the upper ceiling
+for ( i in 1:( length( agebounds ) - 1 ) ){
+
+ # build the sql string to pass to monetdb
+ update.sql.string <- paste0( "UPDATE recoded_acs2011_1yr_m SET agecat2 = '" , str_pad( i , 2 , pad = '0' ) , "' WHERE agep >= " , agebounds[ i ] , " AND agep < " , agebounds[ i + 1 ] )
+
+ # take a look at the update.sql.string you've just built. familiar? ;)
+ print( update.sql.string )
+
+ # now actually run the sql string
+ dbSendUpdate( db , update.sql.string )
+}
+
+
+# check your work by running a simple SELECT COUNT(*) command with sql
+dbGetQuery( db , "SELECT agecat , agecat2 , COUNT(*) as number_of_records from recoded_acs2011_1yr_m GROUP BY agecat , agecat2 ORDER BY agecat" )
+# and notice that there aren't any records where agecat does not equal agecat2
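+
+# the same check can also be written as a sql filter -- a quick sketch; this count should come back as zero
+dbGetQuery( db , "SELECT COUNT(*) AS mismatches FROM recoded_acs2011_1yr_m WHERE agecat <> agecat2" )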
+
+
+
+#############################################################################
+# step 3: create a new survey design object connecting to the recoded table #
+
+# to initiate a new complex sample survey design on the data table
+# that's been recoded to include 'agecat'
+# simply re-run the sqlrepsurvey() function and update the table.name =
+# argument so it now points to the recoded_ table in the monet database
+
+# note: this takes a while. depending on how slowly the dots move across your screen,
+# you may want to leave it running overnight. i did warn you to run all of your recodes at once, didn't i?
+
+# create a sqlrepsurvey complex sample design object
+# using the *recoded* merged (household+person) table
+
+acs.m.recoded.design <-
+ sqlrepsurvey(
+ weight = 'pwgtp' ,
+ repweights = paste0( 'pwgtp' , 1:80 ) ,
+ scale = 4 / 80 ,
+ rscales = rep( 1 , 80 ) ,
+ mse = TRUE ,
+ table.name = "recoded_acs2011_1yr_m" , # note the solitary change here
+ key = "idkey" ,
+ # check.factors = 10 ,
+ database = monet.url ,
+ driver = drv ,
+ user = "monetdb" ,
+ password = "monetdb"
+ )
+
+
+# sqlite database-backed survey objects are described here:
+# http://faculty.washington.edu/tlumley/survey/svy-dbi.html
+# monet database-backed survey objects are similar, but:
+# the database engine is, well, blazingly faster
+# the setup is kinda more complicated (but all done for you)
+
+
+
+# save this new complex sample survey design
+# into an r data file (.rda) that can now be
+# analyzed quicker than anything else.
+# unless you've set your working directory elsewhere,
+# spell out the entire filepath to the .rda file
+# use forward slashes instead of backslashes
+save( acs.m.recoded.design , file = "C:/My Directory/ACS/recoded_acs2011_1yr.rda" )
+
+
+# # # # # # # # # # # # # # # # #
+# you've completed your recodes #
+# # # # # # # # # # # # # # # # #
+
+# everything's peaches and cream from here on in.
+
+# to analyze your newly-recoded year of data:
+
+# close r
+
+# open r back up
+
+require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
+
+# run your..
+# lines of code to hold on to for all other acs monetdb analyses #
+# (the same block of code i told you to hold onto at the end of the download script)
+
+# load your new survey object
+
+load( "C:/My Directory/ACS/recoded_acs2011_1yr.rda" )
+
+
+# connect the recoded complex sample design to the monet database #
+acs.r <- open( acs.m.recoded.design , driver = drv , user = "monetdb" , password = "monetdb" ) # recoded
+
+# ..and now you can exactly match the age categories provided by the census bureau at..
+# http://www.census.gov/acs/www/Downloads/data_documentation/pums/Estimates/pums_estimates_11.lst #
+# with one measly command:
+
+svytotal( ~one , acs.r , byvar = ~agecat )
+
+
+# are we done here? yep, we're done.
+
+# close the connection to the recoded sqlrepsurvey design object
+close( acs.r )
+
+# close the connection to the monet database
+dbDisconnect( db )
+
+
+# for more details on how to work with data in r
+# check out my two minute tutorial video site
+# http://www.twotorials.com/
293 Behavioral Risk Factor Surveillance System/2011 single-year - analysis examples.R
@@ -0,0 +1,293 @@
+# analyze us government survey data with the r language
+# american community survey
+# 2011 person and household files
+
+# if you have never used the r language before,
+# watch this two minute video i made outlining
+# how to run this script from start to finish
+# http://www.screenr.com/Zpd8
+
+# anthony joseph damico
+# ajdamico@gmail.com
+
+# if you use this script for a project, please send me a note
+# it's always nice to hear about how people are using this stuff
+
+# for further reading on cross-package comparisons, see:
+# http://journal.r-project.org/archive/2009-2/RJournal_2009-2_Damico.pdf
+
+
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+#####################################################################################################################################
+# prior to running this analysis script, the acs 2011 single-year file must be loaded as a monet database-backed sqlsurvey object #
+# on the local machine. running the 2005-2011 download and create database script will create a monet database containing this file #
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+# https://github.com/ajdamico/usgsd/blob/master/American%20Community%20Survey/2005-2011%20-%20download%20all%20microdata.R #
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+# that script will create a file "acs2011_1yr.rda" in C:/My Directory/ACS or wherever the working directory was set for the program #
+#####################################################################################################################################
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+
+
+# # # # # # # # # # # # # # #
+# warning: monetdb required #
+# # # # # # # # # # # # # # #
+
+
+require(sqlsurvey) # load sqlsurvey package (analyzes large complex design surveys)
+
+
+# after running the r script above, users should have handy a few lines
+# to initiate and connect to the monet database containing all american community survey tables
+# run them now. mine look like this:
+
+
+##################################################################
+# lines of code to hold on to for all other acs monetdb analyses #
+
+# first: your shell.exec() function. again, mine looks like this:
+shell.exec( "C:/My Directory/ACS/MonetDB/monetdb.bat" )
+
+# second: add a twenty second system sleep in between the shell.exec() function
+# and the database connection lines. this gives your local computer a chance
+# to get monetdb up and running.
+Sys.sleep( 20 )
+
+# third: your six lines to make a monet database connection.
+# just like above, mine look like this:
+dbname <- "acs"
+dbport <- 50001
+monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
+drv <- MonetDB( classPath = monetdriver )
+monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
+db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
+
+# end of lines of code to hold on to for all other acs monetdb analyses #
+#########################################################################
+
+
+# the american community survey download and importation script
+# has already created a monet database-backed survey design object
+# connected to the 2011 single-year table
+
+# sqlite database-backed survey objects are described here:
+# http://faculty.washington.edu/tlumley/survey/svy-dbi.html
+# monet database-backed survey objects are similar, but:
+# the database engine is, well, blazingly faster
+# the setup is kinda more complicated (but all done for you)
+
+# since this script only loads one file off of the local drive,
+# there's no need to set the working directory.
+# instead, simply use the full filepath to the r data file (.rda)
+# as shown in the load() examples below.
+
+# choose which file in your ACS directory to analyze:
+# one-year, three-year, or five-year file from any of the available years.
+# this script replicates the 2011 single-year estimates,
+# so leave that line uncommented and the other three choices commented out.
+
+# load the desired american community survey monet database-backed complex sample design objects
+
+load( 'C:/My Directory/ACS/acs2011_1yr.rda' ) # analyze the 2011 single-year acs
+# load( 'C:/My Directory/ACS/acs2010_1yr.rda' ) # analyze the 2010 single-year acs
+# load( 'C:/My Directory/ACS/acs2010_3yr.rda' ) # analyze the 2008-2010 three-year acs
+# load( 'C:/My Directory/ACS/acs2010_5yr.rda' ) # analyze the 2006-2010 five-year acs
+
+# note: this r data file should already contain both the merged (person + household) and household-only designs
+
+
+# connect the complex sample designs to the monet database #
+acs.m <- open( acs.m.design , driver = drv , user = "monetdb" , password = "monetdb" ) # merged design
+acs.h <- open( acs.h.design , driver = drv , user = "monetdb" , password = "monetdb" ) # household-only design
+
+
+
+################################################
+# ..and immediately start the example analyses #
+################################################
+
+# count the total (unweighted) number of records in acs #
+
+# simply use the nrow function..
+nrow( acs.m )
+
+# ..on the sqlrepsurvey design object
+class( acs.m )
+
+
+# since the acs gets loaded as a monet database-backed survey object instead of a data frame,
+# the number of unweighted records cannot be calculated by running the nrow() function on a data frame.
+
+# running the nrow() function on the database connection object
+# simply produces an error..
+# nrow( db )
+
+# because the monet database might contain multiple data tables
+class( db )
+
+
+# instead, perform the same unweighted count directly from the sql table
+# stored inside the monet database on your hard disk (as opposed to RAM)
+dbGetQuery( db , "SELECT COUNT(*) AS num_records FROM acs2011_1yr_m" )
+
+
+
+# count the total (unweighted) number of records in acs #
+# broken out by state #
+
+# note: this is easiest by simply running a sql query on the monet database directly
+dbGetQuery( db , "SELECT st , COUNT(*) as num_records FROM acs2011_1yr_m GROUP BY st" )
+
+
+
+# count the weighted number of individuals in acs #
+
+# the population of the united states (including group quarters residents: both institutionalized and non-institutionalized) #
+svytotal( ~one , acs.m )
+
+# note that this is exactly equivalent to summing up the weight variable
+# directly from the monet database connection
+dbGetQuery( db , "SELECT SUM( pwgtp ) AS sum_weights FROM acs2011_1yr_m" )
+
+
+# the population of the united states #
+# by state
+svytotal( ~one , acs.m , byvar = ~st )
+# note: the above command is one example of how the r survey package differs from the r sqlsurvey package
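+# with a design built by the r survey package, the equivalent by-state totals would typically be
+# written with svyby() instead -- an illustrative sketch only, since acs.m is a sqlsurvey design:
+# svyby( ~one , ~st , your.survey.design , svytotal )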
+
+
+# calculate the mean of a linear variable #
+
+# average age - nationwide
+svymean( ~agep , acs.m )
+
+# by state
+svymean( ~agep , acs.m , byvar = ~st )
+
+
+# calculate the distribution of a categorical variable #
+
+# HICOV has been converted to a factor (categorical) variable
+# instead of a numeric (linear) variable,
+# because it only contains the values 1 and 2.
+# when the acs.m object was created with the function sqlrepsurvey()
+# the check.factors parameter was left at the default of ten,
+# meaning all numeric columns with ten or fewer distinct values
+# would be automatically converted to factors
+
+# percent uninsured - nationwide
+svymean( ~hicov , acs.m )
+
+# by state
+svymean( ~hicov , acs.m , byvar = ~st )
+
+
+# calculate the median and other percentiles #
+
+# median age of residents of the united states
+svyquantile( ~agep , acs.m , quantiles = 0.5 , se = T )
+
+# note two additional differences between the sqlsurvey and survey packages..
+
+# ..sqlrepsurvey designs do not allow multiple quantiles in a single call. instead,
+# to loop through and print or save multiple quantiles, simply use a for loop
+
+# loop through the 25th, 50th, and 75th quantiles and print each result to the screen
+for ( i in c( .25 , .5 , .75 ) ) print( svyquantile( ~agep , acs.m , quantiles = i , se = TRUE ) )
+
+
+# ..svyquantile() on sqlrepsurvey designs does not allow a byvar argument, meaning the only way to
+# calculate quantiles by state would be by creating subsets for each subpopulation
+# and calculating the quantiles for them independently:
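+# for example -- a minimal sketch looping over two illustrative state fips codes:
+# for ( this.st in c( 1 , 2 ) ) print( svyquantile( ~agep , subset( acs.m , st == this.st ) , quantiles = 0.5 , se = TRUE ) )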
+
+######################
+# subsetting example #
+######################
+
+# restrict the acs.m object to females only
+acs.m.female <- subset( acs.m , sex == 2 )
+
+# now any of the above commands can be re-run
+# using the acs.m.female object
+# instead of the acs.m object
+# in order to analyze females only
+
+# calculate the mean of a linear variable #
+
+# average age - nationwide, restricted to females
+svymean( ~agep , acs.m.female )
+
+# median age - nationwide, restricted to females
+svyquantile( ~agep , acs.m.female , quantiles = 0.5 , se = T )
+
+
+
+###################
+# export examples #
+###################
+
+# calculate the distribution of a categorical variable #
+# by region of the country
+
+# store the results into a new object
+
+coverage.by.region <- svymean( ~hicov , acs.m , byvar = ~region )
+
+# print the results to the screen
+coverage.by.region
+
+# now you have the results saved into a new object of type "svyrepstat"
+class( coverage.by.region )
+
+# print only the statistics (coefficients) to the screen
+coef( coverage.by.region )
+
+# print only the standard errors to the screen
+SE( coverage.by.region )
+
+# this object can be coerced (converted) to a data frame..
+coverage.by.region <- data.frame( coverage.by.region )
+
+# ..and then immediately exported as a comma-separated value file
+# into your current working directory
+write.csv( coverage.by.region , "coverage by region.csv" )
+
+# ..or trimmed to only contain the values you need.
+# here's the uninsured percentage by region,
+# with accompanying standard errors
+uninsured.rate.by.region <-
+ coverage.by.region[ substr( rownames( coverage.by.region) , 1 , 2 ) == "2:" , ]
+
+
+# print the new results to the screen
+uninsured.rate.by.region
+
+# this can also be exported as a comma-separated value file
+# into your current working directory
+write.csv( uninsured.rate.by.region , "uninsured rate by region.csv" )
+
+# ..or directly made into a bar plot
+barplot(
+ uninsured.rate.by.region[ , 1 ] ,
+ main = "Uninsured Rate by Region of the Country" ,
+ names.arg = c( "Northeast" , "Midwest" , "South" , "West" ) ,
+ ylim = c( 0 , .25 )
+)
+
+
+############################
+# end of analysis examples #
+############################
+
+
+# close the connection to the two sqlrepsurvey design objects
+close( acs.m )
+close( acs.h )
+
+# close the connection to the monet database
+dbDisconnect( db )
+
+
+# for more details on how to work with data in r
+# check out my two minute tutorial video site
+# http://www.twotorials.com/
BIN  ...rveillance System/WEAT 2010 Alcohol Consumption by Gender - Crosstab Analysis Results.pdf
Binary file not shown
BIN  ...l Risk Factor Surveillance System/WEAT 2010 Asthma Status - Crosstab Analysis Results.pdf
Binary file not shown
81 Behavioral Risk Factor Surveillance System/replicate cdc weat - 2010.R
@@ -0,0 +1,81 @@
+library(sqlsurvey)
+
+
+shell.exec( "C:/My Directory/BRFSS/MonetDB/monetdb.bat" )
+
+
+dbname <- "brfss"
+dbport <- 50003
+
+Sys.sleep( 20 )
+
+monetdriver <- "c:/program files/monetdb/monetdb5/monetdb-jdbc-2.7.jar"
+drv <- MonetDB( classPath = monetdriver )
+monet.url <- paste0( "jdbc:monetdb://localhost:" , dbport , "/" , dbname )
+db <- dbConnect( drv , monet.url , user = "monetdb" , password = "monetdb" )
+
+
+# manual fix of the open() method for the sqlsurvey() function
+open.sqlsurvey<-function(con, driver, ...){
+ con$conn<-dbConnect(driver, url=con$dbname,...)
+ if (!is.null(con$subset)){
+ con$subset$conn<-con$conn
+ }
+ con
+}
+# this bug has been reported to the sqlsurvey package author
+
+
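+# load the 2010 brfss monet database-backed survey design object created by the download script,
+# then re-open its connection to the monet database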
+load( 'C:/My Directory/BRFSS/b2010 design.rda' )
+brfss.d <- open( brfss.design , driver = drv , user = "monetdb" , password = "monetdb" )
+
+
+# calculate unweighted sample size column
+dbGetQuery(
+ db ,
+ 'select
+ xasthmst , count(*) as sample_size
+ from
+ b2010
+ group by
+ xasthmst
+ order by
+ xasthmst'
+)
+
+# run the row and S.E. of row % columns
+# print the row percent column to the screen
+( row.pct <- svymean( ~xasthmst , brfss.d , se = TRUE ) )
+
+# extract the covariance matrix attribute from the svymean() output
+# take only the values of the diagonal (which contain the variances of each value)
+# square root them all to calculate the standard error
+# save the result into the se.row.pct object and at the same time
+# print the standard errors of the row percent column to the screen
+# ( by surrounding the assignment command with parentheses )
+( se.row.pct <- sqrt( diag( attr( row.pct , 'var' ) ) ) )
+
+# confidence interval lower bounds for row percents
+row.pct - qnorm( 0.975 ) * se.row.pct
+
+# confidence interval upper bounds for row percents
+row.pct + qnorm( 0.975 ) * se.row.pct
+
+# run the sample size and S.E. of weighted size columns
+# print the sample size (weighted) column to the screen
+( sample.size <- svytotal( ~xasthmst , brfss.d , se = TRUE ) )
+
+
+# extract the covariance matrix attribute from the svytotal() output
+# take only the values of the diagonal (which contain the variances of each value)
+# square root them all to calculate the standard error
+# save the result into the se.sample.size object and at the same time
+# print the standard errors of the weighted size column to the screen
+# ( by surrounding the assignment command with parentheses )
+( se.sample.size <- sqrt( diag( attr( sample.size , 'var' ) ) ) )
+
+# confidence interval lower bounds for weighted size
+sample.size - qnorm( 0.975 ) * se.sample.size
+
+# confidence interval upper bounds for weighted size
+sample.size + qnorm( 0.975 ) * se.sample.size
59 Behavioral Risk Factor Surveillance System/usage script.R
@@ -1,59 +0,0 @@
-library(RSQLite)
-library(survey)
-options( survey.lonely.psu = "adjust" )
-
-
-brfss <-
- svydesign(
- id = ~xpsu ,
- strata = ~xststr ,
- nest = TRUE ,
- weights = ~xllcpwt ,
- data = 'b11' ,
- dbtype = "SQLite" ,
- dbname = "s:/temp/temp.db"
- )
-
-
-
-brfss <-
- svydesign(
- id = ~xpsu ,
- strata = ~xststr ,
- nest = TRUE ,
- weights = ~xfinalwt ,
- data = 'b10' ,
- dbtype = "SQLite" ,
- dbname = "s:/temp/temp.db"
- )
-
-# does this work?
-save( brfss , file = "s:/temp/brfss10.rda" )
-
-db <- dbConnect( SQLite() , "s:/temp/temp.db" )
-x <- dbReadTable( db , 'b10' )
-
-
-brfss <-
- svydesign(
- id = ~xpsu ,
- strata = ~xststr ,
- nest = TRUE ,
- weights = ~xfinalwt ,
- data = x
- )
-# this object can also be saved... is that faster?
-# otherwise use MonetDB!
-
-unwtd.count( ~age , brfss )
-unwtd.count( ~sex , brfss )
-svyby( ~age , ~sex , brfss , unwtd.count )
-( a <- svymean( ~factor( sex ) , brfss ) )
-confint( a )
-
-
-PROC CROSSTAB DATA=(your data filename) FILETYPE=SAS DESIGN=WR;
-NEST _STSTR _PSU / MISSUNIT;
-WEIGHT _finalwt;
-SUBGROUP FAIRPOOR;
-LEVELS 2;
2  Survey of Income and Program Participation/2008 panel - download and create database.R
@@ -39,7 +39,7 @@ setwd( "C:/My Directory/SIPP/" )
SIPP.dbname <- "SIPP08.db" # choose the name of the database (.db) file on the local disk
sipp.core.waves <- 1:10 # either choose which core survey waves to download, or set to null
-sipp.replicate.waves <- 1:9 # either choose which replicate weight waves to download, or set to null
+sipp.replicate.waves <- 1:10 # either choose which replicate weight waves to download, or set to null
sipp.topical.modules <- 1:9 # either choose which topical modules to download, or set to NULL
sipp.longitudinal.weights <- TRUE # set to FALSE to prevent download
sipp.cy.longitudinal.replicate.weights <- paste0( 'cy' , c( "09" , "10" ) ) # reads in 2009-2010