diff --git a/DESCRIPTION b/DESCRIPTION
index 8ec2e74..3a24e6c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: dplyr
 Type: Package
 Title: dplyr: a grammar of data manipulation
-Version: 0.1.2
+Version: 0.1.3
 Author: Hadley Wickham , Romain Francois
 Maintainer: Hadley Wickham 
@@ -16,7 +16,7 @@ Suggests: hflights, RSQLite, RSQLite.extfuns, RMySQL, RPostgreSQL, ggplot2, mgcv
 VignetteBuilder: knitr
 LazyData: yes
-LinkingTo: Rcpp (>= 0.11.0), BH (>= 1.51.0-2)
+LinkingTo: Rcpp (>= 0.11.1), BH (>= 1.51.0-2)
 License: MIT + file LICENSE
 Collate: 'RcppExports.R' 'all-equal.r' 'base.R' 'bench-compare.r'
     'cbind.r' 'chain.r' 'compute-collect.r' 'copy-to.r'
@@ -35,7 +35,7 @@ Collate: 'RcppExports.R' 'all-equal.r' 'base.R' 'bench-compare.r'
     'translate-sql-helpers.r' 'translate-sql-base.r'
     'translate-sql-window.r' 'translate-sql.r' 'type-sum.r'
     'utils-format.r' 'utils.r' 'view.r' 'zzz.r'
-Packaged: 2014-02-24 14:57:47 UTC; hadley
+Packaged: 2014-03-14 21:16:41 UTC; hadley
 NeedsCompilation: yes
 Repository: CRAN
-Date/Publication: 2014-02-24 16:36:07
+Date/Publication: 2014-03-15 00:36:22
diff --git a/MD5 b/MD5
index a0e573b..022a38a 100644
--- a/MD5
+++ b/MD5
@@ -1,6 +1,6 @@
-9562a9616e03bc929613661404fcfa2b *DESCRIPTION
+ffb471b74ddb3ed14c68b94e80c8bd2f *DESCRIPTION
 efcdf377730211753577de79ade6efdd *LICENSE
-f85798b8637ba953e3809a652e16bb9e *NAMESPACE
+dfa9b298237419bb0f9ae6b0ba2fb481 *NAMESPACE
 a3c77dc900f7307b5c9be0f028c98c54 *R/RcppExports.R
 05ca53535d5563dad7a4ab2977d38981 *R/all-equal.r
 ddb4cf3f09b89bda6a1c338bad01eaaf *R/base.R
@@ -14,7 +14,7 @@ ebb12675ad0f086ae1f96b5853f03e1c *R/data-hflights.r
 5744860420ce850f3f6f83c02d0d59d4 *R/data-nasa.r
 dbc4f2616f56258bcf6b76f71fa826ea *R/data-temp.r
 273c08017cd317e97b96410c2474924c *R/data.r
-4136061f3154d4981dada3bca96f80e0 *R/dbi-s3.r
+443e253d381bd462e77b8b6e5ba67ece *R/dbi-s3.r
 2ad6e93a659bdb0307275e035028b20d *R/desc.r
 10f579a57cc418ae8e83eca8c265b0cc *R/do.r
 58978c7bf590a7369381542205230dcd *R/dplyr.r
@@ -28,14 +28,14 @@ d6c7d60c41541ff6f61ef15065efc869 *R/grouped-dt.r
 acb7601f471d804cf744a93944ae5e24 *R/inline.r
 2e8f63b45d4c33f74238609015ec43f6 *R/join-df.r
 fdad6d946b904a191bd0e82ac7addcdb *R/join-dt.r
-bdadffe13d2a6ac6b720f606b04919c4 *R/join-sql.r
+9a0977d3fa2c7dd664942940de0d1f04 *R/join-sql.r
 4294e1dcc0e52baadb5924d69ba7723f *R/join.r
 284f0bc5a4ab77b391101a4bd076cd8e *R/lead-lag.R
 fcf6a838baefa7aa2baafdd320905ff2 *R/location.R
 e77ce019a92042c7e9aff43b471b274c *R/manip-cube.r
 8f9a897715df8722119073e827f541dd *R/manip-df.r
-35898e73ac28af17c9ecc267dd5a044b *R/manip-dt.r
-32dd0477fa684a840bc0c1429956e6df *R/manip-grouped-dt.r
+cad6208c82713597384ad24dd8626e91 *R/manip-dt.r
+58dbefbbeef55f115ae53f789a8b1c70 *R/manip-grouped-dt.r
 da6282cdc3e02970636b1f26db808cb8 *R/manip-sql.r
 a3d5227861d70494c3f4104d7ab1c274 *R/manip.r
 be30476e59730e96b59f0441bf618e2e *R/nth-value.R
@@ -69,7 +69,7 @@ a0d09b171ea3b2ae82265876b9bedec3 *R/tbl-df.r
 34f0eea158e652e12db73e9c39e2f884 *R/translate-sql-window.r
 b15b25978391ae58cac82a91b659f865 *R/translate-sql.r
 e51520b2617e10c424fd0b45beaa259d *R/type-sum.r
-85f584c1d3d83e9c3f7f9bc3866e17b2 *R/utils-format.r
+ab8a65154aade259dba4b996121aab8c *R/utils-format.r
 6491444d141099b758785078e6ccaf5b *R/utils.r
 4d9128b66f58cee5fcb57a8e470c82c4 *R/view.r
 1de6fcb5d9c556ccd931f51f425d946d *R/zzz.r
@@ -80,21 +80,21 @@ db40a0145d2a88069865e7f18d3dcf1f *data/nasa.rda
 f3b987de99285483ea0cae5028207cfe *demo/bench-merge.R
 d328eee82d3c54cb13af35bb696caaa6 *demo/bench-rbind.R
 d6bff9b2006cfdf6fe61a1b6cffcd285 *demo/bench-set.R
-ee27bbc32ea6fa4f471537ab2a629450 *inst/doc/databases.R
-fec565e75f4edbf88fd294bf88fe4183 *inst/doc/databases.Rmd
-3cc882bcfad57b5abf4b651d7f4ce4f9 *inst/doc/databases.html
+f2c42b95eb8ebac57ee3bb91a62c4324 *inst/doc/databases.R
+3503bfe827ec6123450c6de9da45f445 *inst/doc/databases.Rmd
+d0119a9073373dad4968fcc0c4993320 *inst/doc/databases.html
 53c28c818e61a19907db6a0b76786943 *inst/doc/hybrid-evaluation.R
 5b90937d6069340c27f29589f8dd478f *inst/doc/hybrid-evaluation.Rmd
 35e707c0233a6d87e5aec274bf8cb32a *inst/doc/hybrid-evaluation.html
 7880dcbbc2ef5a9066a47fdf72d9b53f *inst/doc/introduction.R
-05e037a31750ef8fd422dcff0b40b111 *inst/doc/introduction.Rmd
-da8a118d325a2cf376a1f8f70b994db6 *inst/doc/introduction.html
-aaa2bd4eeb0c7ba04eedaf49172b523e *inst/doc/window-functions.R
-ff626a70b5119f8bcbe51f06cf57f892 *inst/doc/window-functions.Rmd
-ae44f8b30264822383b34920767f7b9b *inst/doc/window-functions.html
+fc3fd6d6c9a7bae5339eba50b18e0279 *inst/doc/introduction.Rmd
+8d930dd5c36db6a1e44572eee4048335 *inst/doc/introduction.html
+6ee55a112c571a3ffa65271b6a6946b5 *inst/doc/window-functions.R
+18c6d2bdc11f9774d76bbc372b0c23ec *inst/doc/window-functions.Rmd
+9d0ebffd3c9c19aaf3893d852abe0f03 *inst/doc/window-functions.html
 2e55b2b2c065264bf676715799917446 *inst/include/dplyr.h
 d712283197ffe37c10fb60ef728a4652 *inst/include/dplyr/BoolResult.h
-38447847a9619459158c71a8d3a362e6 *inst/include/dplyr/Collecter.h
+95bbd7e79710004cbcea0d4c3e864473 *inst/include/dplyr/Collecter.h
 a95ac349adb05141ec17bc25ce8dcfd9 *inst/include/dplyr/DataFrameJoinVisitors.h
 72ae8dca809b468f4c674062b8d90079 *inst/include/dplyr/DataFrameVisitors.h
 e7f539d2e686294c257d5dd6edab558b *inst/include/dplyr/DataFrameVisitorsIndexMap.h
@@ -102,9 +102,9 @@ ec9e401da57308dad4df9f919a70262b *inst/include/dplyr/DataFrameVisitorsIndexSet.h
 9c2ddb8a034638bbf84c729edb7764e1 *inst/include/dplyr/EmptySubset.h
 0b201d156dd149ac2214246d83e0587c *inst/include/dplyr/FullDataFrame.h
 07d812af781c7a597cf4e7cc7d1f8037 *inst/include/dplyr/Gatherer.h
-180bf19356abec5515724dbd863af7b6 *inst/include/dplyr/GroupedDataFrame.h
+899a9a8db739204288f693caf1ea69fd *inst/include/dplyr/GroupedDataFrame.h
 81d6731f78b1e17697aaa93a465955d4 *inst/include/dplyr/JoinVisitor.h
-c3bf69c1340361c1854ab5777db52675 *inst/include/dplyr/JoinVisitorImpl.h
+8cf938eb62881cf95dc7786f4b481dcf *inst/include/dplyr/JoinVisitorImpl.h
 5f2cab6f3f7d5d4d4c6c115856513617 *inst/include/dplyr/NamedListAccumulator.h
 ba58b85eebf33f253f856976b8b401c3 *inst/include/dplyr/Order.h
 5633c9fe5ef847bf5906eb29815ddff5 *inst/include/dplyr/OrderVisitor.h
@@ -149,7 +149,7 @@ a4a7b9d8ff53dfa73eb0feca626a6ea1 *inst/include/dplyr/Result/max.h
 d3e3361e8ab107064f07e0822e3f1f65 *inst/include/dplyr/Result/min.h
 baf9bbb433b610536594c214d68b42dc *inst/include/dplyr/SummarisedVariable.h
 60bede6dba3153744e89c578d67394a1 *inst/include/dplyr/VectorVisitor.h
-252a2841f207733058e3b3dca2f2c243 *inst/include/dplyr/VectorVisitorImpl.h
+b8d26aff32b8d40719eebdda220b05dd *inst/include/dplyr/VectorVisitorImpl.h
 0a2ea8b5c04bb2b4b3b4d0e769a8b690 *inst/include/dplyr/check_supported_type.h
 e829621788476eb0ade32804c9da173b *inst/include/dplyr/comparisons.h
 7d4d427a6dc08dc4e08140771945406c *inst/include/dplyr/registration.h
@@ -184,7 +184,7 @@ cd00f5b80e3fe023bb254a819e17a572 *inst/include/tools/hash.h
 41148b4d529eef9ebf82f4719cdb48dd *inst/include/tools/tools.h
 e8314bb53785945b394173724249edb0 *inst/include/tools/wrap_subset.h
 da3fe6cebc883b4d0581bdcda1ca7146 *inst/tests/helper-data.r
-376fed4bfd27f2211423537539b88319 *inst/tests/test-arrange.r
+1a8aa7229346de4146476cd2992cacbc *inst/tests/test-arrange.r
 4d52dee57e752f870273d56d01904474 *inst/tests/test-cbind.R
 39988efc666e80566c47eb579443096d *inst/tests/test-copying.R
 7428eb39633a9cdcba500c2550774a43 *inst/tests/test-count.r
@@ -195,13 +195,13 @@ b8182a80cabc5e0dd47a5a76a9d4c66c *inst/tests/test-equiv-manip.r
 e4a0af09c1235803412874a1e43d3088 *inst/tests/test-filter-windowed.R
 9667de562a8713b78df2577583c0f593 *inst/tests/test-filter.r
 8bfc92dd97695014b3b0fe5567ef71bf *inst/tests/test-group-by.r
-9d71f585287a49567ca67e8337afd40c *inst/tests/test-joins.r
+00770ba2f1c5bca8f81bc901c0c6b86f *inst/tests/test-joins.r
 f24c693643de1ea17b87c1df9d32d1ec *inst/tests/test-lead-lag.R
 3566efc0ada3ac3ad8c09f6d9e3ce2c2 *inst/tests/test-mutate-windowed.R
 0775fdf22738928c152a63a0448bfdf2 *inst/tests/test-mutate.r
 881e7e5e30b63f953657d5b89532e760 *inst/tests/test-nth-value.R
-42cb5694e5d14710fa3ecaf6ce519035 *inst/tests/test-rbind.r
-4bdde1886d71f2f89d58b3563db309df *inst/tests/test-select.r
+e284b22d0a7d0ca0264526e6dfc9847e *inst/tests/test-rbind.r
+8d106d9d27aec54eede28bfdfbd9c918 *inst/tests/test-select.r
 bf6c33082e25991bc81b4bbc54ea13fd *inst/tests/test-sql-escape.r
 0f1b6d7ab07eb412c9fecf2c76748921 *inst/tests/test-sql-translation.r
 e1a5cffac4ebe5f0d9a773f5c6b0c37a *inst/tests/test-sqlite-do.r
@@ -234,7 +234,7 @@ f79bf9c4ca8f9b3b810419356f654590 *man/hflights_df.Rd
 7c40154e5820f7993d5d12a50d70fa20 *man/join.Rd
 28ab0e094aafee09c186b8b8cf0ba5a2 *man/join.tbl_df.Rd
 0e46ac1917fd5bc3e9e36218705afe81 *man/join.tbl_dt.Rd
-09d836d3bf214774f4a9f20eba0fc686 *man/join.tbl_sql.Rd
+69f8f22450c6ee248b0db2571c5930e4 *man/join.tbl_sql.Rd
 6a81c20b2f495f643fc4edd5055592e9 *man/lahman.Rd
 4741950e34ee9a9e0d33b127f8fcb319 *man/lead-lag.Rd
 89c5b5da3f33d9e875975668b0b50e2a *man/location.Rd
@@ -282,19 +282,19 @@ dde90f02a4ecf1906e681d2ad580bccf *src/Makevars
 dde90f02a4ecf1906e681d2ad580bccf *src/Makevars.win
 f01fca84c3e964251c648d7aca863b80 *src/RcppExports.cpp
 a6c5f62c1c14b7794bc7bfcc59ca8c77 *src/address.cpp
-8fb59ae44735657c3c6176840653d6ff *src/dplyr.cpp
+f22b7e38a7ed37e5a5cd19a936d2fea1 *src/dplyr.cpp
 90995377fb4770719a3271327bc5d3fd *src/filter.cpp
 e2cbde53c2cbc6feefbbbd265d9347c7 *src/init.cpp
 6f882eb21a671ae3f49e2a7c9529c43c *src/window.cpp
 3a7a51bb5059fdf3d254d644385012d5 *tests/test-all.R
-fec565e75f4edbf88fd294bf88fe4183 *vignettes/databases.Rmd
+3503bfe827ec6123450c6de9da45f445 *vignettes/databases.Rmd
 82084b31c1380fe79dd08e41069b8de3 *vignettes/disabled/benchmark-baseball.Rmd
 5b90937d6069340c27f29589f8dd478f *vignettes/hybrid-evaluation.Rmd
-05e037a31750ef8fd422dcff0b40b111 *vignettes/introduction.Rmd
+fc3fd6d6c9a7bae5339eba50b18e0279 *vignettes/introduction.Rmd
 50c26b952a43373d699173780d05a337 *vignettes/joins.graffle
 391a1e1601255c9e368dbd32b68b6126 *vignettes/notes/mysql-setup.Rmd
 5c93a8a98d068f0f241879dbe7e21ef9 *vignettes/notes/postgres-setup.Rmd
 ef5a210df50e79ac302dd92dd1e15ffb *vignettes/notes/vagrant-setup.Rmd
-ff626a70b5119f8bcbe51f06cf57f892 *vignettes/window-functions.Rmd
+18c6d2bdc11f9774d76bbc372b0c23ec *vignettes/window-functions.Rmd
 83cdde894e0c44ffda5a9dbae3c80092 *vignettes/windows.graffle
 2cc473a6bd316193615aee5045fcc835 *vignettes/windows.png
diff --git a/NAMESPACE b/NAMESPACE
index 76b2a93..65ac4e7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -182,6 +182,7 @@ S3method(sql_create_indexes,MySQLConnection)
 S3method(sql_insert_into,MySQLConnection)
 S3method(sql_insert_into,PostgreSQLConnection)
 S3method(sql_insert_into,SQLiteConnection)
+S3method(sql_select,DBIConnection)
 S3method(src_tbls,src_local)
 S3method(src_tbls,src_sql)
 S3method(summarise,data.frame)
diff --git a/R/dbi-s3.r b/R/dbi-s3.r
index 0484c23..f92639b 100644
--- a/R/dbi-s3.r
+++ b/R/dbi-s3.r
@@ -397,7 +397,12 @@ sql_analyze.MySQLConnection <- function(con, table) {
   qry_run(con, sql)
 }
 
-sql_select <- function(con, select, from, where = NULL, group_by = NULL,
+sql_select <- function(con, ...) {
+  UseMethod("sql_select")
+}
+
+#' @export
+sql_select.DBIConnection <- function(con, select, from, where = NULL, group_by = NULL,
                        having = NULL, order_by = NULL, limit = NULL,
                        offset = NULL) {
 
diff --git a/R/join-sql.r b/R/join-sql.r
index afc4820..d2ac94f 100644
--- a/R/join-sql.r
+++ b/R/join-sql.r
@@ -64,7 +64,7 @@
 #' semi_join(people, hof)
 #'
 #' # All people not in the hall of fame
-#' semi_join(people, hof, anti = TRUE)
+#' anti_join(people, hof)
 #'
 #' # Find all managers
 #' manager <- tbl(lahman_sqlite(), "Managers")
@@ -118,21 +118,24 @@ join_sql <- function(x, y, type, by = NULL, copy = FALSE,
                      auto_index = FALSE, ...) {
   type <- match.arg(type, c("left", "right", "inner", "full"))
   by <- by %||% common_by(x, y)
-  
+
   y <- auto_copy(x, y, copy, indexes = if (auto_index) list(by))
 
   # Ensure tables have unique names
   x_names <- auto_names(x$select)
   y_names <- auto_names(y$select)
-  
+
   uniques <- unique_names(x_names, y_names, by)
-  if (!is.null(uniques)) {
+  if (is.null(uniques)) {
+    sel_vars <- c(x_names, y_names)
+  } else {
     x <- update(x, select = setNames(x$select, uniques$x))
-    y <- update(x, select = setNames(y$select, uniques$y))
+    y <- update(y, select = setNames(y$select, uniques$y))
+
+    sel_vars <- unique(c(uniques$x, uniques$y))
   }
+  vars <- lapply(c(by, setdiff(sel_vars, by)), as.name)
 
-  vars <- lapply(c(by, setdiff(c(x_names, y_names), by)), as.name)
-  
   join <- switch(type, left = sql("LEFT"), inner = sql("INNER"),
     right = stop("Right join not supported", call. = FALSE),
     full = stop("Full join not supported", call. = FALSE))
@@ -154,7 +157,7 @@ is.join <- function(x) {
 
 semi_join_sql <- function(x, y, anti = FALSE, by = NULL, copy = FALSE,
                           auto_index = FALSE, ...) {
-  
+
   by <- by %||% common_by(x, y)
   y <- auto_copy(x, y, copy, indexes = if (auto_index) list(by))
 
@@ -162,8 +165,8 @@ semi_join_sql <- function(x, y, anti = FALSE, by = NULL, copy = FALSE,
   by_escaped <- escape(ident(by), collapse = NULL, con = con)
   left <- escape(ident("_LEFT"), con = con)
   right <- escape(ident("_RIGHT"), con = con)
-  
-  join <- sql(paste0(left, ".", by_escaped, " = ", right, ".", by_escaped, 
+
+  join <- sql(paste0(left, ".", by_escaped, " = ", right, ".", by_escaped,
     collapse = " AND "))
 
   from <- build_sql(
diff --git a/R/manip-dt.r b/R/manip-dt.r
index 47be50b..ed49379 100644
--- a/R/manip-dt.r
+++ b/R/manip-dt.r
@@ -123,7 +123,10 @@ arrange.tbl_dt <- function(.data, ...) {
 #' @export
 select.data.table <- function(.data, ...) {
   vars <- select_vars(names(.data), ..., env = parent.frame())
-  .data[, vars, drop = FALSE, with = FALSE]
+
+  out <- .data[, vars, drop = FALSE, with = FALSE]
+  setnames(out, names(vars))
+  out
 }
 
 #' @export
diff --git a/R/manip-grouped-dt.r b/R/manip-grouped-dt.r
index a5a80ff..83e0918 100644
--- a/R/manip-grouped-dt.r
+++ b/R/manip-grouped-dt.r
@@ -141,6 +141,7 @@ arrange.grouped_dt <- function(.data, ...) {
 select.grouped_dt <- function(.data, ...) {
   vars <- select_vars(names(.data), ..., env = parent.frame())
   out <- .data[, vars, drop = FALSE, with = FALSE]
+  setnames(out, names(vars))
 
   grouped_dt(
     data = out,
diff --git a/R/utils-format.r b/R/utils-format.r
index 5935f6f..fcc5bc7 100644
--- a/R/utils-format.r
+++ b/R/utils-format.r
@@ -15,7 +15,7 @@ dim_desc <- function(x) {
   d <- dim(x)
   d2 <- format(d, big.mark = ",", justify = "none", trim = TRUE)
   d2[is.na(d)] <- "??"
-  
+
   paste0("[", paste0(d2, collapse = " x "), "]")
 }
 
@@ -24,16 +24,18 @@ dim_desc <- function(x) {
 trunc_mat <- function(x, n = NULL) {
   rows <- nrow(x)
   if (!is.na(rows) && rows == 0) return()
-  
+
   if (is.null(n)) {
     if (is.na(rows) || rows > getOption("dplyr.print_max")) {
-      n <- getOption("dplyr.print_min") 
+      n <- getOption("dplyr.print_min")
     } else {
       n <- rows
     }
   }
-  
+
   df <- as.data.frame(head(x, n))
+  if (nrow(df) == 0) return()
+
   mat <- format(df, justify = "left")
   width <- getOption("width")
 
@@ -50,7 +52,7 @@ trunc_mat <- function(x, n = NULL) {
     df[[1]] <- substr(df[[1]], 1, width)
   }
   shrunk <- format(df[, !too_wide, drop = FALSE])
-  
+
   needs_dots <- is.na(rows) || rows > n
   if (needs_dots) {
     dot_width <- pmin(w[-1][!too_wide], 3)
@@ -64,14 +66,14 @@ trunc_mat <- function(x, n = NULL) {
     vars <- colnames(mat)[too_wide]
     types <- vapply(df[too_wide], type_sum, character(1))
     var_types <- paste0(vars, " (", types, ")", collapse = ", ")
-    
+
     cat(wrap("Variables not shown: ", var_types), "\n", sep = "")
   }
 }
 
 wrap <- function(..., indent = 0) {
   x <- paste0(..., collapse = "")
-  wrapped <- strwrap(x, indent = indent, exdent = indent + 2, 
+  wrapped <- strwrap(x, indent = indent, exdent = indent + 2,
     width = getOption("width"))
   paste0(wrapped, collapse = "\n")
 }
diff --git a/inst/doc/databases.R b/inst/doc/databases.R
index 43ba7d6..bbda378 100644
--- a/inst/doc/databases.R
+++ b/inst/doc/databases.R
@@ -83,13 +83,13 @@ translate_sql(1L)
 
 
 ## ------------------------------------------------------------------------
-translate_sql(glob(x, y)) 
+translate_sql(glob(x, y))
 translate_sql(x %like% "ab*")
 
 
 ## ------------------------------------------------------------------------
 planes <- group_by(hflights_sqlite, TailNum)
-delay <- summarise(planes, 
+delay <- summarise(planes,
   count = n(),
   dist = mean(Distance),
   delay = mean(ArrDelay)
@@ -107,12 +107,12 @@ if (has_lahman("postgres")) {
 ## ------------------------------------------------------------------------
 if (has_lahman("postgres")) {
   daily <- group_by(hflights_postgres, Year, Month, DayofMonth)
-  
+
   # Find the most and least delayed flight each day
-  bestworst <- filter(daily, ArrDelay == min(ArrDelay) || 
+  bestworst <- filter(daily, ArrDelay == min(ArrDelay) ||
     ArrDelay == max(ArrDelay))
   bestworst$query
-  
+
   # Rank each flight within a daily
   ranked <- mutate(daily, rank = rank(desc(ArrDelay)))
   ranked$query
diff --git a/inst/doc/databases.Rmd b/inst/doc/databases.Rmd
index 4d696d7..e2e98cd 100644
--- a/inst/doc/databases.Rmd
+++ b/inst/doc/databases.Rmd
@@ -19,9 +19,9 @@ As well as working with local in-memory data like data frames and data tables, d
 
 Since R almost exclusively works with in-memory data, if you do have a lot of data in a database, you can't just dump it into R. Instead, you'll have to work with subsets or aggregates, and dplyr aims to make that as easy as possible. If you're working with large data, it's also likely that you'll need support to get the data into the database and to ensure you have the right indices for good performance. dplyr provides some simple tools to help with these tasks but they are no substitute for a local expert.
 
-The motivation for supporting databases in dplyr is that you never pull down the right subset or aggregate from the database the first time, and usually you have to iterate between R and SQL many times before you get the perfect dataset. Switching between languages is cognitively challenging (especially because R and SQL are so perilously similar), so dplyr allows you to write R code that is automatically translated to SQL. The goal of dplyr is not to replace every SQL function with an R function: that would be difficult and error prone. Instead, dplyr only generates `SELECT` statements, the SQL you write most often as an analyst. 
+The motivation for supporting databases in dplyr is that you never pull down the right subset or aggregate from the database the first time, and usually you have to iterate between R and SQL many times before you get the perfect dataset. Switching between languages is cognitively challenging (especially because R and SQL are so perilously similar), so dplyr allows you to write R code that is automatically translated to SQL. The goal of dplyr is not to replace every SQL function with an R function: that would be difficult and error prone. Instead, dplyr only generates `SELECT` statements, the SQL you write most often as an analyst.
 
-To get the most out of this chapter, you'll need to be familiar with querying SQL databases using the `SELECT` statement. 
+To get the most out of this chapter, you'll need to be familiar with querying SQL databases using the `SELECT` statement.
 If you have some familiarity with SQL and you'd like to learn more, I found [how indexes work in SQLite](http://www.sqlite.org/queryplanner.html) and [10 easy steps to a complete understanding of SQL](http://tech.pro/tutorial/1555/10-easy-steps-to-a-complete-understanding-of-sql) to be particularly helpful.
 
 ## Getting started
@@ -88,7 +88,7 @@ c3 <- mutate(c2, Speed = Distance / AirTime * 60)
 c4 <- arrange(c3, Year, Month, DayofMonth, UniqueCarrier)
 ```
 
-Suprisingly, this sequence of operations never actually touches the database. It's not until you ask for the data (e.g. by printing `c4`) that dplyr generates the SQL and requests the results from the database, and even then it only pulls down 10 rows. 
+Suprisingly, this sequence of operations never actually touches the database. It's not until you ask for the data (e.g. by printing `c4`) that dplyr generates the SQL and requests the results from the database, and even then it only pulls down 10 rows.
 
 ```{r}
 c4
@@ -118,7 +118,7 @@ There are three ways to force the computation of a query:
 
 * `collect()` executes the query and returns the results to R.
 
-* `compute()` executes the query and stores the results in a temporary table 
+* `compute()` executes the query and stores the results in a temporary table
   in the database.
 
 * `collapse()` turns the query into a table expresion.
@@ -129,17 +129,17 @@ You are most likely to use `collect()`: once you have interactively converged on
 
 dplyr tries to prevent you from accidentally performing expensive query operations:
 
-* `nrow()` is always `NA`: in general, there's no way to determine how 
+* `nrow()` is always `NA`: in general, there's no way to determine how
   many rows a query will return unless you actually run it.
 
 * Printing a tbl only runs the query enough to get the first 10 rows
 
-* You can use `tail()` on database tbls: you can't find the last rows 
-  without executing the whole query. 
+* You can use `tail()` on database tbls: you can't find the last rows
+  without executing the whole query.
 
 ## SQL translation
 
-When doing simple mathematical operations of the form you normally use when filtering, mutating and summarising it's relatively straightforward to translate R code to SQL (or indeed to any programming language). 
+When doing simple mathematical operations of the form you normally use when filtering, mutating and summarising it's relatively straightforward to translate R code to SQL (or indeed to any programming language).
 
 To experiment with the translation, use `translate_sql()`. The following examples work through some basic differences between R and SQL.
 
@@ -164,8 +164,8 @@ translate_sql(1L)
 dplyr knows how to convert the following R functions to SQL:
 
 * basic math operators: `+`, `-`, `*`, `/`, `%%`, `^`
-* math functions: `abs`, `acos`, `acosh`, `asin`, `asinh`, `atan`, `atan2`, 
-  `atanh`, `ceiling`, `cos`, `cosh`, `cot`, `coth`, `exp`, `floor`, 
+* math functions: `abs`, `acos`, `acosh`, `asin`, `asinh`, `atan`, `atan2`,
+  `atanh`, `ceiling`, `cos`, `cosh`, `cot`, `coth`, `exp`, `floor`,
   `log`, `log10`, `round`, `sign`, `sin`, `sinh`, `sqrt`, `tan`, `tanh`
 * logical comparisons: `<`, `<=`, `!=`, `>=`, `>`, `==`, `%in%`
 * boolean operations: `&`, `&&`, `|`, `||`, `!`, `xor`
@@ -183,7 +183,7 @@ translate_sql(mean(x, trim = T))
 Any function that dplyr does't know how to convert it leaves as is - that means if you want to use any other function that database provides, you can use it as is. Here a couple of examples that will work with [SQLite](http://www.sqlite.org/lang_corefunc.html):
 
 ```{r}
-translate_sql(glob(x, y)) 
+translate_sql(glob(x, y))
 translate_sql(x %like% "ab*")
 ```
 
@@ -193,7 +193,7 @@ SQLite lacks window functions, which are needed for grouped mutation and filteri
 
 ```{r}
 planes <- group_by(hflights_sqlite, TailNum)
-delay <- summarise(planes, 
+delay <- summarise(planes,
   count = n(),
   dist = mean(Distance),
   delay = mean(ArrDelay)
@@ -231,12 +231,12 @@ The following examples shows the grouped filter and mutate possible with Postgre
 ```{r}
 if (has_lahman("postgres")) {
   daily <- group_by(hflights_postgres, Year, Month, DayofMonth)
-  
+
   # Find the most and least delayed flight each day
-  bestworst <- filter(daily, ArrDelay == min(ArrDelay) || 
+  bestworst <- filter(daily, ArrDelay == min(ArrDelay) ||
     ArrDelay == max(ArrDelay))
   bestworst$query
-  
+
   # Rank each flight within a daily
   ranked <- mutate(daily, rank = rank(desc(ArrDelay)))
   ranked$query
@@ -253,7 +253,7 @@ In terms of functionality, MySQL lies somewhere between SQLite and PostgreSQL. I
 
 Bigquery is a hosted database server provided by google. To connect, you need to provide your `project`, `dataset` and optionally a project for `billing` (if billing for `project` isn't enabled). After you create the src, your web browser will open and ask you to authenticate. Your credentials are stored in a local cache, so you should only need to do this once.
 
-Bigquery supports only a single SQL statement: [SELECT](https://developers.google.com/bigquery/query-reference). Fortunately this is all you need for data analysis, and within SELECT bigquery provides comprehensive coverage similar level to postgresql. 
+Bigquery supports only a single SQL statement: [SELECT](https://developers.google.com/bigquery/query-reference). Fortunately this is all you need for data analysis, and within SELECT bigquery provides comprehensive coverage similar level to postgresql.
 
 ## Picking a database
 
diff --git a/inst/doc/databases.html b/inst/doc/databases.html
index 2ddd0da..adf69a2 100644
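For context on the databases vignette hunks above: verbs on a database tbl are lazy, and nothing runs until you ask for rows. A minimal sketch, not part of the diff, assuming RSQLite is installed and using `mtcars` as a stand-in table:

```r
library(dplyr)

db <- src_sqlite(tempfile(), create = TRUE)  # throwaway SQLite database
mt <- copy_to(db, mtcars, "mtcars")          # copy a local data frame into it

# Each verb only builds up a query; no SQL is sent yet
q <- mt %.% select(mpg, cyl, wt) %.% filter(cyl == 4)

q$query             # inspect the SELECT that dplyr generated
res <- collect(q)   # force execution and pull the rows back into R
```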
diff --git a/inst/doc/introduction.Rmd b/inst/doc/introduction.Rmd
index 90942bf..ad50a90 100644
--- a/inst/doc/introduction.Rmd
+++ b/inst/doc/introduction.Rmd
@@ -269,7 +269,7 @@ filter(
 )
 ```
 
-This is difficult to read because the order of the operations is from inside to out, and the arguments are a long way away from the function. To get around this problem, dplyr provides the `%.%` operator. `x %.% f(y)` turns into `f(x, y)` so you can use it to rewrite multiple operations so you can read from left-to-riht, top-to-bottom:
+This is difficult to read because the order of the operations is from inside to out, and the arguments are a long way away from the function. To get around this problem, dplyr provides the `%.%` operator. `x %.% f(y)` turns into `f(x, y)` so you can use it to rewrite multiple operations so you can read from left-to-right, top-to-bottom:
 
 ```{r, eval = FALSE}
 hflights %.%
@@ -309,11 +309,11 @@ Compared to DBI and the database connection algorithms:
 
 * it hides, as much as possible, the fact that you're working with a remote database
 * you don't need to know any sql (although it helps!)
-* it shims over the many differences between the difference DBI implementations
+* it shims over the many differences between the different DBI implementations
 
 ## Multidimensional arrays / cubes
 
-`tbl_cube()` provides an experimental interface to multidimenssional arrays or data cubes. If you're using this form of data in R, please get in touch so I can better understand your needs.
+`tbl_cube()` provides an experimental interface to multidimensional arrays or data cubes. If you're using this form of data in R, please get in touch so I can better understand your needs.
 
 # Comparisons
 
diff --git a/inst/doc/introduction.html b/inst/doc/introduction.html
index ef45f96..623af29 100644
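For reference, the chaining idiom that the corrected introduction text above describes, written out in full as a sketch (assumes the `hflights` package used by the vignette):

```r
library(dplyr)
library(hflights)

# x %.% f(y) becomes f(x, y), so the steps read top to bottom
hflights %.%
  group_by(Year, Month, DayofMonth) %.%
  summarise(
    arr = mean(ArrDelay, na.rm = TRUE),
    dep = mean(DepDelay, na.rm = TRUE)
  ) %.%
  filter(arr > 30 | dep > 30)
```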
diff --git a/inst/doc/window-functions.R b/inst/doc/window-functions.R
index 90ccb57..762870d 100644
--- a/inst/doc/window-functions.R
+++ b/inst/doc/window-functions.R
@@ -10,7 +10,7 @@ batting <- select(tbl_df(Batting), playerID, yearID, teamID, G, AB:H)
 batting <- arrange(batting, playerID, yearID, teamID)
 players <- group_by(batting, playerID)
 
-# For each player, find the two years with most home runs
+# For each player, find the two years with most hits
 filter(players, min_rank(desc(H)) <= 2 & H > 0)
 # Within each player, rank each year by the number of games played
 mutate(players, G_rank = min_rank(G))
@@ -67,7 +67,7 @@ mutate(players, G_delta = G - lag(G))
 
 ## ----, results = "hide"--------------------------------------------------
 # Find when a player changed teams
-filter(players, teamID != lag(teamID)); TRUE
+filter(players, teamID != lag(teamID))
 
 
 ## ------------------------------------------------------------------------
diff --git a/inst/doc/window-functions.Rmd b/inst/doc/window-functions.Rmd
index 3dd3997..ba2d595 100644
--- a/inst/doc/window-functions.Rmd
+++ b/inst/doc/window-functions.Rmd
@@ -20,7 +20,7 @@ batting <- select(tbl_df(Batting), playerID, yearID, teamID, G, AB:H)
 batting <- arrange(batting, playerID, yearID, teamID)
 players <- group_by(batting, playerID)
 
-# For each player, find the two years with most home runs
+# For each player, find the two years with most hits
 filter(players, min_rank(desc(H)) <= 2 & H > 0)
 # Within each player, rank each year by the number of games played
 mutate(players, G_rank = min_rank(G))
@@ -137,7 +137,7 @@ You can use them to:
   
     ```{r, results = "hide"}
     # Find when a player changed teams
-    filter(players, teamID != lag(teamID)); TRUE
+    filter(players, teamID != lag(teamID))
    ```
 
 `lead()` and `lag()` have an optional argument `order_by`. If set, instead of using the row order to determine which value comes before another, they will use another variable. This important if you have not already sorted the data, or you want to sort one way and lag another.
diff --git a/inst/doc/window-functions.html b/inst/doc/window-functions.html
index 96cf328..366b51c 100644
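A small illustration of the ranking helpers behind the corrected comment above (`min_rank()` combined with `desc()`); the values are made up:

```r
library(dplyr)

x <- c(10, 4, 4, 7)
min_rank(x)        # 4 1 1 3 -- ties share the smallest available rank
min_rank(desc(x))  # 1 3 3 2 -- rank from largest to smallest, as used for "top two years"
```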
diff --git a/inst/include/dplyr/Collecter.h b/inst/include/dplyr/Collecter.h
index 2b4f6c6..5b1b331 100644
--- a/inst/include/dplyr/Collecter.h
+++ b/inst/include/dplyr/Collecter.h
@@ -120,9 +120,13 @@ namespace dplyr {
             
             SEXP* levels_ptr = Rcpp::internal::r_vector_start(levels) ;
             int* source_ptr = Rcpp::internal::r_vector_start(source) ;
-            for( int i=0; i<index.size(); i++){
-                SEXP x = levels_ptr[ source_ptr[i] - 1 ] ;
-                data[ index[i] ] = levels_map.find(x)->second ;
+            for( int i=0; i<index.size(); i++){
+                if( source_ptr[i] == NA_INTEGER ){
+                    data[ index[i] ] = NA_INTEGER ;
+                } else {
+                    SEXP x = levels_ptr[ source_ptr[i] - 1 ] ;
+                    data[ index[i] ] = levels_map.find(x)->second ;
+                }
             } 
         }
         
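The R-level behaviour this Collecter.h change supports (an `NA` code in a factor column must survive row binding; see the `rbind_list()` test for #279 further down). A rough sketch, assuming factors are created by `data.frame()`'s default `stringsAsFactors`:

```r
library(dplyr)

xx <- data.frame(a = NA_real_, b = "c", c = "d")   # b and c become factors
zz <- data.frame(a = 1,        b = NA,  c = "b")

res <- rbind_list(xx, zz)   # warns because the factor levels differ; values are combined
res$b                       # "c" NA -- the NA is preserved rather than mangled
```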
diff --git a/inst/include/dplyr/GroupedDataFrame.h b/inst/include/dplyr/GroupedDataFrame.h
index ff7c462..b93c575 100644
--- a/inst/include/dplyr/GroupedDataFrame.h
+++ b/inst/include/dplyr/GroupedDataFrame.h
@@ -93,7 +93,7 @@ namespace Rcpp {
     
     template <>
     inline bool is<GroupedDataFrame>( SEXP x){
-        return Rf_inherits(x, "grouped_df" ) ;
+        return Rf_inherits(x, "grouped_df" ) && Rf_getAttrib(x, Rf_install("vars") ) != R_NilValue ;
     }
     
     inline GroupedDataFrameIndexIterator::GroupedDataFrameIndexIterator( const GroupedDataFrame& gdf_ ) : 
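For orientation, the stricter grouped_df check above keys on the `vars` attribute that `group_by()` attaches; a quick, illustrative R-level look:

```r
library(dplyr)

gdf <- group_by(mtcars, cyl)
class(gdf)          # "grouped_df" "tbl_df" "tbl" "data.frame"
attr(gdf, "vars")   # list of grouping symbols, e.g. list(quote(cyl));
                    # objects carrying the class but lacking this attribute
                    # are no longer treated as grouped data frames
```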
diff --git a/inst/include/dplyr/JoinVisitorImpl.h b/inst/include/dplyr/JoinVisitorImpl.h
index 15d5c00..a375510 100644
--- a/inst/include/dplyr/JoinVisitorImpl.h
+++ b/inst/include/dplyr/JoinVisitorImpl.h
@@ -152,7 +152,11 @@ namespace dplyr{
         boost::hash string_hash ;
     
         inline SEXP get(int i){
-            return i>=0 ? left_levels_ptr[ left[i] - 1] : right_levels_ptr[right[-i-1] - 1] ;    
+            if( i >= 0 ){
+                return ( left[i] == NA_INTEGER ) ? NA_STRING : left_levels_ptr[ left[i] - 1] ;
+            } else {
+                return ( right[-i-1] == NA_INTEGER ) ? NA_STRING : right_levels_ptr[right[-i-1] - 1] ;                  
+            }
         }
         
     } ;
diff --git a/inst/include/dplyr/VectorVisitorImpl.h b/inst/include/dplyr/VectorVisitorImpl.h
index 25b0b35..1d4c332 100644
--- a/inst/include/dplyr/VectorVisitorImpl.h
+++ b/inst/include/dplyr/VectorVisitorImpl.h
@@ -18,6 +18,7 @@ namespace dplyr {
     template <> inline std::string VectorVisitorType<REALSXP>(){ return "numeric" ; }
     template <> inline std::string VectorVisitorType<LGLSXP>() { return "logical" ; }
     template <> inline std::string VectorVisitorType<STRSXP>() { return "character" ; }
+    template <> inline std::string VectorVisitorType<VECSXP>() { return "list" ; }
     
     /** 
      * Implementations 
@@ -102,14 +103,28 @@ namespace dplyr {
         inline SEXP subset_int_index( const Container& index ) const {
             int n = output_size(index) ;
             VECTOR out = Rcpp::no_init(n) ;
-            // TODO: find a way to mark that we don't need the NA handling
-            for( int i=0; i
+    template 
+    SEXP VectorVisitorImpl::subset_int_index( const Container& index ) const {
+        int n = output_size(index) ;
+        List out(n) ;
+        for( int i=0; i 
     class PromoteClassVisitor : public VisitorImpl {
     public:
@@ -268,6 +283,8 @@ namespace dplyr {
                 return new VectorVisitorImpl( vec ) ;
             case LGLSXP:  return new VectorVisitorImpl( vec ) ;
             case STRSXP:  return new VectorVisitorImpl( vec ) ;
+                
+            case VECSXP:  return new VectorVisitorImpl( vec ) ;
             default: break ;
         }
         
diff --git a/inst/tests/test-arrange.r b/inst/tests/test-arrange.r
index d33c4ea..8945f15 100644
--- a/inst/tests/test-arrange.r
+++ b/inst/tests/test-arrange.r
@@ -80,3 +80,10 @@ test_that("arrange uses the white list", {
 
 })
 
+test_that("arrange handles list columns (#282)", {
+  df <- data.frame( a = 2:1 )
+  df$b <- list( "foo", "bar" )
+  res <- arrange(df, a)
+  expect_equal(res$b, list( "bar", "foo" ) )
+})
+
diff --git a/inst/tests/test-joins.r b/inst/tests/test-joins.r
index ba9f764..ef5cf66 100644
--- a/inst/tests/test-joins.r
+++ b/inst/tests/test-joins.r
@@ -122,3 +122,11 @@ test_that("univariate left join has all columns, all rows", {
   expect_equal(j1$z.y, c(1, 1, 2, 3, NA))
   expect_equal(j2$z.y, c(1, 2, 3, 3, NA))
 })
+
+test_that("inner_join does not segfault on NA in factors (#306)", {
+  a <- data.frame(x=c("p", "q", NA), y=c(1, 2, 3), stringsAsFactors=TRUE)
+  b <- data.frame(x=c("p", "q", "r"), z=c(4,5,6), stringsAsFactors=TRUE)
+  res <- inner_join(a, b)
+  expect_equal( nrow(res), 2L )
+})
+
diff --git a/inst/tests/test-rbind.r b/inst/tests/test-rbind.r
index 3384594..4396a7e 100644
--- a/inst/tests/test-rbind.r
+++ b/inst/tests/test-rbind.r
@@ -80,3 +80,13 @@ test_that( "rbind handles NULL",{
   expect_equal(nrow(res), 30L)
 })
 
+test_that( "rbind handles NA in factors #279", {
+  xx <- as.data.frame(list(a=as.numeric(NA), b="c", c="d")) 
+  zz <- as.data.frame(list(a=1, b=as.character(NA), c="b"))
+  expect_warning( res <- rbind_list( xx, zz ) )
+  
+  expect_equal(res$a, c(NA,1.0))
+  expect_equal(res$b, c("c", NA))
+  expect_equal(res$c, c("d","b"))
+  
+})
diff --git a/inst/tests/test-select.r b/inst/tests/test-select.r
index 3959c56..33a72f1 100644
--- a/inst/tests/test-select.r
+++ b/inst/tests/test-select.r
@@ -62,3 +62,26 @@ test_that("num_range selects numeric ranges", {
   expect_equal(select_vars(vars, num_range("x", 10:11, width = 2)), vars[5:6])
 })
 
+# Data table -------------------------------------------------------------------
+
+test_that("select changes columns in copy of data table", {dt <- data.table(x = 1:4, y = letters[1:4])
+
+  expect_equal(names(select(dt, x, z = y)), c("x", "z"))
+  expect_equal(names(dt), c("x", "y"))
+
+
+  gdt <- dt %.% group_by(x)
+  expect_equal(names(select(gdt, x, z = y)), c("x", "z"))
+  expect_equal(names(gdt), c("x", "y"))
+})
+
+test_that("select can be before group_by (#309)",{
+  df <- data.frame(id=c(1,1,2,2,2,3,3,4,4,5), year=c(2013,2013,2012,2013,2013,2013,2012,2012,2013,2013), var1=rnorm(10))
+  dfagg <- df %.%
+    group_by(id, year) %.%
+    select(id, year, var1) %.%
+    summarise(var1=mean(var1))
+  expect_equal(names(dfagg), c("id", "year", "var1"))
+  expect_equal(attr(dfagg, "vars" ), list(quote(id)))
+  
+})
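A sketch of the mechanics the data.table tests above exercise: `select_vars()` returns a named character vector, and the fixed `select.data.table()` applies those names to a copy with data.table's `setnames()`. The `vars` value here is written out by hand for illustration:

```r
library(data.table)

dt   <- data.table(x = 1:4, y = letters[1:4])
vars <- c(x = "x", z = "y")                    # what select_vars(names(dt), x, z = y) yields

out <- dt[, vars, drop = FALSE, with = FALSE]  # copy of the selected columns
setnames(out, names(vars))                     # rename the copy only
names(out)   # "x" "z"
names(dt)    # "x" "y" -- the original data.table is untouched
```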
diff --git a/man/join.tbl_sql.Rd b/man/join.tbl_sql.Rd
index dd6f604..16de4a2 100644
--- a/man/join.tbl_sql.Rd
+++ b/man/join.tbl_sql.Rd
@@ -93,7 +93,7 @@ hof <- tbl(lahman_sqlite(), "HallOfFame")
 semi_join(people, hof)
 
 # All people not in the hall of fame
-semi_join(people, hof, anti = TRUE)
+anti_join(people, hof)
 
 # Find all managers
 manager <- tbl(lahman_sqlite(), "Managers")
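The man page change above replaces `semi_join(..., anti = TRUE)` with the dedicated `anti_join()` verb. A hedged sketch using the same Lahman SQLite setup as the example (it assumes `people` comes from the "Master" table, which is not shown in this hunk):

```r
library(dplyr)

people <- tbl(lahman_sqlite(), "Master")       # assumed source of `people`
hof    <- tbl(lahman_sqlite(), "HallOfFame")

semi_join(people, hof)   # people with at least one Hall of Fame record
anti_join(people, hof)   # people with no Hall of Fame record at all
```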
diff --git a/src/dplyr.cpp b/src/dplyr.cpp
index 3435881..f20d3f7 100644
--- a/src/dplyr.cpp
+++ b/src/dplyr.cpp
@@ -1387,21 +1387,23 @@ DataFrame select_grouped( GroupedDataFrame gdf, const CharacterVector& keep, Cha
   // handle vars  attribute : make a shallow copy of the list and alter 
   //   its names attribute
   List vars = shallow_copy( copy.attr("vars") ); 
+  
   int nv = vars.size() ;
   for( int i=0; i<nv; i++){
diff --git a/vignettes/databases.Rmd b/vignettes/databases.Rmd
index 4d696d7..e2e98cd 100644
--- a/vignettes/databases.Rmd
+++ b/vignettes/databases.Rmd
@@ -164,8 +164,8 @@ translate_sql(1L)
 dplyr knows how to convert the following R functions to SQL:
 
 * basic math operators: `+`, `-`, `*`, `/`, `%%`, `^`
-* math functions: `abs`, `acos`, `acosh`, `asin`, `asinh`, `atan`, `atan2`, 
-  `atanh`, `ceiling`, `cos`, `cosh`, `cot`, `coth`, `exp`, `floor`, 
+* math functions: `abs`, `acos`, `acosh`, `asin`, `asinh`, `atan`, `atan2`,
+  `atanh`, `ceiling`, `cos`, `cosh`, `cot`, `coth`, `exp`, `floor`,
   `log`, `log10`, `round`, `sign`, `sin`, `sinh`, `sqrt`, `tan`, `tanh`
 * logical comparisons: `<`, `<=`, `!=`, `>=`, `>`, `==`, `%in%`
 * boolean operations: `&`, `&&`, `|`, `||`, `!`, `xor`
@@ -183,7 +183,7 @@ translate_sql(mean(x, trim = T))
 Any function that dplyr does't know how to convert it leaves as is - that means if you want to use any other function that database provides, you can use it as is. Here a couple of examples that will work with [SQLite](http://www.sqlite.org/lang_corefunc.html):
 
 ```{r}
-translate_sql(glob(x, y)) 
+translate_sql(glob(x, y))
 translate_sql(x %like% "ab*")
 ```
 
@@ -193,7 +193,7 @@ SQLite lacks window functions, which are needed for grouped mutation and filteri
 
 ```{r}
 planes <- group_by(hflights_sqlite, TailNum)
-delay <- summarise(planes, 
+delay <- summarise(planes,
   count = n(),
   dist = mean(Distance),
   delay = mean(ArrDelay)
@@ -231,12 +231,12 @@ The following examples shows the grouped filter and mutate possible with Postgre
 ```{r}
 if (has_lahman("postgres")) {
   daily <- group_by(hflights_postgres, Year, Month, DayofMonth)
-  
+
   # Find the most and least delayed flight each day
-  bestworst <- filter(daily, ArrDelay == min(ArrDelay) || 
+  bestworst <- filter(daily, ArrDelay == min(ArrDelay) ||
     ArrDelay == max(ArrDelay))
   bestworst$query
-  
+
   # Rank each flight within a daily
   ranked <- mutate(daily, rank = rank(desc(ArrDelay)))
   ranked$query
@@ -253,7 +253,7 @@ In terms of functionality, MySQL lies somewhere between SQLite and PostgreSQL. I
 
 Bigquery is a hosted database server provided by google. To connect, you need to provide your `project`, `dataset` and optionally a project for `billing` (if billing for `project` isn't enabled). After you create the src, your web browser will open and ask you to authenticate. Your credentials are stored in a local cache, so you should only need to do this once.
 
-Bigquery supports only a single SQL statement: [SELECT](https://developers.google.com/bigquery/query-reference). Fortunately this is all you need for data analysis, and within SELECT bigquery provides comprehensive coverage similar level to postgresql. 
+Bigquery supports only a single SQL statement: [SELECT](https://developers.google.com/bigquery/query-reference). Fortunately this is all you need for data analysis, and within SELECT bigquery provides comprehensive coverage similar level to postgresql.
 
 ## Picking a database
 
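A few `translate_sql()` calls in the spirit of the vignette hunks above; the commented outputs are indicative of the SQL dplyr generates:

```r
library(dplyr)

translate_sql(x == 1 & y > 2)   # "x" = 1.0 AND "y" > 2.0
translate_sql(mean(x))          # AVG("x")
translate_sql(glob(x, y))       # GLOB("x", "y") -- unknown functions pass through untouched
```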
diff --git a/vignettes/introduction.Rmd b/vignettes/introduction.Rmd
index 90942bf..ad50a90 100644
--- a/vignettes/introduction.Rmd
+++ b/vignettes/introduction.Rmd
@@ -269,7 +269,7 @@ filter(
 )
 ```
 
-This is difficult to read because the order of the operations is from inside to out, and the arguments are a long way away from the function. To get around this problem, dplyr provides the `%.%` operator. `x %.% f(y)` turns into `f(x, y)` so you can use it to rewrite multiple operations so you can read from left-to-riht, top-to-bottom:
+This is difficult to read because the order of the operations is from inside to out, and the arguments are a long way away from the function. To get around this problem, dplyr provides the `%.%` operator. `x %.% f(y)` turns into `f(x, y)` so you can use it to rewrite multiple operations so you can read from left-to-right, top-to-bottom:
 
 ```{r, eval = FALSE}
 hflights %.%
@@ -309,11 +309,11 @@ Compared to DBI and the database connection algorithms:
 
 * it hides, as much as possible, the fact that you're working with a remote database
 * you don't need to know any sql (although it helps!)
-* it shims over the many differences between the difference DBI implementations
+* it shims over the many differences between the different DBI implementations
 
 ## Multidimensional arrays / cubes
 
-`tbl_cube()` provides an experimental interface to multidimenssional arrays or data cubes. If you're using this form of data in R, please get in touch so I can better understand your needs.
+`tbl_cube()` provides an experimental interface to multidimensional arrays or data cubes. If you're using this form of data in R, please get in touch so I can better understand your needs.
 
 # Comparisons
 
diff --git a/vignettes/window-functions.Rmd b/vignettes/window-functions.Rmd
index 3dd3997..ba2d595 100644
--- a/vignettes/window-functions.Rmd
+++ b/vignettes/window-functions.Rmd
@@ -20,7 +20,7 @@ batting <- select(tbl_df(Batting), playerID, yearID, teamID, G, AB:H)
 batting <- arrange(batting, playerID, yearID, teamID)
 players <- group_by(batting, playerID)
 
-# For each player, find the two years with most home runs
+# For each player, find the two years with most hits
 filter(players, min_rank(desc(H)) <= 2 & H > 0)
 # Within each player, rank each year by the number of games played
 mutate(players, G_rank = min_rank(G))
@@ -137,7 +137,7 @@ You can use them to:
   
     ```{r, results = "hide"}
     # Find when a player changed teams
-    filter(players, teamID != lag(teamID)); TRUE
+    filter(players, teamID != lag(teamID))
     ```
 
 `lead()` and `lag()` have an optional argument `order_by`. If set, instead of using the row order to determine which value comes before another, they will use another variable. This important if you have not already sorted the data, or you want to sort one way and lag another.
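Finally, a short sketch of the `order_by` argument described in the closing paragraph above (made-up data):

```r
library(dplyr)

df <- data.frame(year = c(2002, 2000, 2001), value = c(4, 0, 1))

with(df, lag(value))                    # NA 4 0 -- previous value in row order
with(df, lag(value, order_by = year))   # previous value in year order, realigned to the rows
```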