Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
Choose a Base Repository
cockroachdb/cockroach
AALEKH/cockroach
Abioy/cockroach
AflenChen/cockroach
Arifur794/cockroach
CodEnFisH/cockroach
DilipLukose/cockroach
El-Coder/cockroach
Frank-Jin/cockroach
GavinHwa/cockroach
GokulSrinivas/cockroach
GrayMissing/cockroach
HanumathRao/cockroach
HengWang/cockroach
HunterChen/cockroach
InsaneYoungStunner/cockroach
Kevin-GuanJian/cockroach
Linicks/cockroach
PragashSiva/cockroach
RaduBerinde/cockroach
SandyZeng/cockroach
Viewtiful/cockroach
XuWanHong/cockroach-1
Zemnmez/cockroach
a-robinson/cockroach
abhishekgahlot/cockroach
alex/cockroach
alisheikh/cockroach
anchal-agrawal/cockroach
andradeandrey/cockroach
angel1991521/cockroach
ansonism/cockroach
axfcampos/cockroach
banks/cockroach
bdarnell/cockroach
bdotdub/cockroach
bigrats/cockroach
bigxing/cockroach
bobpattersonjr/cockroach
bowlofstew/cockroach
brandenyoon/cockroach
briliant1991/cockroach
bussiere/cockroach
bydsky/cockroach
cDoru/cockroach
cainiao1989/cockroach
cdsalmons/cockroach
chagge/cockroach
chunshengster/cockroach
cleverdeng/cockroach
clm971910/cockroach
cn15810092493/cockroach
connecteev/cockroach
dallasmarlow/cockroach
darkseed/cockroach
db-production/cockroach
dfrsg/cockroach
diegode/cockroach
domluna/cockroach
eagle518/cockroach
easyfmxu/cockroach
eclectice/cockroach
elvin-du/cockroach
embark/cockroach
erriapo/cockroach
es-chow/cockroach
esaul/cockroach
flyingliang/cockroach
gaowenbin/cockroach
ghostsun/cockroach
gqf2008/cockroach
grimreaper/cockroach
gstarnberger/cockroach
gude/cockroach
guiquanz/cockroach
hannibalhuang/cockroach
hanshenu/cockroach
hanwoody/cockroach
hcxiong/cockroach
hollis/cockroach
hubt/cockroach
hunslater/cockroach
iamima/cockroach
icattlecoder/cockroach
ikarzali/cockroach
ilovejs/cockroach
jackylk/cockroach
jamesgraves/cockroach
jamiepg1/cockroach
jay23jack/cockroach
jess-edwards/cockroach
jinguoxing/cockroach
jmank88/cockroach
joezxy/cockroach
joliny/cockroach
jonathanmarvens/cockroach
josephwinston/cockroach
josephyzhou/cockroach
joshuawatson/cockroach
jrcjc123/cockroach
jsanc623/cockroach
kanasite/cockroach
kebohiki/cockroach
kkaneda/cockroach
kortschak/cockroach
kritivasas/cockroach
kuguobing/cockroach
lemonhall/cockroach
leomzhong/cockroach
lessc0de/cockroach
lianhuiwang/cockroach
liuzongquan/cockroach
lostz/cockroach
lshmouse/cockroach
luan-cestari/cockroach
lupengfeige/cockroach
mabdullah353/cockroach
mackjoner/cockroach
maniksurtani/cockroach
manithnuon/cockroach
markreg/cockroach
matadorhong/cockroach
meshileya/cockroach
mindis/cockroach
mixiong/cockroach
mjibson/cockroach
mobilipia/cockroach
mohae/cockroach
mrunix/cockroach
msmakhlouf/cockroach
nanderoo/cockroach
neuroradiology/cockroach
neutony/cockroach
nikelius/cockroach
nimishzynga/cockroach
nkgfirecream/cockroach
nmarasoiu/cockroach
ofonimefrancis/cockroach
oldmantaiter/cockroach
ollyblue/cockroach
petermattis/cockroach
picolonet/storage
pinterb/cockroach
pramendra/cockroach
putaozhuose/cockroach
r00tjimmy/cockroach
ramgtv/cockroach
rayleyva/cockroach
sandeepmukho/cockroach
sawanoboly/cockroach
scrooph/cockroach
sdboyer/cockroach
shafiahmed/cockroach
shanyechen/cockroach
shilezi/cockroach
silky/cockroach
slavau/cockroach
sunya123/cockroach
superneo/cockroach
swarbiv/cockroach
sxhao/cockroach
tamird/cockroach
therob3000/cockroach
timwee/cockroach
tml/cockroach
tomzhang/cockroach
toshisam/cockroach
trebogeer/cockroach
treemantris/cockroach
tristartom/cockroach
truthwzl/cockroach
tschottdorf/cockroach
udybrill/cockroach
umegaya/cockroach
vikram/cockroach
vivekmenezes/cockroach
vvydier/cockroach
waderly/cockroach
walkingsparrow/cockroach
wangtuanjie/cockroach
wheelcomplex/cockroach
willmadison/cockroach
wulinjun4/cockroach
wuyu201321060203/cockroach
wycg1984/cockroach
xiaoyulei/cockroach
yacki/cockroach
yananzhi/cockroach
yangyaoweng/cockroach
yanniyang/cockroach
yekeqiang/cockroach
yemaocheng/cockroach
yonglehou/cockroach
zeeshanali/cockroach
zhaixuezhong/cockroach
zhangchn/cockroach
zhanglei/cockroach
zhonghai/cockroach
zimmermamc/cockroach
zofuthan/cockroach
Nothing to show
Choose a Head Repository
cockroachdb/cockroach
AALEKH/cockroach
Abioy/cockroach
AflenChen/cockroach
Arifur794/cockroach
CodEnFisH/cockroach
DilipLukose/cockroach
El-Coder/cockroach
Frank-Jin/cockroach
GavinHwa/cockroach
GokulSrinivas/cockroach
GrayMissing/cockroach
HanumathRao/cockroach
HengWang/cockroach
HunterChen/cockroach
InsaneYoungStunner/cockroach
Kevin-GuanJian/cockroach
Linicks/cockroach
PragashSiva/cockroach
RaduBerinde/cockroach
SandyZeng/cockroach
Viewtiful/cockroach
XuWanHong/cockroach-1
Zemnmez/cockroach
a-robinson/cockroach
abhishekgahlot/cockroach
alex/cockroach
alisheikh/cockroach
anchal-agrawal/cockroach
andradeandrey/cockroach
angel1991521/cockroach
ansonism/cockroach
axfcampos/cockroach
banks/cockroach
bdarnell/cockroach
bdotdub/cockroach
bigrats/cockroach
bigxing/cockroach
bobpattersonjr/cockroach
bowlofstew/cockroach
brandenyoon/cockroach
briliant1991/cockroach
bussiere/cockroach
bydsky/cockroach
cDoru/cockroach
cainiao1989/cockroach
cdsalmons/cockroach
chagge/cockroach
chunshengster/cockroach
cleverdeng/cockroach
clm971910/cockroach
cn15810092493/cockroach
connecteev/cockroach
dallasmarlow/cockroach
darkseed/cockroach
db-production/cockroach
dfrsg/cockroach
diegode/cockroach
domluna/cockroach
eagle518/cockroach
easyfmxu/cockroach
eclectice/cockroach
elvin-du/cockroach
embark/cockroach
erriapo/cockroach
es-chow/cockroach
esaul/cockroach
flyingliang/cockroach
gaowenbin/cockroach
ghostsun/cockroach
gqf2008/cockroach
grimreaper/cockroach
gstarnberger/cockroach
gude/cockroach
guiquanz/cockroach
hannibalhuang/cockroach
hanshenu/cockroach
hanwoody/cockroach
hcxiong/cockroach
hollis/cockroach
hubt/cockroach
hunslater/cockroach
iamima/cockroach
icattlecoder/cockroach
ikarzali/cockroach
ilovejs/cockroach
jackylk/cockroach
jamesgraves/cockroach
jamiepg1/cockroach
jay23jack/cockroach
jess-edwards/cockroach
jinguoxing/cockroach
jmank88/cockroach
joezxy/cockroach
joliny/cockroach
jonathanmarvens/cockroach
josephwinston/cockroach
josephyzhou/cockroach
joshuawatson/cockroach
jrcjc123/cockroach
jsanc623/cockroach
kanasite/cockroach
kebohiki/cockroach
kkaneda/cockroach
kortschak/cockroach
kritivasas/cockroach
kuguobing/cockroach
lemonhall/cockroach
leomzhong/cockroach
lessc0de/cockroach
lianhuiwang/cockroach
liuzongquan/cockroach
lostz/cockroach
lshmouse/cockroach
luan-cestari/cockroach
lupengfeige/cockroach
mabdullah353/cockroach
mackjoner/cockroach
maniksurtani/cockroach
manithnuon/cockroach
markreg/cockroach
matadorhong/cockroach
meshileya/cockroach
mindis/cockroach
mixiong/cockroach
mjibson/cockroach
mobilipia/cockroach
mohae/cockroach
mrunix/cockroach
msmakhlouf/cockroach
nanderoo/cockroach
neuroradiology/cockroach
neutony/cockroach
nikelius/cockroach
nimishzynga/cockroach
nkgfirecream/cockroach
nmarasoiu/cockroach
ofonimefrancis/cockroach
oldmantaiter/cockroach
ollyblue/cockroach
petermattis/cockroach
picolonet/storage
pinterb/cockroach
pramendra/cockroach
putaozhuose/cockroach
r00tjimmy/cockroach
ramgtv/cockroach
rayleyva/cockroach
sandeepmukho/cockroach
sawanoboly/cockroach
scrooph/cockroach
sdboyer/cockroach
shafiahmed/cockroach
shanyechen/cockroach
shilezi/cockroach
silky/cockroach
slavau/cockroach
sunya123/cockroach
superneo/cockroach
swarbiv/cockroach
sxhao/cockroach
tamird/cockroach
therob3000/cockroach
timwee/cockroach
tml/cockroach
tomzhang/cockroach
toshisam/cockroach
trebogeer/cockroach
treemantris/cockroach
tristartom/cockroach
truthwzl/cockroach
tschottdorf/cockroach
udybrill/cockroach
umegaya/cockroach
vikram/cockroach
vivekmenezes/cockroach
vvydier/cockroach
waderly/cockroach
walkingsparrow/cockroach
wangtuanjie/cockroach
wheelcomplex/cockroach
willmadison/cockroach
wulinjun4/cockroach
wuyu201321060203/cockroach
wycg1984/cockroach
xiaoyulei/cockroach
yacki/cockroach
yananzhi/cockroach
yangyaoweng/cockroach
yanniyang/cockroach
yekeqiang/cockroach
yemaocheng/cockroach
yonglehou/cockroach
zeeshanali/cockroach
zhaixuezhong/cockroach
zhangchn/cockroach
zhanglei/cockroach
zhonghai/cockroach
zimmermamc/cockroach
zofuthan/cockroach
Nothing to show
Checking mergeability… Don’t worry, you can still create the pull request.
This comparison is big! We’re only showing the most recent 250 commits
Commits on Oct 10, 2017
Merge pull request #19143 from andreimatei/cherrypick-sql-kv-tracing
cherry-pick 1.1: sql: don't enable "kv tracing" for SHOW TRACE FOR <stmt>
Commits on Oct 12, 2017
knz
sql: revert the default behavior of DROP DATABASE to CASCADE
... because that's what DROP DATABASE does in postgres, and schema
migration tools expect it to work this way.

However, in order to avoid unpleasant surprises for users, make
DROP DATABASE *without* behavior specifier also error out
when the session variable `sql_safe_updates` is set. This is
the case e.g. in interactive shells by default. For example:

```
root@:26257/> drop database t;
pq: rejected: DROP DATABASE on non-empty database without explicit CASCADE (sql_safe_updates = true)
```
release-1.1: cluster: properly introduce cluster version v1.1
We forgot to introduce a proper version for v1.1.0. As a result, clusters
running our v1.1 release identify as v1.0-3, which in turn means that the
upgrade instructions do not work:

https://www.cockroachlabs.com/docs/v1.1/upgrade-cockroach-version.html

(1.1 needs to be replaced by 1.0-3).

We've already released v1.1.0 so unless we decide that we want to yank the release
(don't think it's necessary) this will only become active in v1.1.1 (before
the release of which we should add another version, v1.1.1). We should think
about linting this or at least making its verification a part of the release
checklist, too.

cc @bdarnell @jseldess
Merge pull request #18974 from a-robinson/cherrypick_18957
cherrypick-1.1: server: Fix --advertise-port
Commits on Oct 13, 2017
Merge pull request #19225 from tschottdorf/unscrew-versions
release-1.1: cluster: properly introduce cluster version v1.1
Commits on Oct 14, 2017
Merge pull request #18980 from BramGruneir/cherrypick-18900
Cherrypick 1.1:  server, ui: add quiescent to the debug page
Commits on Oct 16, 2017
sql: support casting strings to arrays
Fixes #18419.

Support expressions like '{1,2,3}'::INT[], or '{abc,"xyz"}'::STRING[].

More testing, incl. fuzzing might be necessary.
I'm also iffy on the current ad-hoc construction of an EvalContext for
the purposes of having a location.
sql: support anyarray oid for parsing
Used by several drivers, including the node driver and the go driver.
cloud: Update kubernetes configs for v1.1
Being sure to add on the cache and max-sql-memory flags that are now
important for production-ready configs.
sql: support arrays in pgwire
When adding arrays, I overlooked filling out the relevant pg_catalog
entries to support array types. This commit fixes this and also handles
the incoming arrays in pgwire.
lgo
Merge pull request #19274 from lego/kubernetes-1.1
release-1.1: cloud: Update kubernetes configs for v1.1
Merge pull request #19273 from justinj/cp-array-pgwire
cherry-pick 1.1: handle arrays in pgwire properly
dt
sqlccl: pass needed spans to prev backup checks
Previously we validated previous backups were well-formed and ordered.
We did not, however, pass the spans which needed to be covered, the way we do
when running the matching check in RESTORE, meaning that we'd accept a set of
previous backups that didn't actually cover the spans being backed up.

Re-ordering the steps in the backup plan slightly computes the matching tables
and spans before validating the prior backups (and setting the start time), thus
catching that case where the set of tables (and thus the spans for which we need
complete history in order to restore) has changed.

Another potential approach would be to automatically change the startTime for
the spans for which we are missing history, effectively de-incrementalizing those
tables. This opens up significant additional complexity though. A simple error
at BACKUP time should at least indicate there is an issue right away, rather
than letting an operator believe they are making usable BACKUPs that cannot
actually be RESTOREd.
knz
Merge pull request #19281 from knz/20171016-cherrypick-19126
cherrypick-1.1: cliflags: list the missing format in the help text
Merge pull request #19285 from justinj/cp-escape
cherry-pick 1.1: sql: handle escape sequences at start of arrays
dt
Merge pull request #19286 from dt/cherrypick-inc
cherrypick-1.1: sqlccl: pass needed spans to prev backup checks
util/log: don't panic
Previously, log.outputLogEntry could panic while holding the log mutex.
This would deadlock any goroutine that logged while recovering from the
panic, which is approximately all of the recover routines. Most
annoyingly, the crash reporter would deadlock, swallowing the cause of
the panic.

Avoid panicking while holding the log mutex and use l.exit instead,
which exists for this very purpose. In the process, enforce the
invariant that l.mu is held when l.exit is called. (The previous
behavior was, in fact, incorrect, as l.flushAll should not be called
without holding l.mu.)

Also add a Tcl test to ensure this doesn't break in the future.
Merge pull request #19287 from benesch/1.1-dont-panic
cherrypick-1.1: util/log: don't panic
Commits on Oct 17, 2017
storage: declare AbortCache only when used
This ensures that we won't declare the AbortSpan key for a txn
when committing or moving intents.
storage: prevent accidentally unpoisoning aborted txn
Discovered by @bdarnell in #18635 (comment).
Making poisoning happen less often (to reduce contention) is planned but
requires more care.
log: improve error reporting
In particular, this should allow errors like "write: no space left on device" to
expose themselves on sentry.
Merge pull request #19311 from tschottdorf/cp-poison
cherrypick-1.1: AbortCache correctness + performance fixes from #19093
storage: avoid miscounting scan requests in TestStoreScanIntents
Previously, in TestStoreScanIntents, if the race detector slowed things
down sufficiently, a periodic gossip would be triggered before the test
completed. This gossip would generate scan requests that were included
in the test's bookkeeping, causing spurious failures.

Filter out scan requests that aren't generated by the test by using a
unique prefix for the test's keys. Enabling the DisablePeriodicGossip
testing knob would have the same effect, but this approach is more
robust against future features that might generate background scan
requests.
server: allow more than one lease in TestRange[s]Response
If the server is slow to boot, leases might be renewed and generate
additional history entries. Loosen the assertion of the number of
history entries in TestRangeResponse and TestRangesResponse from
"exactly one history entry" to "at least one history entry."
Commits on Oct 18, 2017
Merge pull request #19324 from benesch/1.1-flakiness
cherrypick-1.1: combat some test flakiness
Merge pull request #19314 from tschottdorf/cp-reporting
cherrypick-1.1: log: improve error reporting
Commits on Oct 19, 2017
knz
Merge pull request #19209 from knz/20171012-cherrypick-19182
cherry-pick 1.1: sql: revert the default behavior of DROP DATABASE to CASCADE
Commits on Oct 20, 2017
cli: simplify and improve dump performance
A significant percentage (near 50%) of CPU time during dump is spent
converting strings returned from the SELECT into SQL-safe strings and
writing that to the output. Much of this work can be done concurrently,
which improves the performance, and gets it close to that of piping
a `SELECT *` to a file. This is done using an errgroup and passing
various stages over channels.

We can also greatly simplify dump now since cockroach streams
results. There's no need anymore to page through the data.

See cockroachdb/docs#1674
cli: test dump with non-default PK name
Although this test is already passing, it is being added to master
so we don't regress and so it can be backported to 1.1, where it
is failing.

See #18500
Merge pull request #19400 from mjibson/cherrypick-18472
cherrypick-1.1: cli: simplify and improve dump performance
sql: don't set isKey if columns are nullable
We assume that the columns of an index form a "key" but that is only correct if
the columns are not null.

Fixes #19343.
Commits on Oct 21, 2017
storage: avoid some liveness clobbering
Sharpen the conditions under which Gossip updates overwrite the existing
liveness entry. I believe this clobbering doesn't happen in practice,
but this commit also prepares the code to be reusable in other locations
that are known to clobber.

See #18219.
storage: remove `self` from liveness
Storing entries for the own node in `self` but also having the `nodes` map
was confusing, and the original motivation, if any, was hard to come by.
It seems more straightforward to treat the own node like any other, and to
handle out-of-order updates by comparing livenesses. Admittedly this has
some shortcomings, but especially for the own entry, these appear to be
acceptable.
Merge pull request #19419 from RaduBerinde/fix-key-1.1
cherry-pick 1.1: sql: don't set isKey if columns are nullable
Commits on Oct 22, 2017
storage: order liveness update criteria
Though Expiration should never move backwards in practice, it is a
theoretical possibility (at least when using clockless reads).
Use an ordering that prioritizes Epoch to address that.
storage: de-flake TestNodeIsLiveCallback
This test exercised that liveness callbacks fired after a node's liveness
changed from false to true. However, the trigger was tied to Gossip updates
only, but the liveness also updated "eagerly" after the heartbeat.

Recent refactorings made it much more likely that the eager update took
precedence, in which case the Gossip handler would avoid updating a
second time, forgetting the callbacks as a consequence.

This was addressed by making callback invocation the duty of the code
that ingests livenesses.

The test was extremely flaky before, and passed several thousand iterations as
of this commit.

Fixes #19344.
storage: deflake TestNodeHeartbeatCallback
The node could become live by observing the Gossip update (triggered by its
heartbeat write) before getting to the point in `heartbeatInternal` at which
it would invoke the heartbeat callback. As a result, one location in this
test needs to be prepared to wait for a split second.

Flaky in the hundreds before, now not flaky with 2k+ iterations.

Fixes #19362.
storage: de-flake TestNodeLiveness
Heartbeat() is not guaranteed to return success.

Fixes #18340.
Commits on Oct 23, 2017
sql: fix repeated escapes in array parsing
Previously, we wouldn't properly parse arrays containing strings with
multiple escapes in a row. This commit fixes that.

Also added a random generator, which catches the errors we had before.

Going to cherry-pick for 1.1.2.
Merge pull request #19452 from justinj/cp-repeated-escapes
cherry-pick 1.1: sql: fix repeated escapes in array parsing
Merge pull request #19422 from tschottdorf/cp-liveness
cherrypick-1.1: less clobbering in node liveness
sql: fix double stopTracing() call
Release notes: Fix a spurious error log when running SHOW TRACE FOR.

stopTracing() was being called twice by SHOW TRACE FOR - once when the
traceNode was done consuming the real query's results (in
traceNode.Next()), and once in traceNode.Close(). The second call was
causing an error to be logged complaining that we're no (longer)
tracing.
Commits on Oct 24, 2017
Merge pull request #19469 from andreimatei/sql-fix-trace
cherry-pick 1.1 sql: fix double stopTracing() call
storage: report failed raft command application
Previously, if `applyRaftCommand` returned an error, it would mark the replica
as corrupt but then go on and execute the side effects and potentially the
assertions in `assertState()`. These were then likely to fail and return a
misleading error, as likely seen in #16004.

Instead, cause a fatal error right when observing the error, and potentially
capture the root cause on sentry.io. The (perhaps too optimistic) expectation
is that after accounting these disk corruption/space errors, there will be
much fewer (possibly no) reports triggered by `assertState()`.

Touches #16004.
storage: return error from split/merge lock acquisition
Also report the error to sentry because we suspect it to have caused
the bug referenced below in versions of CockroachDB not running with
this commit.

See #19172.
storage: Switch an occasionally spammy log.Infof to log.VEventf
These were never meant to be log.Infof -- I must have removed the
log.V(2) wrapper from around them and forgotten to switch them to
log.VEventf. This can create a bunch of log messages when a cluster
becomes unbalanced, such as when a new node joins.
Merge pull request #19494 from a-robinson/cherrypick_19491
cherrypick-1.1: storage: Switch an occasionally spammy log.Infof to log.VEventf
cherrypick-1.1: storageccl: use the official AWS SDK (#19473)
cherrypick-1.1: storageccl: use the official AWS SDK

This change is similar to #18902 except that it:
- omits the Size method and tests
- ensures that there are no changes to vendor, only Gopkg.lock

There are various other changes in Gopkg.lock which should have
already been in 1.1, but are included here for correctness.

See #18902
Fixes #19435
Merge pull request #19484 from tschottdorf/cp11-errors
cherrypick-1.1: storage: split & raft error improvements
Commits on Oct 25, 2017
sql,cli: support arrays in dump
Fixes #19487.

I neglected to add the array handling case to dump. This commit adds it.
Added an `EachColType` function that will hopefully be able to be kept up
to date which can be used to make tests which will continue to test
new column types as more get added.
log: special-case Wrap(safe, "foo")
Extend the previous special case to allow reporting a wrapped
error verbatim if it is the only argument.

This should allow the changes in #19447 to *actually* report the wrapped
error including its type (before the string would be reported, but losing
the wrapped type).
Commits on Oct 26, 2017
Merge pull request #19550 from tschottdorf/safe-log
cherrypick-1.1: log: special-case Wrap(safe, "foo")
dt
settings: avoid multiple *settings.Values at runtime
This eliminates all but one non-testing call to MakeClusterSettings -- which is critical to correctness.
Any *settings.Values other than the one maintained by the refresh loop will be stale/lies, so only in
testing do we want call sites creating settings value containers.

Fixes #19533.
Merge pull request #19544 from RaduBerinde/fix-join-anon-alias-1.1
cherrypick-1.1: sql: fix anonymous alias set up for natural join
Merge pull request #19526 from justinj/cp-dump-array
cherry-pick 1.1: sql,cli: support arrays in dump
dt
Merge pull request #19563 from dt/cherrypick-settings
cherrypick-1.1: settings: avoid multiple *settings.Values at runtime
Merge pull request #19585 from jordanlewis/cp-ren-random-uuid
cherrypick-1.1: sql: add builtin function for random UUID generation
Commits on Oct 29, 2017
Commits on Oct 30, 2017
Merge pull request #19626 from tschottdorf/cp11-staggered-version
cherrypick-1.1: ui: exclude decommissioned nodes from staggered version warning
Merge pull request #19642 from tschottdorf/embedded
release-1.1: ui: regenerate embedded.go
cherrypick-1.1: sqlccl: cleanup failed or canceled RESTORE data
Previously, if a RESTORE failed or was manually canceled, any data
it had committed would have been orphaned, and forever taken up
space. Since there was no table descriptor, nothing would have been
able to see or delete it.

Add optional callbacks in jobs code to allow for specific jobs
to implement an onFail method. Add the job's client.Txn to the
update method and this onFail callback to allow for a job to be
transactionally failed and cleaned up.

Release note: Cleanup partially restored data when a RESTORE fails
or is canceled.

This PR is from #19578 but with all of the settings passing removed,
since 1.1 doesn't have any cluster settings for backup/restore.

Fixes #19398
Fixes #17123
Commits on Oct 31, 2017
Merge pull request #19653 from mjibson/cherrypick-19578
cherrypick-1.1: sqlccl: cleanup failed or canceled RESTORE data
sql: prefer hard limit
We are seeing a case where we have a soft limit coming from above a limit node,
and a sort node underneath, something along the lines of:
  SELECT DISTINCT (<some query> ORDER BY x LIMIT 100) ORDER BY x LIMIT 25

We are using the soft limit because it's smaller, but this makes the sort node
store all the rows in memory. The hard limit avoids this.

Switch to preferring the hard limit.

Fixes #19677.

Release Note: Improved memory usage for certain queries that use limits at
multiple levels.
Merge pull request #19688 from RaduBerinde/hard-limit-1.1
cherry-pick 1.1: sql: prefer hard limit
Commits on Nov 02, 2017
sql: pre-evaluate arguments to set
Run Eval() on all arguments passed to Set before beginning the Set plan.
This is necessary because the implementations of all of the different
variable setters don't get passed the proper EvalContext, and therefore
won't be able to properly resolve Placeholders once they become leaf
values.

This wouldn't be necessary if the Set implementations were able to get
the correct EvalContext, but doing that properly would require a larger
refactor.
Merge pull request #19750 from jordanlewis/cp-pre-eval-set
cherrypick-1.1: sql: pre-evaluate arguments to set
Commits on Nov 03, 2017
build: Move issue-posting script from TC config to repo
This is cleaned up from the previous version and modified to work
for any project, not just merge-to-master.
Merge pull request #19774 from bdarnell/post-failures-1.1
cherrypick-1.1: build: Move issue-posting script from TC config to repo
Commits on Nov 06, 2017
distsqlrun: fix a double close of the merge joiner output
This patch fixes a bug that caused us to call ProducerDone() twice on
the output of a mergeJoiner in case outputting a matched row encountered
an error or a closed consumer. This was a panic.
knz
sql: ensure that DELETE on the fast path is still a valid data source
Prior to this patch, the DELETE statement without WHERE and RETURNING
would use the "fast path", that is, all the work being done in Start().

The code for this would however violate the `planNode` interface
contract: after the fast path is taken, the code would panic if the
`Next()` method is called. `planNode` specifies that `Next()` is
always callable after `Start()`, although perhaps it has nothing to
do.
knz
sql/parser: fix the grouping of a = ANY/SOME/ALL b <postfixop>
Prior to this patch, an expression of the form `a = ANY b <postfix>`,
for example `1 = ANY 2::INT` would be parsed as `(a = ANY b)
<postfix>` (`((1) = ANY (2))::INT`), not the required `a = ANY (b
<postfix>)` (`(1) = ANY ((2)::INT)`).

This is incorrect on its face and, peeking at the pg grammar, indeed
different from what pg does: the right operand of an ANY/SOME/ALL
comparison must be an `a_expr`, not a `d_expr`.
Merge pull request #19828 from andreimatei/cherry-pic-merge-joiner-do…
…uble-close

cherry-pick 1.1: distsqlrun: fix a double close of the merge joiner output
log: better error reporting
The previous code tended to lose the type when it mattered most, for example in
`errors.Wrap(unknownSentinelErr, "something")`, the output would have only an
empty string for the unknown sentinel error. This will now print its type
instead (which could still be useless, but it's better than nothing).
log: extract file:line from most opaque errors in reports
This makes previously completely opaque errors such as `errors.Errorf("%s",
"foo")` much more useful by redacting them to their file:line. In particular,
most `errors.Wrap` chains should essentially emit a piecemeal stack trace.

Note that this only works for the `pkg/errors` package, but we use that in most
places (as a drop-in for the `errors` package).
Commits on Nov 07, 2017
knz
Merge pull request #19830 from knz/20171106-cherrypick-19822
cherrypick-1.1: sql: ensure that DELETE on the fast path is still a valid data source
knz
Merge pull request #19831 from knz/20171106-cherrypick-19801
cherry-pick 1.1:  sql/parser: fix the grouping of a = ANY/SOME/ALL b <postfixop>
sql: fix KV TRACE crash with COUNT(*)
Also adding a test.

Fixes #19846.
cherry-pick 1.1: sql: fix panic with IN expressions and subqueries
Fixes #19770.

Previously, an IN expression with a subquery on the RHS was not
type-checked for if the column value matched the LHS, which would lead
to a panic in some cases.

This commit introduces a check in the case of an IN expression that the
type matches.

There is already such a check for explicit tuples, but these cannot be
folded into the same check, as the typechecking of the tuple case has
some specialized logic for inferring its types.
log: simplify crash report anonymization
We had an optimization in place that allowed `panic(err)` to report the
type of `err` instead of reporting a wrapper error. This optimization
incurred too much complexity to be worth it and is removed in this commit.
Merge pull request #19878 from justinj/cp-in
cherry-pick 1.1: sql: fix panic with IN expressions and subqueries
Merge pull request #19848 from tschottdorf/cp11-error-reporting
cherrypick-1.1: improve sentry error reporting
Merge pull request #19868 from RaduBerinde/kvfetcher-trace-crash-1.1
cherrypick-1.1: sql: fix KV TRACE crash with COUNT(*)
Commits on Nov 14, 2017
sql: fix span generation with IN
Issue #20035 exposes a bug in span generation. We have an IN with three values
and we only see one span.

The root cause is that we are appending to the same slices for each IN value,
which means that if the slice has some extra capacity, we overwrite the previous
value. The result is that we get multiple spans that are identical (which get
merged later into a single span).

Fixes #20035.

Release Note: Fixed a bug leading to incorrect results for queries with IN
constraints (in some cases).
Merge pull request #20040 from RaduBerinde/spans-bug-1.1
cherrypick-1.1: sql: fix span generation with IN
Commits on Nov 15, 2017
sqlbase: fix encoding of index value when using unspecified columns
prior to this fix we would treat an unspecified column as a NULL value
when encoding a column for an index key, but not when encoding it in the
index as the value part of the key:value index pair.

For the storing case, we would pick up the first specified value
for the unspecified value and encode it in the index creating a
completely broken index!

For the composite encoding case it would crash. See #20000

fixes #20000
Merge pull request #20074 from vivekmenezes/vivek/20001
cherrypick-1.1: sqlbase: fix encoding of index value when using unspecified columns
cherrypick-1.1: storage: do not GC large transactions
Cherry-pick of #19538.

Release notes (temporary workaround): avoid overloading the system during
cleanup of large transactions.

cc @cockroachdb/release
Commits on Nov 16, 2017
Merge pull request #20083 from tschottdorf/cp-gc-hotfix
cherrypick-1.1: storage: do not GC large transactions
storage: report stringified RocksDB error code
We are now seeing first error reports that have a RocksDB-originating error at their root,
so next it would be nice to see what classes of errors we see. We hope to see "IO error"
a lot, as that usually means out of disk.

See https://github.com/facebook/rocksdb/blob/master/include/rocksdb/status.h for a list
of status code messages that we can expect to see.

Release note: none
Merge pull request #20108 from tschottdorf/cp11-rocksdbreporting
cherrypick-1.1: storage: report stringified RocksDB error code
Commits on Nov 18, 2017
build: more robust checking for clean workspaces
The inverted conditional in the previous version of the check for a
clean workspace meant that if `git status --porcelain` failed (e.g., due
to a corrupted Git repository), the workspace was assumed to be clean.
This allowed a commit that forgot to update a generated file to sneak
onto a release branch.

Replace the conditional with one that is both more understandable and
robust against `git status` failures.

Fixes #19639.
Merge pull request #20139 from benesch/cherrypick-lint-clean
cherrypick-1.1: build: more robust checking for clean workspaces
Commits on Nov 20, 2017
sql: re-enable and add new Window function tests
Some commented out tests claimed to depend on #12482, but they work
without it thanks to recent improvements to `addOrReuseRenders`.

This commit also adds a window function test with an ORDER BY clause
over multiple columns.
sql: fix PARTITION BY multiple columns with window functions
Fixes #20143.

This has been broken since 278e2e7. Instead of adding to the
`partitionIdxs` slice, we were replacing it for each new partitioning
column.
sql: fix handling of errors on Flush
Release note: Fix a possible crash due to statements finishing execution
after the client connection has been closed.

The Executor Flush()es results to the client on occasions. Before this
patch, if the flush failed (because the client conn had died) we'd
always try to roll back the kv txn. If there was no kv txn (e.g. because
it was a COMMIT that we were flushing), the server would panic. This
patch makes the rollback conditional on the state.

Fixes #20007
sql: support SHOW TRACE FOR SELECT ... AS OF SYSTEM TIME
Release note: SHOW TRACE FOR SELECT ... AS OF SYSTEM TIME is now
supported.

Before this patch, the Executor wasn't recognizing the AS OF SYSTEM TIME
and so the SELECT was failing with a planning error.
Commits on Nov 21, 2017
sqlccl: correctly mutate table descriptors during resumed RESTORE
If a RESTORE was resumed after a pause or node failure, it would
attempt to regenerate the sql descriptors it was using. However it did
this in a way that was different than the first invocation that didn't
use the same resume codepath. This mutation was to remove the FK on
indexes if the user requested it. Move the mutation to a downstream
place so both invocation paths will mutate index descriptors in the
same way.

Release note: correctly resume RESTORE jobs that skip foreign keys.
Merge pull request #20183 from andreimatei/cherry-pick-fix-flush-err
cherry-pick 1.1: sql: fix handling of errors on Flush
Merge pull request #20187 from andreimatei/cherry-pick-show-trace-aost
cherry-pick 1.1: sql: support SHOW TRACE FOR SELECT ... AS OF SYSTEM TIME
Merge pull request #20156 from nvanbenschoten/nvanbenschoten/cherrypi…
…ck_20151

cherrypick-1.1: sql: fix PARTITION BY multiple columns with window functions
Merge pull request #20168 from mjibson/cherrypick-20092
cherrypick-1.1: sqlccl: correctly mutate table descriptors during resumed RESTORE
metrics: write vars to temporary buffer.
Fixes #20186.

We have seen cases of requests hanging (see #20118) while trying to
write the contents of `/_status/vars`. Since this was done while holding
the lock, all further requests failed.

This workaround allows a single connection to hang without blocking
future requests. It does nothing to detect/fix hangs and will actually
hide such issues by not causing a goroutine/filedescriptor blowup.
Merge pull request #20209 from mberhault/marc/cherrypick_20194
cherrypick-1.1: metrics: write vars to temporary buffer.
Commits on Nov 27, 2017
cherrypick-1.1: cli: dump: fix infinite loop when resolving depend…
…encies.

Release Note (cli): fix panic on `cockroach dump` in the presence of
reference cycles.

While this will avoid the loop and keep dependency order, there may
still be problems with dependency cycles.

This came up in #20254 when encountering a foreign key referring to the
same table.

Before the fix, the test fails with:
```
runtime: goroutine stack exceeds 1000000000-byte limit
fatal error: stack overflow
```
cherrypick-1.1: skip generating empty sst
Release note (rocksdb): don't write empty sstables (causes corruption).

Fixes #19891.
knz
sql: ensure that AS OF SYSTEM TIME is handled consistently
Prior to this patch, two inconsistencies were present in the code:

- a user-visible inconsistency: `AS OF SYSTEM TIME` (henceforth
  referred to as "AOST") was accepted anywhere in a query as soon as
  it was present at the top level SELECT statement, including with
  conflicting timestamps. Accepting AOST in multiple places doesn't
  feel undesirable, but allowing conflicting timestamps to be
  specified is unsound with the current underlying mechanism (a shared
  timestamp for the entire transaction).

- an internal inconsistency, visible to CockroachDB developers: the
  presence of an AOST clause during planning was erroneously conflated
  with the flag that disables caching of table descriptors
  (`planner.avoidCachedDescriptors`). This is erroneous because,
  although AOST *implies* `avoidCachedDescriptors == true` (we can't
  use the cache while time travelling), the converse is not true: when
  processing view descriptors (and perhaps, in the future, for other
  reasons), we also disable descriptor caching although AOST is not
  involved.

This patch rectifies this situation as follows:

- a new planner flag `asOfSystemTime` is introduced to indicate
  that an AOST clause was properly recognized at the top level.

- the logic that allows or refuses AOST clauses in FROM clauses in
  arbitrarily nested SELECT clauses (including, potentially, those
  expanded from views), is modified to use this new flag.

- the timestamps of AOST clauses, if multiple are specified, are
  checked to be equal to the one set at the top level. This
  restriction might be lifted in the future if we ever support
  different AOST clauses per data source.

In addition, the error message when an AOST clause is not given in the
proper syntactic position is improved to hint where it should be
placed instead.

----

Release note (sql, bug fix): it is not possible any more to indicate
conflicting `AS OF SYSTEM TIME` clauses in different parts of a query.
1.1 only: distsqlrun: introduce --extra-1.0-compatibility
Introduce a new flag forcing 1.1 nodes to be compatible with DistSQL
flows produced by 1.0 gateways. This is motivated by some clients not
finding it easy to upgrade from 1.0 to 1.1 because of DistSQL on 1.1
refusing flows produced by 1.0, and so a mixed-version cluster running
for an extended period has a problem.

--extra-1.0-compatibility does the following:
- makes consumers not send handshake messages to producers upon stream
connection. These messages were new in 1.1 and would confuse 1.0 code.
These handshakes are not actually used at the moment, albeit they may be
in a future version.
- an old proto format for AggregationSpec's is supported.
- the DistSQL MinAcceptedVersion is reset to 3, which is 1.0.x's
version.

These changes are done only on the 1.1 branch. 1.2 will not support this
flag.
knz
Merge pull request #20286 from knz/20171127-cherrypick-20267
cherry-pick 1.1: sql: ensure that AS OF SYSTEM TIME is handled consistently
Merge pull request #20285 from andreimatei/extra-10-compat
1.1 only: distsqlrun: introduce --extra-1.0-compatibility
Commits on Nov 28, 2017
Merge pull request #20283 from mberhault/marc/cherrypick_20255
cherrypick-1.1: cli: dump: fix infinite loop when resolving depend…
knz
sql: name the transaction settings like PG does
In PostgreSQL, the transaction isolation level is reported by the
session variable `transaction_isolation` (two words separated by an
underscore), not `transaction isolation level` (three words separated
by spaces). Also the transaction level is reported in lowercase.

Clients actually care about this stuff.

This patch makes CockroachDB behave more like PG.

This also aligns `transaction_priority` and `transaction_status`
(CockroachDB extension) with `transaction_isolation` (PG standard
setting), for more consistency in UX.

Patch series: ["Just because you are unique does not mean you are
useful."](https://duckduckgo.com/?q=just+because+you+are+unique+does+not+mean+you+are+useful&iar=images)

----

Release note (sql): the session settings `transaction isolation
level`, `transaction priority` and `transaction status` are now called
`transaction_isolation`, `transaction_priority` and
`transaction_status` for better compatibility with PostgreSQL.
knz
Merge pull request #20301 from knz/20171128-cherrypick-20264
cherry-pick 1.1: sql: name the transaction settings like PG does
Commits on Nov 30, 2017
Merge pull request #20284 from mberhault/marc/update_rocksdb
cherrypick-1.1: skip generating empty sst
Commits on Dec 04, 2017
storage: move txn intent cleanup out of GC critical path, & GC keys e…
…arly

This change first moves the cleanup of potentially "fat" transactions out
of the GC critical path by sending them to the intent resolver's asynchronous
cleanup mechanism. This prevents a txn laden with significant numbers of
unresolved intents from gumming up the GC queue and experiencing context
timeouts.

Moved GC of keys scanned during the GC process to execute before intents
encountered during the scan are processed. This makes progress in the GC
queue more likely and greatly lessens the chance of a stubborn range
entering into an infinite GC loop.

Removed unused code for the `ResolveOptions.Wait` boolean and changed
`intentResolver.resolveIntents` to send batches of 100 serially with a new
timeout for each batch. In concert with this, removed the timeout previously
set in `intentResolver.processIntents`.

Release note: improve garbage collection of very large transactions and
large volumes of abandoned writes (intents).
Commits on Dec 05, 2017
libroach: avoid significant temp disk space usage in DBCompact
Change DBCompact to individually compact the ranges delineated by the
sstables in the bottom-most level. This reduces temporary disk space
usage for compact range significantly. Before this change, running
`debug compact` on a 2.5GB storage directory temporarily grew the size
of the storage directory to 3.5GB. After this change, the storage
directory remains at 2.5GB (or below).

Release note (cli): reduce temporary disk space usage for the `debug
compact` command.

Fixes #20338
distsql: fix crash in addSorters
Fixing a crash triggered by a case where we don't actually need a sortNode
column.

Fixes #20481.

Release note (bug fix): fixed a crash triggered by some corner-case queries
containing ORDER BY.
Commits on Dec 06, 2017
Merge pull request #20502 from petermattis/pmattis/cherrypick-debug-c…
…ompact

cherry-pick-1.1: libroach: avoid significant temp disk space usage in DBCompact
Merge pull request #20512 from RaduBerinde/crash-plan-sort-1.1
cherrypick-1.1: distsql: fix crash in addSorters
distsqlrun: close orderedSynchronizer sources that were neglected
These sources were neglected because they were not added to the heap
when a heap initialization error happened. The ordered synchronizer
would then go into a draining phase, but it would only close sources
that were added to the heap.

Fixes #19951

This commit also improves the testing of distsql query cancellation. The
issue that this commit fixes is caught by stress testing the new test.

Release note (bug fix): Fixes a race condition that would result in some
queries hanging after cancellation.
cherrypick-1.1: fix panic when indexing selecting with additional schema
change columns.

Release note (bug fix): fixed a panic for an edge-case when queries are
run against a table undergoing a schema change.
dt and benesch
build: separate out race logic tests
The testrace build is going over 20min lately, so pulling logic tests (~15min) into their own build should help a bit.
A follow-up step might be to split default from the rest of the logic tests if testlogicrace is still the slowest build.
Merge pull request #20446 from cockroachdb/cp11-gcqueue
cherrypick-1.1: storage: move txn intent cleanup out of GC critical path, & GC keys early
Merge pull request #20542 from benesch/1.1-logicrace-split
cherrypick-1.1: build: separate out race logic tests
Commits on Dec 07, 2017
Merge pull request #20534 from asubiotto/asubiotto/cherrypick_20088
cherrypick-1.1: distsqlrun: close orderedSynchronizer sources that were neglected
Commits on Dec 08, 2017
sql: trace schema changes
Motivated by the fact that previously, `DROP TABLE` did not allow any
kind of introspection. Now, at least there's something you can look
at in `/debug/requests` to see activity.

Touches #19004

Release note: None
cli: check existing instance in RocksDB debug
Running, for example, `./cockroach debug keys no-such-dir` would result
in a new RocksDB instance to be initialized at `no-such-dir`. This is
confusing and annoying; instead, return an error.

Release note: None
distsql: fix panic during join planning
This change fixes a panic that was based on a faulty assumption: that we either
have merged columns (NATURAL JOIN, USING) or we have no merged columns and an ON
condition. This seems true based on the syntax, but for inner joins filters
(WHERE) get pushed down and become ON conditions.

In 1.2 this was fixed as part of some bigger changes that removed merged
columns altogether (#19640). This fix is specific to 1.1 (this is not a
cherry-pick).

Release note: fixed a crash caused by NATURAL JOINS and USING in conjunction
with a filter.

Fixes #20569.
Merge pull request #20541 from richardwu/cherry-pick-index
cherrypick-1.1: fix panic when indexing selecting with additional schema change columns.
Commits on Dec 09, 2017
build: specify snappy target.
Release Note: fix build when new versions of libgtest are installed.

Fixes #20596

When `libgtest` is installed, the snappy unittests fail to build.
Since we never run the snappy tests, let's stop building them entirely.
Commits on Dec 10, 2017
Merge pull request #20582 from RaduBerinde/join-plan-panic-1.1
1.1 fix: distsql: fix panic during join planning
Commits on Dec 11, 2017
Merge pull request #20576 from tschottdorf/cp11-trace-schema-changer
cherrypick-1.1: sql: trace schema changes
Merge pull request #20577 from tschottdorf/cp11-rocksdb-emptydir
cherrypick-1.1: cli: check existing instance in RocksDB debug
Commits on Dec 12, 2017
storageccl: support 201 and 204 return codes
Also correctly close response bodies when unused.

Fixes #20017
Merge pull request #20516 from mjibson/cherrypick-20027
cherrypick-1.1: storageccl: support 201 and 204 return codes
sql: bugfix to evalling null as collated string
Previously, evaluating a prepared statement where a placeholder was
directly collated with the `COLLATE` operator could panic if the input
was null.

Release note (bug fix): Fix a panic with null collated strings.
Merge pull request #20648 from jordanlewis/cp-col-null
cherrypick-1.1: sql: bugfix to evalling null as collated string
lgo
sql: Fix return type signature of array_positions built-in
This error was fixed in #20524, and just the fix is
being backported to 1.1.

The following type signature was changed:

    array_positions(<TYPE>[], <TYPE>) now returns an INT[] (previously
        <TYPE>[])

Due to the incorrect type signature returned, a query that contains this
function would cause a panic in distSQL, as follows:

    panic: invalid datum type given: int[], expected string[]

    goroutine 336 [running]:
    github.com/cockroachdb/cockroach/pkg/sql/sqlbase.DatumToEncDatum(0xf, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc4202ff0e8, 0x6673720, 0xc420428450, ...)
      /Users/joey/.go/src/github.com/cockroachdb/cockroach/pkg/sql/sqlbase/encoded_datum.go:122 +0x292
    github.com/cockroachdb/cockroach/pkg/sql/distsqlrun.(*ProcOutputHelper).EmitRow(0xc420c04000, 0x6665820, 0xc42012e120, 0xc42106a9a0, 0x3, 0x3, 0x3, 0x0, 0x0)
      /Users/joey/.go/src/github.com/cockroachdb/cockroach/pkg/sql/distsqlrun/processors.go:286 +0xa79
    github.com/cockroachdb/cockroach/pkg/sql/distsqlrun.(*tableReader).Run(0xc420c04000, 0x6665820, 0xc42012e120, 0xc420669398)
      /Users/joey/.go/src/github.com/cockroachdb/cockroach/pkg/sql/distsqlrun/tablereader.go:214 +0x412
    created by github.com/cockroachdb/cockroach/pkg/sql/distsqlrun.(*Flow).Start
      /Users/joey/.go/src/github.com/cockroachdb/cockroach/pkg/sql/distsqlrun/flow.go:394 +0x3d7

Release note (bug fix): Fixed the return type signature of the
array_positions built-in function. This caused a panic if the
function was used in a query executing in the distributed execution
engine.
1.1 fix: sql: fix LIKE with '_...%', '%..._' and '\' escaped characte…
…r patterns

This cherry-pick (with modifications) fixes LIKE comparison operators
where we did not handle single wildcards ('_') properly in the optimized
like pathway whenever we had a wildcard ('%') at the beginning or end.

It also fixes how we handle '\' escaped characters with LIKE patterns.
This fix should align Cockroach's LIKE pattern matching with Postgres.

The original PR that fixed this in 2.0 is #20600.

Release note: fixed an issue where wildcards ('_', '%') and '\' escaped
characters in LIKE patterns were not handled properly.
lgo
Merge pull request #20652 from lego/1_1-20171212-fix-array-positions-…
…returntype

1.1: sql: Fix return type signature of array_positions built-in
Merge pull request #20654 from richardwu/fix-like-match-v1.1
1.1 fix: sql: fix LIKE with '_...%', '%..._' and '\' escaped character patterns
Commits on Dec 18, 2017
knz
log: ensure panic details are logged to file even when stderr is not …
…redirected

Prior to this patch, panic details would not be captured to logs
unless the log file was forcing redirection of stderr. This was
notoriously not the case when passing `--logtostderr` or
`--logtostderr=INFO`.  (In contrast, `--logtostderr=WARNING` would
redirect stderr and cause the panic details to be properly captured.)

This patch fixes the issue by capturing the panic details every time
stderr is not redirected.

Release note (bug fix): the crash details are now properly copied to
the log file when starting a node with `--logtostderr` (and in some
other circumstances where they could be lost previously).
Commits on Dec 19, 2017
knz
Merge pull request #20842 from knz/20171218-cherrypick-20839
cherry-pick 1.1: log: ensure panic details are logged to file even when stderr is not redirected
1.1 fix/cherry-pick: fix error index alignment in multiple partial
distSender batches

Release note (bug fix): fixed an issue where seemingly irrelevant
error messages were being returned for certain insert statements.
Merge pull request #20855 from richardwu/fix-err-indexes-1.1
1.1 fix/cherry-pick: fix error index alignment in multiple partial distSender batches
Commits on Dec 20, 2017
storage: Write HardState atomically with committing splits
Prior to this change, an ill-timed crash (between applying the raft
command and calling splitPostApply) would leave the replica in a
persistently broken state (no HardState).

Found via jepsen.

Fixes #20629
Fixes #20494

Release note (bugfix): Fixed a replica corruption that could occur if
a process crashed in the middle of a range split.
storage: add permitLargeSnapshots flag to replica
In a privately reported user issue, we've seen that [our attempts](#7788)
at [preventing large snapshots](#7581)
can result in replica unavailability. Our current approach to limiting
large snapshots assumes that it's ok to block snapshots indefinitely
while waiting for a range to first split. Unfortunately, this can create
a dependency cycle where a range requires a snapshot to split (because it
can't achieve an up-to-date quorum without it) but isn't allowed to perform
a snapshot until its size is reduced below the threshold. This can result
in unavailability even when a majority of replicas remain live.

Currently, we still need this snapshot size limit because unbounded snapshots
can result in OOM errors that crash entire nodes. However, once snapshots
are streamed from disk to disk, never needing to buffer in-memory on the
sending or receiving side, we should be able to remove any snapshot size
limit (see #16954).

As a holdover, this change introduces a `permitLargeSnapshots` flag on a
replica which is set when the replica is too large to snapshot but observes
splits failing. When set, the flag allows snapshots to ignore the size
limit until the snapshot goes through and splits are able to succeed
again.

Release note (bug fix): Fixed a scenario where a range that is too big
to snapshot can lose availability even with a majority of nodes alive.
Merge pull request #20939 from bdarnell/cherrypick-split-atomic
cherrypick-1.1: storage: Write HardState atomically with committing splits
storage: updating local Store before gossip should not crash
Updating a target store write stats immediately after rebalancing was
recently addressed in #18425. With that change, if `updateLocalStoreAfterRebalance`
is called before the `StorePool` had seen the `StoreDescriptor` in gossip,
it will trigger a NPE. This change fixes this by making the update a
no-op if the descriptor has yet to be seen in gossip.
storage: Fix simulation of rebalance removals to actually remove targets
If the first target attempted was rejected due to the simulation
claiming that it would be immediately removed, we would reuse the
modified `rangeInfo.Desc.Replicas` that had the target added to it,
messing with future iterations of the loop.

Also, we weren't properly modifying the `candidates` slice, meaning that
we could end up trying the same replica multiple times.

Release note (bug fix): Improve data rebalancing to make thrashing
back and forth between nodes much less likely.
storage: Always simulate RemoveTarget when rebalancing
Skipping the simulation when raftStatus.Progress is nil can make
for undesirable thrashing of replicas, as seen when testing #20241.
It's better to run the simulation without properly filtering replicas
than to not run it at all.

Release note: None
Merge pull request #20906 from nvanbenschoten/nvanbenschoten/cherrypi…
…ck-20589

cherry-pick-1.1: storage: add permitLargeSnapshots flag to replica
storage: Avoid replica thrashing when localities are different sizes
Fixes #20241

Release note (bug fix): avoid rebalance thrashing when localities have
very different numbers of nodes
Commits on Dec 21, 2017
Merge pull request #20934 from a-robinson/cherrypick_20241
cherrypick-1.1: storage: avoid replica thrashing when localities are different sizes
knz
cli/sql: do not fail if sql_safe_updates cannot be set
Prior to this patch, an interactive SQL shell (`cockroach sql`) would
error out if connecting to a server which doesn't support the session
variable `sql_safe_updates` -- e.g. CockroachDB 1.0 or PostgreSQL
itself.

This patch ensures that a failure is only reported as a warning.

Release note (cli change): `cockroach sql` does not fail any more when
the server does not support the `sql_safe_updates` session variable.
knz
cli: de-emphasize the seriousness of errors in ancillary SQL client s…
…ervices

The code that implements the SQL connection object utilized by various
CLI utilities (including, but not limited to, `cockroach sql`) provide
ancillary services for the user's convenience, like checking whether
the client and the server version are the same, etc.

When these checks fail, the failure should not indicate malfunction --
they can simply indicate that the utility was run against a previous
CockroachDB version that does not yet support some service, or that
`cockroach sql` is being run against a PostgreSQL server.

Prior to this patch, the errors due to ancillary failures were
reported with serious-sounding messages, often starting with the word
"unable" or "error".

This patch de-emphasizes them by prefixing them with the string
"warning:".

Release note (cli change): some warning messages are now more clearly
indicated with the "warning:" prefix.
knz
cli: add a check that client version <= server version
This patch introduces two features:

- it ensures that a v1.1+ `cockroach` client connecting to a v1.0
  server properly recognizes that the server is running v1.0 (prior to
  this patch, the version strings from a v1.0 server were not
  recognized).

- it causes a warning message to be printed if the client is newer
  than the server.

For example:

```
kena@kenax ~/cockroach % ./cockroach sql --insecure

warning: server version older than client! proceed with caution; some features may not be available.

root@:26257/>
```

Release note (cli change): client commands that use SQL (including
`cockroach sql`, `cockroach node ls`, etc) now print a warning if the
server version is older than the client.
knz
knz
Merge pull request #20945 from knz/20171220-cherrypick-20935
cherry-pick 1.1: cli, cli/sql: version checks
ccl/storageccl: retry s3 region fetch
Retry an S3 operation that has been causing transient errors. If a
503 is returned, we may need to slow down our request rate. Although
the retry package does wait a bit, it is only some 10s of ms. We
instead wait 5s on these errors to attempt to slightly decrease the
request rate. This 5s is purely a guess, but it is also a low enough
time that it won't matter too much if we are waiting longer than
minimally needed.

For now put this only in the place we have seen the failure. We can
optionally retry the other locations if we see problems there. This
is the same approach we used for Azure and GCE with good results.

Fixes #19502
Merge pull request #21003 from mjibson/cherrypick-19556
cherrypick-1.1: ccl/storageccl: retry s3 region fetch
Commits on Dec 24, 2017
Merge pull request #21034 from tschottdorf/cp-20989
cherrypick-1.1: gossip: better reporting for a tripped assertion
Commits on Dec 30, 2017
knz
cli/sql: avoid check_syntax on client/server version mismatch
This is a toned down cherry-pick of #21119, as advised by #19445.

Release note (cli change): client-side syntax checking is now
only automatically enabled if the client and server are running
the same version of CockroachDB. This restriction will be
lifted in CockroachDB 2.0.
Commits on Jan 01, 2018
knz
Merge pull request #21135 from knz/20171230-check-syntax
cherry-pick 1.1: cli/sql: avoid check_syntax on client/server version mismatch
Commits on Jan 04, 2018
sql: fix tuple equality comparisons for tuples with NULL values
Fixes #21113.

Previously, tuple equality evaluation would short-circuit on NULL elements
just like tuple inequality evaluation. This behavior was desired for tuple
comparisons using inequality operators but was a deviation from the SQL
standard for tuple comparisons using the equality operator.

In other words, `(1, 2, 4) > (1, NULL, 5)` evaluated to `NULL`, correctly,
but `(1, 2, 4) = (1, NULL, 5)` also evaluated to `NULL`, incorrectly. Instead,
the latter expression should have evaluated to `false`. This is because tuple
comparison should only return NULL when the non-NULL elements are not sufficient
to determine the result. Since tuple inequality is defined lexicographically,
the first NULL element encountered causes ambiguity. For tuple equality, it
may still be possible to evaluate the expression even after NULL elements are
seen, so the evaluation cannot short circuit.

This change fixes the behavior for tuple equality, bringing it inline with
PostgreSQL and MySQL.

Note that we already had very similar logic for the `IN`, `ANY`, `SOME`, and
`ALL` comparison operators, so the behavior replaced here must have simply
been an oversight when tuple comparison was introduced.

Release note (sql change/bug fix): Fix tuple equality to evaluate correctly
in the presence of NULL elements.
sql: fix not-NULL spans for tuple !=
PR #21115 fixed the logic for `tuple != tuple` which was incorrectly handling
NULLs. This change fixes the corresponding logic in the index selection code
(which is incorrectly generating `/!NULL` spans).

Release note (sql change/bug fix): Fix tuple equality to evaluate correctly
in the presence of NULL elements.
Merge pull request #21237 from RaduBerinde/fix-null-ineq-1.1
cherrypick-1.1: sql: fix tuple equality comparisons for tuples with NULL values
Commits on Jan 13, 2018
storage: fix StoreList.filter to validate constraints properly
It was preposterously broken. It was only validating values, not
keys (when present); was requiring positive constraints; and, worst of
all, was requiring prohibited constraints instead of prohibiting them.

Luckily it was only used by TransferLeaseTarget and ShouldTransferLease,
so its damage was contained -- we wouldn't ever put a replica on a node
that we shouldn't, we'd just never choose to transfer the lease for an
affected range. The relative lack of use of positive and prohibited
constraints also helped us from ever seeing this in the wild.

Release note (bug fix): Fix incorrect logic in lease rebalancing that
prevented leases from being transferred
Commits on Jan 16, 2018
Merge pull request #21442 from a-robinson/cherrypick_21430
cherrypick-1.1: storage: fix StoreList.filter to validate constraints properly
Commits on Jan 19, 2018
sqlccl: don't fail RESTORE cleanup if external storage is gone
We aren't doing the full fix in #20185 because it's a protobuf change
and the code has diverged quite a bit. This change will allow the
jobs to terminate, even if it can't clean up the data in all cases.

Release note (enterprise change): prevent RESTORE jobs from looping
indefinitely during failure or cancelation if the external data is
not accessible.

See #20261
Merge pull request #21573 from mjibson/issue-20261
cherrypick-1.1: sqlccl: don't fail RESTORE cleanup if external storage is gone
Commits on Jan 25, 2018
util/log: Avoid infinite recursion when out-of-disk errors cause an exit
Trying to write to a file when we're out of disk will trigger
exitLocked, but exitLocked tries to write to its file one last time in
order to help users understand why the process is exiting. This is very
valuable most of the time, when the problem isn't that the machine is
out of disk, but shouldn't cause a stack overflow when the machine is
out of space.

Fixes #21756

Release note (bug fix): fix a stack overflow in the code for shutting
down a server when out of disk space
Commits on Jan 26, 2018
Merge pull request #21804 from a-robinson/cherrypick_21768
cherrypick-1.1: util/log: Avoid infinite recursion when out-of-disk errors cause an exit
dt
sql: Scrub error messages from reported SQL stats
Error messages often include offending parameters or other data that could be sensitive.
Our crash reports go to great lengths to ensure that only error messages explicitly marked as non-sensitive can be reported -- until we add that same handling to how the SQL execution stats track errors, we need to simply scrub the errors from SQL stats collected for diagnostic reporting.

Release note (general change): remove error messages from SQL execution statistics included in diagnostic reporting.
dt
Merge pull request #21827 from dt/scrub-1.1
cherrypick-1.1: sql: Scrub error messages from reported SQL stats
Commits on Jan 29, 2018
sql: only check tables for indexes during DROP INDEX
Previously, DROP INDEX was returning a nonsensical error message when a
view or sequence was present in the database, because it was
iterating over all TableDescriptors in the database, and asserting that
each one was a table before moving on to search it for indexes.

Instead, skip TableDescriptors that are not tables (views or sequences).

(This cherry pick didn't apply too cleanly; made some edits)

Release note: None
cherry-pick 1.1: sql: report schema change error with txn commit
Before this change, an error encountered by a schema change queued up by
a statement in an explicit transaction would not be reported to the
client unless the statement was part of the same query string as the
COMMIT statement. So, generally speaking, schema change errors queued up
in transactions would not be reported :).
This occurs because of remnants of code that tried to associate
errors with statements.
At some point I think we used to have code that either associated the error
with the statement that queued the change (if it still had access to its
result) or associated it with the commit otherwise. That broke when we
introduced results streaming. Or maybe it was always broken.

Besides this bug, there's an outstanding issue that the way in which
these errors are reported is not conformant to the pgwire protocol, and
also that these errors confusingly suggest to someone that the
transaction has not been committed. This commit does nothing to address
either of these. These issues are discussed in this Cockroach Labs
internal thread: https://groups.google.com/a/cockroachlabs.com/forum/#!topic/eng/Jc61hd6Pv2US

Fixes #21822

Release note(sql): Fix reporting of errors from transactional schema
changes: errors from DDL statements sent by a client as part of a
transaction, but in a different query string than the final commit used
to be silently swallowed.
cherry-pick 1.1: fix discrepancy with Postgres BYTES arrays
Fixes #21697.

This is a bit of an obnoxious fix to be making - we change the way we
format BYTES within arrays here without changing the way we format them
outside of arrays (which is different than master).

This should be a strict compatibility improvement, and it strikes me as
the smallest possible change to be making to fix the issue, but open to
suggestions if people disagree.

Release note (bug fix): fixed an issue with the wire-formatting of BYTES
arrays.
Merge pull request #21796 from justinj/cp-pg-bytes
cherry-pick 1.1: fix discrepancy with Postgres BYTES arrays
Merge pull request #21857 from vilterp/1.1-drop-index-fix
cherry-pick-1.1: sql: only check tables for indexes during DROP INDEX
Merge pull request #21865 from andreimatei/cherrypick-schema-change-err
cherry-pick 1.1: sql: report schema change error with txn commit
Commits on Jan 31, 2018
cherry-pick 1.1: pgwire: fix issue with re-using old buffer data
Release notes: fix issue with stale buffer data when using the binary
format for arrays.

Fixes #20372.
Fixes #19669.

This commit fixes an issue involving passing a bytes.Buffer by value
which would cause old buffered data for arrays to be re-used.

The bug here was somewhat subtle and had to do with copying a
bytes.Buffer by value whose slice header pointed to its fixed-size array
used for small allocations, and then *re-assigning* the original buffer,
causing the fixed-size array to be overwritten and the buffered value
changed. A reduced version of the issue can be seen here:

https://play.golang.org/p/4-v_AeqYtR
Merge pull request #22262 from justinj/cp-array-pgwire
cherry-pick 1.1: pgwire: fix issue with re-using old buffer data
Commits on Feb 12, 2018
allocator: Include alive store count in error messages
allocatorError messages have been misleading for a very long time --
they always say that 0 out of 0 stores match the constraints, even if
there are more than 0 live stores in the cluster. The last time a
correct count of the live stores was used was January 2017, before
f4f3ab6, and even then a count was only
used on one of two code paths.

Release note: None
Merge pull request #22595 from a-robinson/cherrypick_22565
cherrypick-1.1: allocator: Include alive store count in error messages
Commits on Feb 15, 2018
Commits on Feb 19, 2018
sql: fix COMMIT transition
Move to NoTxn state when a COMMIT encounters an error from the parallel
queue. This mirrors what we do on any other error on COMMIT.

Release note: sql bug fix - a COMMIT reporting an error generated by a
previous parallel (i.e. RETURNING NOTHING) statement no longer leaves
the connection in an aborted transaction state. Instead the transaction
is considered completed and a ROLLBACK is not necessary.
Commits on Feb 20, 2018
knz
sql: avoid leaking database names when scrubbing virtual table names
Release note (bug fix): Queries over virtual tables with an explicit
database name prefix (e.g. `select * from mydb.crdb_internal.tables`)
would not be scrubbed properly from reported statistics. This is now
fixed.
knz
Merge pull request #22815 from knz/20180219-cherrypick-22753
cherrypick-1.1: sql: avoid leaking database names when scrubbing virtual table names
Merge pull request #22826 from xudongzheng/release-1.1
build: allow building with go1.10
Commits on Feb 21, 2018
ui: exclude decommissioned nodes from cluster stats
Fixes #22710, #19782
Release note (admin ui change): Fixes an issue where decommissioned nodes are
included in cluster stats aggregates.
knz
sql: fix conversion of interval to float
Converting an interval to float must give a number of seconds. The
previous code would give a number of picoseconds (!).

Release note (bug fix): the conversion from `INTERVAL` to `FLOAT` now
properly returns the number of seconds in the interval.
knz
Merge pull request #22893 from knz/20180221-cherrypick-22892
cherrypick-1.1: sql: fix conversion of interval to float
Merge pull request #22864 from couchand/cherrypick/22711
cherrypick-1.1: ui: exclude decommissioned nodes from cluster stats
Commits on Feb 22, 2018
Merge pull request #22740 from nvanbenschoten/nvanbenschoten/22721
cherrypick-1.1: sql: attempt txn auto-commit before flushing txnResults
Merge pull request #22814 from andreimatei/cherry-pick-commit-transition
cherry-pick 1.1: sql: fix COMMIT transition
Commits on Feb 23, 2018
dt
server: don't crash on empty update check or diagnostic report overrides
You can override the updates check or diagnostics report endpoints via env vars, but overriding them to empty would result in a nil pointer dereference crash when the server attempted to check for new versions or send a diagnostics report.
Fixes #22967.

Release note (bug fix): do not crash if COCKROACH_UPDATE_CHECK_URL or COCKROACH_USAGE_REPORT_URL overrides are empty.
dt
server: skip check for new versions if diagnostics reports are disabled
If a user has disabled diagnostics reporting, we should assume that that expressed a preference that we not “phone-home” at all, including for the new-version checks.

Release note (general change): disabling diagnostics reporting also disables new version notification checks.
dt
Merge pull request #23008 from dt/nophonehome-1.1
cherrypick-1.1: fixes for disabling new-version pings
Commits on Feb 26, 2018
server: Don't send pgwire connections to the init GRPC server
PGWire messages do not reliably trigger errors in the HTTP server,
which leads to these connections timing out (which has been observed
to take minutes, depending on the client). Instead, we must always
send pgwire connections to the pgwire server (where they will still
block while the server is uninitialized, but they will be unblocked
as soon as it initializes).

This is a subset of #21682 for release-1.1.
Merge pull request #22663 from bdarnell/init-listener
release-1.1: server: Don't send pgwire connections to the init GRPC server
Commits on Mar 02, 2018
storage: fix getRaftLeader in multiTestContext
The previous version of getRaftLeader would not return the leader if all
replicas had the same term and the leader wasn't the first to be
iterated over.

Release note: None
storage: transfer raft leadership and wait grace period when draining
Previously, even though a draining replica would transfer its lease,
when it came to `leasePostApply`, the raft leadership would not be
transferred because the target was not up-to-date enough. This would
result in the draining replica taking the leadership with it and
producing a zero qps scenario until a new election was held after
an election timeout.

This change adds a draining flag to a replica so that it may skip this
check. A draining store will also wait for a minimum of 5s after
transferring away all of its replica's leases to allow for the raft
leadership change to take place.

Addresses #22573.

Release note (bug fix): Fix a zero qps scenario when a node would be
gracefully drained.
Merge pull request #23306 from asubiotto/asubiotto/1.1cp22767
cherrypick-1.1: storage: transfer raft leadership and wait grace period when draining
storage: Avoid transferring leases to draining stores
This was apparently never fully implemented/tested when draining was
first added.

In local testing with verbose logging enabled, I'm seeing leases
transfer back (or at least say that they were being transferred back)
to a draining store before it shut down, which was sometimes causing
brief (e.g. 3s) QPS outages.

Fixes #22573

Release note (bug fix): Avoid disruptions in performance when gracefully
shutting a node down.
Merge pull request #23302 from a-robinson/cherrypick_23265_1.1
cherrypick-1.1: storage: Avoid transferring leases to draining stores
Commits on Mar 09, 2018
status: force linkage via call to je_zone_register
go 1.9.4 and up require the env variable CGO_LDFLAGS_ALLOW to be set to
allow passing most linker flags. Instead of putting up with this, use
the alternative method of calling the hook directly to force inclusion.

Release note (build change): CockroachDB now builds with go 1.9.4 and
higher.
Merge pull request #23639 from bdarnell/cherrypick-go-1-10
cherrypick-1.1: status: force linkage via call to je_zone_register
Commits on Mar 16, 2018
storage: Disallow sync intent resolution in loadSystemConfig
This holds raftMu on the system config range for too long,
and in extreme cases can lead to deadlock.

Fixes #23254

Release note (bug fix): Fixed a deadlock when tables are rapidly
created or dropped.
Merge pull request #23957 from bdarnell/cherrypick11-sysconfig-intent
cherrypick-1.1: storage: Disallow sync intent resolution in loadSystemConfig
Commits on Mar 17, 2018
sql: Fix deadlock in schema changes with retriable errors
Previously a transaction heartbeat loop would be kept alive while
running schema change operations on the same goroutine, leading to
deadlock.

This is original work on the release-1.1 branch, not a cherry-pick
from master.

Fixes #23979
Commits on Mar 19, 2018
Merge pull request #23981 from bdarnell/schema-change-retry
release-1.1: sql: Fix deadlock in schema changes with retriable errors
Commits on Mar 20, 2018
server: Use remote_debugging setting to restrict all access to keys
Keys can potentially contain sensitive information, so lock down or
strip any debug endpoints that contain range start/end keys in
accordance with the server.remote_debugging.mode setting and where the
request originated.

This intentionally only applies the filtering for HTTP requests: gRPC
requests should be allowed through since they're already properly
authenticated by certificates in secure clusters.

Fixes #23555

Release note (admin ui change): More debug pages are now locked down by
the server.remote_debugging.mode cluster setting.
Merge pull request #24073 from a-robinson/backport1.1-23851
cherrypick-1.1: server: Use remote_debugging setting to restrict all access to keys
Commits on Mar 29, 2018
build: add bors.toml
Teaches Bors to wait for a successful build on TeamCity before merging
approved PRs to master, and not to merge PRs labeled do-not-merge.

Contributes to #22499
Release note (build change): Begin using Bors to automate the process of
merging PRs to master.
Merge #24324
24324: cherrypick-1.1: build: add bors.toml r=bdarnell a=couchand

A cherry-pick of #24100.

Teaches Bors to wait for a successful build on TeamCity before merging
approved PRs to master, and not to merge PRs labeled do-not-merge.

Contributes to #22499
Release note (build change): Begin using Bors to automate the process of
merging PRs to master.

cc @cockroachdb/release @bdarnell @benesch
Commits on Apr 03, 2018
build: teach Bors about the CLA check
When introducing Bors in #24100, I only thought to include the TeamCity build
status, but since we require the license check for regular, GitHub-initiated
merges, this was a regression.  This change corrects that, adding the CLA to
the list of PR statuses to check before allowing `r+`.

Release note: None
Commits on Apr 06, 2018
Merge #24447
24447: cherrypick-1.1: build: teach Bors about the CLA check r=couchand a=couchand

A cherry-pick of #24420.

cc @bdarnell @benesch 

When introducing Bors in #24100, I only thought to include the TeamCity build
status, but since we require the license check for regular, GitHub-initiated
merges, this was a regression.  This change corrects that, adding the CLA to
the list of PR statuses to check before allowing `r+`.

Release note: None
Commits on Apr 11, 2018
build: give craig an avatar
At present, when GitHub apps create commits, they don't show up
with the app's avatar.  A workaround is to commit with an e-mail
address that has an associated gravatar.

Release note: None
build: move bors.toml into .github folder
Just to keep the root of the project a little cleaner.

Release note: None
craig[bot] and couchand
Merge #24672
24672: cherrypick-1.1: give craig an avatar, move bors.toml into .github r=couchand a=couchand

build: give craig an avatar

At present, when GitHub apps create commits, they don't show up
with the app's avatar.  A workaround is to commit with an e-mail
address that has an associated gravatar.

build: move bors.toml into .github folder

Just to keep the root of the project a little cleaner.

---

I hadn't planned on cherry-picking this, but figured it's a reasonable test that the branch protection is set up correctly.

cc: @cockroachdb/release @jordanlewis 

Co-authored-by: Andrew Couch <andrew@cockroachlabs.com>
Commits on Apr 17, 2018
sql: Fix "dangling rows" left over by schema change + rollback to sav…
…epoint

Prior to this patch, the following sequence would write a row that's
then inaccessible:
begin; savepoint cockroach_restart; create table t(x int primary key); rollback to savepoint cockroach_restart;
insert into t(x) values(1); release savepoint cockroach_restart; commit;

The insert manages to resolve the table because we weren't clearing the
"uncommitted descriptors" set that the session maintains. Since the
table descriptor doesn't end up being committed, the row will be
essentially unreachable.

This patch is original work on the 1.1 branch. This bug is not present
in the 2.0 release, where we're better about resetting state on
transaction retries.
I believe on 1.1 there's still a similar issue present when doing
automatic retries (as opposed to user-directed ones through rollback to
savepoint). I think fixing those would be more involved because it's
less clear where to stick the cleanup done in this patch; I'd rather not
do anything at this point.

Also note that, with the fix, the sequence above doesn't actually work.
Instead, it deadlocks, just like it does on 2.0, because of #24885.
However, the following works:
begin; savepoint cockroach_restart; create table t(x int primary key); rollback to savepoint cockroach_restart;
create table t(x int primary key); insert into t(x) values(1); release savepoint cockroach_restart; commit;

Fixes #24785
craig[bot] and andreimatei
Merge #24888
24888: sql: Fix "dangling rows" left over by schema change + rollback to savepoint r=andreimatei a=andreimatei

Prior to this patch, the following sequence would write a row that's
then inaccessible:
begin; savepoint cockroach_restart; create table t(x int primary key); rollback to savepoint cockroach_restart;
insert into t(x) values(1); release savepoint cockroach_restart; commit;

The insert manages to resolve the table because we weren't clearing the
"uncommitted descriptors" set that the session maintains. Since the
table descriptor doesn't end up being committed, the row will be
essentially unreachable.

This patch is original work on the 1.1 branch. This bug is not present
in the 2.0 release, where we're better about resetting state on
transaction retries.
I believe on 1.1 there's still a similar issue present when doing
automatic retries (as opposed to user-directed ones through rollback to
savepoint). I think fixing those would be more involved because it's
less clear where to stick the cleanup done in this patch; I'd rather not
do anything at this point.

Also note that, with the fix, the sequence above doesn't actually work.
Instead, it deadlocks, just like it does on 2.0, because of #24885.
However, the following works:
begin; savepoint cockroach_restart; create table t(x int primary key); rollback to savepoint cockroach_restart;
create table t(x int primary key); insert into t(x) values(1); release savepoint cockroach_restart; commit;

Fixes #24785

cc @cockroachdb/release  @gpaul 

Co-authored-by: Andrei Matei <andrei@cockroachlabs.com>
Commits on Apr 18, 2018
knz
sql: fix ALTER INDEX RENAME on primary index
ALTER INDEX ... RENAME would crash if issued on the primary index.

Release note (bug fix): `ALTER INDEX ... RENAME` can now be used on
the primary index.
craig[bot] and knz
Merge #24779
24779: cherrypick-1.1: sql: fix ALTER INDEX RENAME on primary index r=knz a=knz

Picks  #24776.

cc @cockroachdb/release 

Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
build: Update etcd
Picks up a cherry-picked version of etcd-io/etcd#9073, to fix #18601

Release note (bug fix): Fixes potential cluster unavailability after
raft logs grow too large.
Commits on Apr 19, 2018
craig[bot] and bdarnell
Merge #24889
24889: cherrypick-1.1: build: Update etcd r=bdarnell a=bdarnell

Picks up a cherry-picked version of etcd-io/etcd#9073, to fix #18601

Release note (bug fix): Fixes potential cluster unavailability after
raft logs grow too large.

Co-authored-by: Ben Darnell <ben@cockroachlabs.com>
Commits on Apr 24, 2018
build: GH label changes
- Robot -> O-robot
- test-failure -> C-test-failure

Release note: None
Commits on Apr 25, 2018
craig[bot] and jordanlewis
Merge #25055
25055: backport-1.1: build: GH label changes r=jordanlewis a=jordanlewis

Backport 1/1 commits from #25050.

/cc @cockroachdb/release

---

- Robot -> O-robot
- test-failure -> C-test-failure

@benesch do we need to backport this for it to work on other branches?

Release note: None


Co-authored-by: Jordan Lewis <jordanthelewis@gmail.com>
Commits on Oct 01, 2018
rpc,server: authenticate all gRPC methods
Previously only the roachpb.Batch RPC was correctly checking for an
authenticated user. All other RPCs were open to the public, even when
the server was running in secure mode.

To prevent future accidents of this kind, hoist the authentication check
to a gRPC interceptor that is guaranteed to run before all RPCs.

Release note (bug fix): A security vulnerability in which data could be
leaked from or tampered with in a cluster in secure mode has been fixed.

Release note: None
Merge pull request #30824 from benesch/secure-grpc-1.1
release-1.1: rpc,server: authenticate all gRPC methods
Showing 376 changed files with 17,874 additions and 6,459 deletions.
View
@@ -0,0 +1,13 @@
status = [
"GitHub CI (Cockroach)"
]
pr_status = [
"license/cla"
]
block_labels = [
"do-not-merge"
]
[committer]
name = "craig[bot]"
email = "bors@cockroachlabs.com"
View
@@ -1 +1 @@
GOVERS = go1\.[89].*
GOVERS = go1\.([89]|10).*
View

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
View
@@ -51,4 +51,9 @@ ignored = [
[[constraint]]
name = "github.com/coreos/etcd"
branch = "master"
source = "https://github.com/cockroachdb/etcd"
branch = "crdb-release-1.1"
[[constraint]]
name = "github.com/rlmcpherson/s3gof3r"
revision = "864ae0bf7cf2e20c0002b7ea17f4d84fec1abc14"
View
14 LICENSE
@@ -16,10 +16,10 @@ CockroachDB Community License Agreement
Agreement, the terms below have the following meanings.
(a) "CockroachDB" shall mean the SQL database software provided by Cockroach
Labs, including both CockroachDB Community and CockroachDB Enterprise
Labs, including both CockroachDB Core and CockroachDB Enterprise
editions, as defined below.
(b) "CockroachDB Community Edition" shall mean the open source version of
(b) "CockroachDB Core" shall mean the open source version of
CockroachDB, available free of charge at
https://github.com/cockroachdb/cockroach
@@ -93,25 +93,25 @@ CockroachDB Community License Agreement
2. Licenses.
(a) License to CockroachDB Community Edition. The License for CockroachDB
Community Edition is the Apache License, Version 2.0 ("Apache License").
(a) License to CockroachDB Core. The License for CockroachDB
Core is the Apache License, Version 2.0 ("Apache License").
The Apache License includes a grant of patent license, as well as
redistribution rights that are contingent on several requirements.
Please see
http://www.apache.org/licenses/LICENSE-2.0
for full terms. CockroachDB Community Edition is a no-cost, entry-level
for full terms. CockroachDB Core is a no-cost, entry-level
license and as such, contains the following disclaimers: NOTWITHSTANDING
ANYTHING TO THE CONTRARY HEREIN, COCKROACHDB COMMUNITY EDITION IS
ANYTHING TO THE CONTRARY HEREIN, COCKROACHDB CORE IS
PROVIDED "AS IS" AND "AS AVAILABLE", AND ALL EXPRESS OR IMPLIED
WARRANTIES ARE EXCLUDED AND DISCLAIMED, INCLUDING WITHOUT LIMITATION THE
IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE,
NON-INFRINGEMENT, AND ANY WARRANTIES ARISING BY STATUTE OR OTHERWISE IN
LAW OR FROM COURSE OF DEALING, COURSE OF PERFORMANCE, OR USE IN TRADE.
For clarity, the terms of this Agreement, other than the relevant
definitions in Section 1 and this Section 2(a) do not apply to
CockroachDB Community Edition.
CockroachDB Core.
(b) License to CockroachDB Enterprise Edition.
View
@@ -28,7 +28,7 @@ TESTS :=.## Tests to run for use with `make test`.
BENCHES :=## Benchmarks to run for use with `make bench`.
FILES :=## Space delimited list of logic test files to run, for make testlogic.
TESTTIMEOUT := 4m## Test timeout to use for regular tests.
RACETIMEOUT := 15m## Test timeout to use for race tests.
RACETIMEOUT := 25m## Test timeout to use for race tests.
ACCEPTANCETIMEOUT := 30m## Test timeout to use for acceptance tests.
BENCHTIMEOUT := 5m## Test timeout to use for benchmarks.
TESTFLAGS :=## Extra flags to pass to the go test runner, e.g. "-v --vmodule=raft=1"
@@ -361,8 +361,7 @@ $(ARCHIVE).tmp: .buildinfo/tag .buildinfo/rev .buildinfo/basebranch
# For details, see the "Possible timestamp problems with diff-files?" thread on
# the Git mailing list (http://marc.info/?l=git&m=131687596307197).
.buildinfo/tag: | .buildinfo
@{ git describe --tags --exact-match 2> /dev/null || git rev-parse --short HEAD; } | tr -d \\n > $@
@git diff --quiet HEAD || echo -dirty >> $@
@{ git describe --tags --dirty 2> /dev/null || git rev-parse --short HEAD; } | tr -d \\n > $@
.buildinfo/basebranch: | .buildinfo
@git describe --tags --abbrev=0 | tr -d \\n > $@
View
@@ -430,7 +430,7 @@ libprotobuf: $(PROTOBUF_DIR)/Makefile
.PHONY: libsnappy
libsnappy: $(SNAPPY_DIR)/Makefile
@$(MAKE) --no-print-directory -C $(SNAPPY_DIR)
@$(MAKE) --no-print-directory -C $(SNAPPY_DIR) snappy
.PHONY: librocksdb
librocksdb: $(ROCKSDB_DIR)/Makefile
View
@@ -1,7 +1,14 @@
FROM debian:8.7
FROM debian:8.9
MAINTAINER Tobias Schottdorf <tobias.schottdorf@gmail.com>
# Install root CAs so we can make SSL connections to phone home and
# do backups to GCE/AWS/Azure.
RUN apt-get update && \
apt-get -y upgrade && \
apt-get install -y ca-certificates && \
rm -rf /var/lib/apt/lists/*
RUN mkdir -p /cockroach
COPY cockroach.sh cockroach /cockroach/
# Set working directory so that relative paths
View
@@ -23,6 +23,7 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"testing"
@@ -186,7 +187,7 @@ func TestStyle(t *testing.T) {
t.Run("TestTodoStyle", func(t *testing.T) {
t.Parallel()
cmd, stderr, filter, err := dirCmd(pkg.Dir, "git", "grep", "-nE", `\sTODO\([^)]*\)[^:]`, "--", "*.go")
cmd, stderr, filter, err := dirCmd(pkg.Dir, "git", "grep", "-nE", `\sTODO\([^)]+\)[^:]`, "--", "*.go")
if err != nil {
t.Fatal(err)
}
@@ -210,7 +211,7 @@ func TestStyle(t *testing.T) {
t.Run("TestTimeutil", func(t *testing.T) {
t.Parallel()
cmd, stderr, filter, err := dirCmd(pkg.Dir, "git", "grep", "-nE", `time\.(Now|Since)`, "--", "*.go")
cmd, stderr, filter, err := dirCmd(pkg.Dir, "git", "grep", "-nE", `time\.(Now|Since|Unix\())`, "--", "*.go", ":!pkg/security/securitytest/embedded.go")
if err != nil {
t.Fatal(err)
}
@@ -221,7 +222,7 @@ func TestStyle(t *testing.T) {
if err := stream.ForEach(stream.Sequence(
filter,
stream.GrepNot(`^util/(log|syncutil|timeutil|tracing)/\w+\.go\b`),
stream.GrepNot(`^util/(syncutil|timeutil|tracing)/\w+\.go\b`),
), func(s string) {
t.Errorf(`%s <- forbidden; use "timeutil" instead`, s)
}); err != nil {
@@ -523,6 +524,26 @@ func TestStyle(t *testing.T) {
t.Run("TestForbiddenImports", func(t *testing.T) {
t.Parallel()
// forbiddenImportPkg -> permittedReplacementPkg
forbiddenImports := map[string]string{
"context": "golang.org/x/net/context",
"log": "util/log",
"path": "path/filepath",
"github.com/golang/protobuf/proto": "github.com/gogo/protobuf/proto",
"github.com/satori/go.uuid": "util/uuid",
"golang.org/x/sync/singleflight": "github.com/cockroachdb/cockroach/pkg/util/syncutil/singleflight",
}
// grepBuf creates a grep string that matches any forbidden import pkgs.
var grepBuf bytes.Buffer
grepBuf.WriteByte('(')
for forbiddenPkg := range forbiddenImports {
grepBuf.WriteByte('|')
grepBuf.WriteString(regexp.QuoteMeta(forbiddenPkg))
}
grepBuf.WriteString(")$")
filter := stream.FilterFunc(func(arg stream.Arg) error {
for _, useAllFiles := range []bool{false, true} {
buildContext := build.Default
@@ -559,31 +580,30 @@ func TestStyle(t *testing.T) {
filter,
stream.Sort(),
stream.Uniq(),
stream.Grep(`^`+settingsPkgPrefix+`: | `+grepBuf.String()),
stream.GrepNot(`cockroach/pkg/cmd/`),
stream.Grep(`^`+settingsPkgPrefix+`: | (github\.com/golang/protobuf/proto|github\.com/satori/go\.uuid|log|path|context|syscall)$`),
stream.GrepNot(`cockroach/pkg/(cli|security): syscall$`),
stream.GrepNot(`cockroach/pkg/(base|security|util/(log|randutil|stop)): log$`),
stream.GrepNot(`cockroach/pkg/(server/serverpb|ts/tspb): github\.com/golang/protobuf/proto$`),
stream.GrepNot(`cockroach/pkg/util/caller: path$`),
stream.GrepNot(`cockroach/pkg/ccl/storageccl: path$`),
stream.GrepNot(`cockroach/pkg/util/uuid: github\.com/satori/go\.uuid$`),
), func(s string) {
switch {
case strings.HasSuffix(s, " path"):
t.Errorf(`%s <- please use "path/filepath" instead of "path"`, s)
case strings.HasSuffix(s, " log"):
t.Errorf(`%s <- please use "util/log" instead of "log"`, s)
case strings.HasSuffix(s, " github.com/golang/protobuf/proto"):
t.Errorf(`%s <- please use "github.com/gogo/protobuf/proto" instead of "github.com/golang/protobuf/proto"`, s)
case strings.HasSuffix(s, " github.com/satori/go.uuid"):
t.Errorf(`%s <- please use "util/uuid" instead of "github.com/satori/go.uuid"`, s)
case strings.HasSuffix(s, " context"):
t.Errorf(`%s <- please use "golang.org/x/net/context" instead of "context"`, s)
case strings.HasSuffix(s, " syscall"):
t.Errorf(`%s <- please use "golang.org/x/sys" instead of "syscall"`, s)
case strings.HasPrefix(s, settingsPkgPrefix+": github.com/cockroachdb/cockroach"):
if !strings.HasSuffix(s, "testutils") && !strings.HasSuffix(s, "humanizeutil") &&
!strings.HasSuffix(s, settingsPkgPrefix) {
pkgStr := strings.Split(s, ": ")
importingPkg, importedPkg := pkgStr[0], pkgStr[1]
// Test that a disallowed package is not imported.
if replPkg, ok := forbiddenImports[importedPkg]; ok {
t.Errorf(`%s <- please use %q instead of %q`, s, replPkg, importedPkg)
}
// Test that the settings package does not import CRDB dependencies.
if importingPkg == settingsPkgPrefix && strings.HasPrefix(importedPkg, cockroachDB) {
switch {
case strings.HasSuffix(s, "testutils"):
case strings.HasSuffix(s, "humanizeutil"):
case strings.HasSuffix(s, settingsPkgPrefix):
default:
t.Errorf("%s <- please don't add CRDB dependencies to settings pkg", s)
}
}
View
@@ -16,7 +16,14 @@ build/builder.sh env \
build/builder.sh make lint 2>&1 | tee artifacts/lint.log | go-test-teamcity
build/builder.sh make generate
build/builder.sh /bin/bash -c '! git status --porcelain | read || (git status; git diff -a 1>&2; exit 1)'
# The workspace is clean iff `git status --porcelain` produces no output. Any
# output is either an error message or a listing of an untracked/dirty file.
if [[ "$(git status --porcelain 2>&1)" != "" ]]; then
git status >&2 || true
git diff -a >&2 || true
exit 1
fi
# Run the UI tests. This logically belongs in teamcity-test.sh, but we do it
# here to minimize total build time since the rest of this script completes
@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""Post failures from the current teamcity job as github issues.
Requires the following environment variables:
- TC_API_PASSWORD
- TC_BUILD_BRANCH
- TC_BUILD_ID
- GITHUB_API_TOKEN
"""
import json
import os
import urllib.error
import urllib.request
import xml.etree.ElementTree as ET
from urllib.parse import urljoin, urlencode
# Root of the TeamCity REST API; tc_url() resolves relative paths against it.
BASEURL = "https://teamcity.cockroachdb.com/httpAuth/app/rest/"
# Register HTTP basic-auth credentials for the TeamCity server. The password
# is read from the environment at import time, so a missing TC_API_PASSWORD
# raises KeyError before any request is made.
auth_handler = urllib.request.HTTPBasicAuthHandler()
auth_handler.add_password(realm='TeamCity',
uri='https://teamcity.cockroachdb.com',
user='robot',
passwd=os.environ['TC_API_PASSWORD'])
# Shared opener used for every HTTP request issued by this script.
opener = urllib.request.build_opener(auth_handler)
def tc_url(path, **params):
    """Build a TeamCity REST URL for *path* with *params* as the query string.

    *path* is resolved against BASEURL; the keyword arguments are
    URL-encoded and appended after a '?'.
    """
    endpoint = urljoin(BASEURL, path)
    query = urlencode(params)
    return endpoint + '?' + query
def _fetch_xml(url):
    """Fetch *url* with the shared opener and return the parsed XML tree.

    The HTTP response is closed as soon as parsing finishes, so connections
    do not pile up while walking a deep dependency chain.
    """
    with opener.open(url) as response:
        return ET.parse(response)


def collect_build_results(build_id):
    """Yield a sequence of (name, log) pairs for all failed tests.

    Looks at the given build ID and all its dependencies. Failures from
    non-successful snapshot dependencies are yielded first (recursively),
    followed by the failures of the build itself.
    """
    dep_data = _fetch_xml(tc_url(
        'builds/{0}'.format(build_id),
        fields='snapshot-dependencies(build(id,status))'))
    for b in dep_data.findall("./snapshot-dependencies/build"):
        # Only descend into dependencies that did not succeed.
        if b.attrib['status'] != 'SUCCESS':
            yield from collect_build_results(b.attrib['id'])

    # The locator requests at most 100 failed test occurrences for this build.
    test_data = _fetch_xml(tc_url(
        'testOccurrences',
        locator='count:100,status:FAILURE,build:(id:{0})'.format(build_id),
        fields='testOccurrence(details,name,duration,build(buildType(name)))'))
    for o in test_data.findall('./testOccurrence'):
        # Qualify the test name with its build type name.
        test_name = '{0}/{1}'.format(o.find('build/buildType').attrib['name'],
                                     o.attrib['name'])
        # The duration is divided by 1000 and reported with an "s" suffix,
        # i.e. the attribute is treated as milliseconds.
        test_log = '--- FAIL: {0} ({1:.3f}s)\n{2}\n'.format(
            test_name,
            int(o.attrib["duration"])/1000.,
            o.find("details").text)
        yield (test_name, test_log)
def create_issue(build_id, failed_tests, max_title_len=256):
    """Format a list of failed tests as an issue.

    Returns a dict which should be encoded as json for posting to the
    github API.

    *failed_tests* is a sequence of (name, log) pairs. The branch name is
    read from the TC_BUILD_BRANCH environment variable (KeyError if unset).

    The title lists every failed test name, so it is cut to
    *max_title_len* characters (ending in '...') when it would otherwise be
    longer; GitHub rejects issues whose title exceeds 256 characters. The
    concatenated logs are cut to 60000 characters by the format spec.
    """
    title = 'teamcity: failed tests on {0}: {1}'.format(
        os.environ['TC_BUILD_BRANCH'],
        ', '.join(t[0] for t in failed_tests))
    if len(title) > max_title_len:
        # Leave room for the ellipsis that marks the truncation.
        title = title[:max(max_title_len - 3, 0)] + '...'

    body = '''\
The following tests appear to have failed:
[#{0}](https://teamcity.cockroachdb.com/viewLog.html?buildId={0}):
```
{1:.60000}
```
Please assign, take a look and update the issue accordingly.
'''.format(build_id, ''.join(t[1] for t in failed_tests))

    return {
        'title': title,
        'body': body,
        'labels': ['C-test-failure', 'O-robot'],
    }
def post_issue(issue):
    """POST *issue* to the cockroachdb/cockroach GitHub issue tracker.

    *issue* is a JSON-serializable dict such as the one built by
    create_issue(). The API token is read from the GITHUB_API_TOKEN
    environment variable (KeyError if unset). HTTP failures surface as the
    exceptions raised by the opener.
    """
    req = urllib.request.Request(
        'https://api.github.com/repos/cockroachdb/cockroach/issues',
        data=json.dumps(issue).encode('utf-8'),
        headers={
            'Authorization': 'token {0}'.format(os.environ['GITHUB_API_TOKEN']),
            # Without this urllib labels the payload as form-encoded data.
            'Content-Type': 'application/json',
        })
    # Drain and close the response so the connection is released promptly.
    with opener.open(req) as response:
        response.read()
if __name__ == '__main__':
    # Script entry point: gather failures for the build named by TC_BUILD_ID
    # and file a GitHub issue only when at least one test failed.
    current_build = os.environ['TC_BUILD_ID']
    failures = list(collect_build_results(current_build))
    if failures:
        post_issue(create_issue(current_build, failures))
Oops, something went wrong.

No commit comments for this range