Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks.
Choose a Base Repository
cockroachdb/cockroach
AALEKH/cockroach
Abioy/cockroach
AflenChen/cockroach
Arifur794/cockroach
CodEnFisH/cockroach
DilipLukose/cockroach
El-Coder/cockroach
Frank-Jin/cockroach
GavinHwa/cockroach
GokulSrinivas/cockroach
GrayMissing/cockroach
HanumathRao/cockroach
HengWang/cockroach
HunterChen/cockroach
InsaneYoungStunner/cockroach
Kevin-GuanJian/cockroach
Linicks/cockroach
PragashSiva/cockroach
RaduBerinde/cockroach
SandyZeng/cockroach
Viewtiful/cockroach
XuWanHong/cockroach-1
Zemnmez/cockroach
a-robinson/cockroach
abhishekgahlot/cockroach
alex/cockroach
alisheikh/cockroach
anchal-agrawal/cockroach
andradeandrey/cockroach
angel1991521/cockroach
ansonism/cockroach
axfcampos/cockroach
banks/cockroach
bdarnell/cockroach
bdotdub/cockroach
bigrats/cockroach
bigxing/cockroach
bobpattersonjr/cockroach
bowlofstew/cockroach
brandenyoon/cockroach
briliant1991/cockroach
bussiere/cockroach
bydsky/cockroach
cDoru/cockroach
cainiao1989/cockroach
cdsalmons/cockroach
chagge/cockroach
chunshengster/cockroach
cleverdeng/cockroach
clm971910/cockroach
cn15810092493/cockroach
connecteev/cockroach
dallasmarlow/cockroach
darkseed/cockroach
db-production/cockroach
dfrsg/cockroach
diegode/cockroach
domluna/cockroach
eagle518/cockroach
easyfmxu/cockroach
eclectice/cockroach
elvin-du/cockroach
embark/cockroach
erriapo/cockroach
es-chow/cockroach
esaul/cockroach
flyingliang/cockroach
gaowenbin/cockroach
ghostsun/cockroach
gqf2008/cockroach
grimreaper/cockroach
gstarnberger/cockroach
gude/cockroach
guiquanz/cockroach
hannibalhuang/cockroach
hanshenu/cockroach
hanwoody/cockroach
hcxiong/cockroach
hollis/cockroach
hubt/cockroach
hunslater/cockroach
iamima/cockroach
icattlecoder/cockroach
ikarzali/cockroach
ilovejs/cockroach
jackylk/cockroach
jamesgraves/cockroach
jamiepg1/cockroach
jay23jack/cockroach
jess-edwards/cockroach
jinguoxing/cockroach
jmank88/cockroach
joezxy/cockroach
joliny/cockroach
jonathanmarvens/cockroach
josephwinston/cockroach
josephyzhou/cockroach
joshuawatson/cockroach
jrcjc123/cockroach
jsanc623/cockroach
kanasite/cockroach
kebohiki/cockroach
kkaneda/cockroach
kortschak/cockroach
kritivasas/cockroach
kuguobing/cockroach
lemonhall/cockroach
leomzhong/cockroach
lessc0de/cockroach
lianhuiwang/cockroach
liuzongquan/cockroach
lostz/cockroach
lshmouse/cockroach
luan-cestari/cockroach
lupengfeige/cockroach
mabdullah353/cockroach
mackjoner/cockroach
maniksurtani/cockroach
manithnuon/cockroach
markreg/cockroach
matadorhong/cockroach
meshileya/cockroach
mindis/cockroach
mixiong/cockroach
mjibson/cockroach
mobilipia/cockroach
mohae/cockroach
mrunix/cockroach
msmakhlouf/cockroach
nanderoo/cockroach
neuroradiology/cockroach
neutony/cockroach
nikelius/cockroach
nimishzynga/cockroach
nkgfirecream/cockroach
nmarasoiu/cockroach
ofonimefrancis/cockroach
oldmantaiter/cockroach
ollyblue/cockroach
petermattis/cockroach
picolonet/storage
pinterb/cockroach
pramendra/cockroach
putaozhuose/cockroach
r00tjimmy/cockroach
ramgtv/cockroach
rayleyva/cockroach
sandeepmukho/cockroach
sawanoboly/cockroach
scrooph/cockroach
sdboyer/cockroach
shafiahmed/cockroach
shanyechen/cockroach
shilezi/cockroach
silky/cockroach
slavau/cockroach
sunya123/cockroach
superneo/cockroach
swarbiv/cockroach
sxhao/cockroach
tamird/cockroach
therob3000/cockroach
timwee/cockroach
tml/cockroach
tomzhang/cockroach
toshisam/cockroach
trebogeer/cockroach
treemantris/cockroach
tristartom/cockroach
truthwzl/cockroach
tschottdorf/cockroach
udybrill/cockroach
umegaya/cockroach
vikram/cockroach
vivekmenezes/cockroach
vvydier/cockroach
waderly/cockroach
walkingsparrow/cockroach
wangtuanjie/cockroach
wheelcomplex/cockroach
willmadison/cockroach
wulinjun4/cockroach
wuyu201321060203/cockroach
wycg1984/cockroach
xiaoyulei/cockroach
yacki/cockroach
yananzhi/cockroach
yangyaoweng/cockroach
yanniyang/cockroach
yekeqiang/cockroach
yemaocheng/cockroach
yonglehou/cockroach
zeeshanali/cockroach
zhaixuezhong/cockroach
zhangchn/cockroach
zhanglei/cockroach
zhonghai/cockroach
zimmermamc/cockroach
zofuthan/cockroach
Nothing to show
Choose a Head Repository
cockroachdb/cockroach
AALEKH/cockroach
Abioy/cockroach
AflenChen/cockroach
Arifur794/cockroach
CodEnFisH/cockroach
DilipLukose/cockroach
El-Coder/cockroach
Frank-Jin/cockroach
GavinHwa/cockroach
GokulSrinivas/cockroach
GrayMissing/cockroach
HanumathRao/cockroach
HengWang/cockroach
HunterChen/cockroach
InsaneYoungStunner/cockroach
Kevin-GuanJian/cockroach
Linicks/cockroach
PragashSiva/cockroach
RaduBerinde/cockroach
SandyZeng/cockroach
Viewtiful/cockroach
XuWanHong/cockroach-1
Zemnmez/cockroach
a-robinson/cockroach
abhishekgahlot/cockroach
alex/cockroach
alisheikh/cockroach
anchal-agrawal/cockroach
andradeandrey/cockroach
angel1991521/cockroach
ansonism/cockroach
axfcampos/cockroach
banks/cockroach
bdarnell/cockroach
bdotdub/cockroach
bigrats/cockroach
bigxing/cockroach
bobpattersonjr/cockroach
bowlofstew/cockroach
brandenyoon/cockroach
briliant1991/cockroach
bussiere/cockroach
bydsky/cockroach
cDoru/cockroach
cainiao1989/cockroach
cdsalmons/cockroach
chagge/cockroach
chunshengster/cockroach
cleverdeng/cockroach
clm971910/cockroach
cn15810092493/cockroach
connecteev/cockroach
dallasmarlow/cockroach
darkseed/cockroach
db-production/cockroach
dfrsg/cockroach
diegode/cockroach
domluna/cockroach
eagle518/cockroach
easyfmxu/cockroach
eclectice/cockroach
elvin-du/cockroach
embark/cockroach
erriapo/cockroach
es-chow/cockroach
esaul/cockroach
flyingliang/cockroach
gaowenbin/cockroach
ghostsun/cockroach
gqf2008/cockroach
grimreaper/cockroach
gstarnberger/cockroach
gude/cockroach
guiquanz/cockroach
hannibalhuang/cockroach
hanshenu/cockroach
hanwoody/cockroach
hcxiong/cockroach
hollis/cockroach
hubt/cockroach
hunslater/cockroach
iamima/cockroach
icattlecoder/cockroach
ikarzali/cockroach
ilovejs/cockroach
jackylk/cockroach
jamesgraves/cockroach
jamiepg1/cockroach
jay23jack/cockroach
jess-edwards/cockroach
jinguoxing/cockroach
jmank88/cockroach
joezxy/cockroach
joliny/cockroach
jonathanmarvens/cockroach
josephwinston/cockroach
josephyzhou/cockroach
joshuawatson/cockroach
jrcjc123/cockroach
jsanc623/cockroach
kanasite/cockroach
kebohiki/cockroach
kkaneda/cockroach
kortschak/cockroach
kritivasas/cockroach
kuguobing/cockroach
lemonhall/cockroach
leomzhong/cockroach
lessc0de/cockroach
lianhuiwang/cockroach
liuzongquan/cockroach
lostz/cockroach
lshmouse/cockroach
luan-cestari/cockroach
lupengfeige/cockroach
mabdullah353/cockroach
mackjoner/cockroach
maniksurtani/cockroach
manithnuon/cockroach
markreg/cockroach
matadorhong/cockroach
meshileya/cockroach
mindis/cockroach
mixiong/cockroach
mjibson/cockroach
mobilipia/cockroach
mohae/cockroach
mrunix/cockroach
msmakhlouf/cockroach
nanderoo/cockroach
neuroradiology/cockroach
neutony/cockroach
nikelius/cockroach
nimishzynga/cockroach
nkgfirecream/cockroach
nmarasoiu/cockroach
ofonimefrancis/cockroach
oldmantaiter/cockroach
ollyblue/cockroach
petermattis/cockroach
picolonet/storage
pinterb/cockroach
pramendra/cockroach
putaozhuose/cockroach
r00tjimmy/cockroach
ramgtv/cockroach
rayleyva/cockroach
sandeepmukho/cockroach
sawanoboly/cockroach
scrooph/cockroach
sdboyer/cockroach
shafiahmed/cockroach
shanyechen/cockroach
shilezi/cockroach
silky/cockroach
slavau/cockroach
sunya123/cockroach
superneo/cockroach
swarbiv/cockroach
sxhao/cockroach
tamird/cockroach
therob3000/cockroach
timwee/cockroach
tml/cockroach
tomzhang/cockroach
toshisam/cockroach
trebogeer/cockroach
treemantris/cockroach
tristartom/cockroach
truthwzl/cockroach
tschottdorf/cockroach
udybrill/cockroach
umegaya/cockroach
vikram/cockroach
vivekmenezes/cockroach
vvydier/cockroach
waderly/cockroach
walkingsparrow/cockroach
wangtuanjie/cockroach
wheelcomplex/cockroach
willmadison/cockroach
wulinjun4/cockroach
wuyu201321060203/cockroach
wycg1984/cockroach
xiaoyulei/cockroach
yacki/cockroach
yananzhi/cockroach
yangyaoweng/cockroach
yanniyang/cockroach
yekeqiang/cockroach
yemaocheng/cockroach
yonglehou/cockroach
zeeshanali/cockroach
zhaixuezhong/cockroach
zhangchn/cockroach
zhanglei/cockroach
zhonghai/cockroach
zimmermamc/cockroach
zofuthan/cockroach
Nothing to show
Checking mergeability… Don’t worry, you can still create the pull request.
This comparison is big! We’re only showing the most recent 250 commits
Commits on Aug 24, 2018
knz
sql: desugar TEXT to STRING
The distinction between "TEXT" and "STRING" inside CockroachDB is
inconsequential. Remove it. The data type is already properly reported
in pg_catalog and information_schema as "text".

Release note: None
knz
sql: fix the handling of string types
This patch corrects two long-standing bugs in CockroachDB which were
confusing both 3rd party tools/ORMs and users:

- the SQL standard type called "CHAR" is supposed to have a default
  maximum width of 1 character.

  Prior to this patch, CockroachDB did not support this and instead
  set no default maximum width on CHAR. This would cause CockroachDB
  to fail validating the width of values inserted in tables, fail
  to constrain the width of values during casts to CHAR, and report
  incorrect information in introspection tables.

- PostgreSQL's dialect distinguishes the table column types TEXT,
  CHAR, VARCHAR and a special thing called `"char"` (this is a legacy
  PostgreSQL type which is, unfortunately, still used by some
  pg_catalog tables).

  Prior to this patch, CockroachDB would map all of these types into
  the same column type "STRING". In turn this caused them to show
  up as "text" in introspection.

  While this was not incorrect from the perspective of value
  handling (all these types are, indeed, handled with about the same
  value semantics in both PostgreSQL and CockroachDB), ORMs
  do pay attention to this distinction and become confused if they
  see a different type in introspection (e.g. "text") than what
  the application model requires (e.g. "char"). The result of this
  confusion was to trigger a schema change to adapt the type, which
  in turn fails due to missing features in ALTER COLUMN TYPE.

This patch corrects both problems by giving the appropriate default
width to CHAR and preserving the distinction between the various string
types in column descriptors and thus introspection.

Additionally, a width constraint check on collated string columns was
missing. This is now fixed too.

Tables and columns created prior to this patch are unaffected.

Release note (bug fix): CockroachDB now distinguishes "CHAR" and
"VARCHAR" as mandated by the SQL standard and PostgreSQL
compatibility. When a width is not specified (e.g. `CHAR` rather than `CHAR(3)`), the
maximum width of `VARCHAR` remains unconstrained whereas the maximum
width for `CHAR` is now 1 character.

Release note (bug fix): CockroachDB now properly checks the width of
strings inserted in a collated string column with a specified width.

Release note (sql change): CockroachDB now preserves the distinction
between different column types for string values like in PostgreSQL,
for compatibility with 3rd party tools and ORMs.
knz
sql: desugar JSON to JSONB
The distinction between the type names "JSON" and "JSONB" is
inconsequential. The types are already reported properly in
introspection using the right alias. This patch removes this
distinction in code.

Release note: None
knz
sql: desugar "TIMESTAMP WITH TIME ZONE" to "TIMESTAMPTZ"
Prior to this patch the canonical name for the timestamptz type in
CockroachDB was "TIMESTAMP WITH TIME ZONE" with spaces and all
glorious 24 characters.

This is unfortunate because this is a pretty common type, and is
moreover used to disambiguate the return type of `now()` and thus will
be emitted pretty often in distsql physical plans. Having such a long
type name is unnecessary and indeed adds 50%+ storage, communication
and parsing overhead for every occurrence of this type in
distsql plans.

This patch shortens the canonical CockroachDB name to "TIMESTAMPTZ".

The representation of the type in introspection tables (pg_catalog,
information_schema) is unaffected.

Release note: None
knz
sql/coltypes: remove the field `Name` in TArray
It was not needed. This simplifies.

Release note: None
knz
sql: fix the handling of integer types
Prior to this patch, CockroachDB maintained an unnecessary distinction
between "INT" and "INTEGER", between "BIGINT" and "INT8", etc.

This distinction is unnecessary but also costly, as we were paying the
price of a "name" attribute in coltypes.TInt, with a string comparison
and hash table lookup on every use of the type.

What really matters is that the type shows up properly in
introspection; this has already been ensured by various
OID-to-pgcatalog mappings and the recently introduced
`InformationSchemaTypeName()`.

Any distinction beyond that is unnecessary and can be dropped from the
implementation.

Release note: None
craig[bot] and knz
Merge #29006
29006: backport-2.1: fix the handling of SQL types r=knz a=knz

Backport 1/1 commits from #28937.
Backport 1/1 commits from #28942.
Backport 17/17 commits from #28943.
Backport 1/1 commits from #28944.
Backport 1/1 commits from #28945.
Backport 1/1 commits from #28949.
Backport 2/2 commits from #28950.
Backport 1/1 commits from #28951 (#28959).
Backport 1/1 commits from #28952 (#28959).
Backport 1/1 commits from #28955 (#28959).
Backport 1/1 commits from #28959.
Backport 1/1 commits from #28959.
Backport 1/1 commits from #28690.

cc @cockroachdb/release 

Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
craig[bot] and tschottdorf
Merge #29030
29030: backport-2.1: storage: assorted stability fixes r=knz a=tschottdorf

Release note: None

Co-authored-by: Tobias Schottdorf <tobias.schottdorf@gmail.com>
craig[bot] and asubiotto
Merge #28972
28972: release-2.1: distsql: disable EXPLAIN ANALYZE (DISTSQL) when distsql=off r=benesch a=asubiotto

Backport 1/1 commits from #28914.

/cc @cockroachdb/release

---

This makes sense since queries should not be executed through distsql if
distsql=off.

Release note: None


Co-authored-by: Alfonso Subiotto Marqués <alfonso@cockroachlabs.com>
storage: Add exponential backoff in relocate range retry loop
Retrying snapshots as aggressively as we were before this change is not
helpful in any way.

Release note: None
util/log: Warn about cost of verbosity and vmodule flags
This bit me pretty bad when doing performance testing. I turned on
vmodule for a file which hardly logs at all, but all the calls to
log.VDepth slowed the system down to a crawl.

Alternatively we could improve log.VDepth to make this less of a
concern. It's already been handcrafted to avoid allocations, but holding
the logging lock while calling runtime.Callers is not good.

Release note: None
util/log: Reduce lock overhead in VDepth
By using a sync.Pool to manage the PC buffers for us. This avoids
locking while calling runtime.Callers without needing to allocate a new
buffer every time VDepth is called.

Take the benchmarks with a grain of salt since they're just from my
laptop, but

Before the change:
$ make bench PKG=./pkg/util/log/ BENCHES=BenchmarkVDepthWithVModule
BenchmarkVDepthWithVModule-8     2000000               825 ns/op

After the change:
$ make bench PKG=./pkg/util/log/ BENCHES=BenchmarkVDepthWithVModule
BenchmarkVDepthWithVModule-8     5000000               284 ns/op

Release note: None
Commits on Aug 26, 2018
sql: make sure lookup join closes its inputs
Previously, local lookup join wasn't programmed to close itself properly
- if it wasn't running in local fallback mode, it would fail to
propagate its close signal to its upstream planNodes. This could lead to
memory account errors if the upstream planNodes were retaining memory.

Release note: None
storage,kv: remove some allocations
Guard an expensive log.V in a log.ExpensiveLogEnabled.

Pass roachpb.Lease by value in one situation in which allocating it is
pointless.

Release note: None
craig[bot] and jordanlewis
Merge #29074
29074: backport-2.1: sql: make sure lookup join closes its inputs r=jordanlewis a=jordanlewis

Backport 1/1 commits from #29073.

/cc @cockroachdb/release

---

Previously, local lookup join wasn't programmed to close itself properly
- if it wasn't running in local fallback mode, it would fail to
propagate its close signal to its upstream planNodes. This could lead to
memory account errors if the upstream planNodes were retaining memory.

Needs a backport.

Release note: None


Co-authored-by: Jordan Lewis <jordanthelewis@gmail.com>
Commits on Aug 27, 2018
dt and benesch
sql: put rendered setting value in event log entry
previously we'd include the pre-rendered expression, which is particularly useless with placeholders.

Fixes #28992.

Release note: none.
dt and benesch
sql: SET CLUSTER SETTING is not transactional
SET CLUSTER SETTING does a transactional write to the system table.
However settings values are _read_ from an in-memory cache which is
not updated until the system table write is gossiped and that gossip
update is processed by the settings worker.

Release note (sql change): SET CLUSTER SETTING cannot be used inside a transaction.
dt and benesch
sql,settings: make SET CLUSTER SETTING wait for value to update
Settings values are cached in memory and that cache is updated via gossip after a SET commits.
That delay -- for the update to arrive via gossip and be processed and the cache updated --
can mean that a statement executed immediately following a SET CLUSTER SETTING may execute
before the new setting takes effect, and knowing when it is safe to execute a statement that
uses the new setting is tricky.

Instead, we can make SET CLUSTER SETTING _wait_ until it sees the value it set returned when
reading the settings cache (or return an error if it does not appear after a timeout of 30s).
This should make it easier to set a setting and then use that value in subsequent statements.

Release note (sql change): SET CLUSTER SETTING attempts to wait until change has been gossiped.
storage: discard an unworthwhile merge TODO
This TODO suggested removing maybeWatchForMerge by teaching the LHS
replica to reach into the RHS replica after a merge committed to unblock
requests. It failed to consider that we'd need to do the same if the
merge aborted. We don't presently have an abort trigger, so this is
excessively difficult. Simply discard the TODO; the status quo is fine.

Release note: None
storage: update variable names in Store.SplitRange
Store.SplitRange was still using the old term "range" to refer to
replicas. Also use "left" and "right" instead of "orig" and "new" for
consistency with Store.MergeRange.

Release note: None
storage: update state using trigger's desc during splits/merges
It is important that the in-memory copy of a replica's range descriptor
exactly match the on-disk copy. Previously, during splits/merges, we
would reconstruct the updates to the in-memory descriptor piecemeal.
This is dangerous, especially in mixed version clusters, where nodes can
disagree about what updates to the descriptor to perform. Notably,
splits only update the generation counter in v2.1.

Instead of trying to reconstruct the updates piecemeal during a split
or merge, simply adopt the updated descriptor from the split/merge
trigger wholesale.

We'd ideally adjust replica changes to operate the same way.
Unfortunately the updated descriptor is not currently included in the
ChangeReplicasTrigger, so the migration is rather involved.

Release note: None
storage: gate merges behind a cluster setting
Merges will explode if used in a mixed-version cluster. Gate them behind
a cluster setting.

For extra safety, also gate increments of the new generation counter in
the range descriptor behind the same cluster setting. It's not clear
exactly what would go wrong, if anything, if mixed-version clusters
incremented the generation counter, but better to be extra cautious.

Release note: None
storage: update the tscache appropriately after a merge
When applying a merge, teach the leaseholder of the LHS range to update
its timestamp cache for the keyspace previously owned by the RHS range
appropriately.

Release note: None
storage: avoid stopping twice in merge test
TestStoreRangeMergeDuringShutdown shuts down the multiTestContext when
it applies a lease for the RHS. In rare cases, the lease application can
get replayed, which previously caused the multiTestContext to get
shutdown twice, which panics. Add additional state to prevent this case.

Fix #28894.

Release note: None
roachpb,storage: rename GetSnapshotForMerge to Subsume
GetSnapshotForMerge no longer fetches a snapshot of the RHS of a merge.
Rename the method Subsume to better reflect its purpose: to freeze a
range before it is subsumed by its left-hand neighbor.

Release note: None
build: fix generation rules for settings.html
The rules for generating the settings HTML table got broken at some
point. Fix them by using target-specific variables properly: they only
apply to prerequisites of the declaring target, and are only resolved
within recipes.

Release note: None
storage: deflake TestStoreRangeMergeTimestampCacheCausality
TestStoreRangeMergeTimestampCacheCausality could time out if the merge
transaction retried, which occurred about one out of every two hundred
runs, because it would try to send multiple values over a channel whose
buffer had room for only one value. Deflake the test by remembering in a
variable the value from the last merge transaction to execute rather
than using channels.

Release note: None
storage: check for in-progress merge before installing new lease
During a merge, it is possible for the RHS lease to change hands, e.g.,
when the original leaseholder dies and another member of the range
acquires the lease. In this case, the new leaseholder is responsible for
checking for a deletion intent on its local range descriptor; if it
discovers such an intent, a merge is in progress and the leaseholder is
required to block all traffic unless it can prove that the merge
aborted.

The previous implementation of this scheme had a small window in which
the new leaseholder had installed a valid lease but had not yet
installed a mergeComplete channel to block all traffic. This race was
never seen in practice, but it could, in theory, lead to a
serializability violation.

Reorder the flow post-lease acquisition so that checking for an
in-progress merge occurs before the new lease is installed.

Release note: None
storage: remove MergeMaxRHSSize setting
Now that merges do not include a snapshot of the RHS data in the merge
trigger, we no longer need a setting limiting the size of the RHS of a
merge.

Release note: None
storage: turn down default merge queue interval
Merges are relatively expensive. Set the merge queue interval to one
second so we avoid processing too many merges at once. Introduce a
cluster setting to allow users/tests to adjust the merge queue interval
if they so choose.

Fix #27769.

Release note: None
storage: check for merges in AdminSplit retry loop
The retry loop in AdminSplit can span many seconds. In that time, the
replica may lose its lease, or the range might be merged away entirely.
In either of those cases, the split can never succeed, and so the retry
loop needs to give up.

The loop was properly exiting if it noticed it lost its lease, but a
range can get merged away without losing its lease. The final lease on
that range remains valid until the liveness epoch it is tied to expires.
Teach the loop to notice that condition too by checking
Replica.IsDestroyed on every turn of the loop.

Release note: None
storage: update zone config installation in TestSystemZoneConfigs
Teach TestSystemZoneConfigs to install zone configs via SQL, rather than
the hacky testing override system, which interacts poorly with the
forthcoming on-by-default merge queue.

Release note: None
storage: prepare to enable merge queue by default
Turn off the merge queue in all tests that need it. The actual default
will be changed in a separate PR so that this commit can be safely
backported to release-2.1.

Release note: None
sql: prevent SPLIT AT when the merge queue is enabled
Splitting while the merge queue is enabled is almost certainly a user
mistake. Add a best-effort check to prevent users from splitting while
the merge queue is enabled. Users can override the check and request a
split anyway by twiddling a new session variable,
experimental_force_split_at.

We have plans to eventually make the splits created by SPLIT AT
"sticky", so that the merge queue does not immediately merge them away,
but not in time for 2.1.

Release note (sql change): ALTER TABLE ... SPLIT AT now produces an
error if executed while the merge queue is enabled, as the merge queue
is likely to immediately discard any splits created by the statement.
storage: improve consistency checker diff
Avoid spilling non-printable characters and generally garbage into the
logs.

Improve legibility and make sure that the printed key
value pair works for `./cockroach debug decode-key`.

Release note: None
cli: add and test `debug decode-value`
Inspired by #28995, where I needed this.

Release note: None
storage: fix slow quota pool log
"have been waiting 86 B attempting to acquire 1m0.000123123s of
proposal quota" didn't quite look right. The other log below didn't
look much better.

Release note: None
storage/txnwait: don't leak waiting queries in MaybeWaitForQuery
Fixes #28849.

The theorized leak in #28849 was almost spot on. I instrumented the original
code and pointed 6 workload generators running `tpcc` with `wait=false` at a
single node (adding more workers to a single workload generator wasn't as good
at creating contention). I was pretty quickly able to see the size of
`Queue.mu.queries` grow on a few hot ranges due to context cancellation in
`MaybeWaitForQuery`. It turns out that both context cancellation and the
internal timeout in `MaybeWaitForQuery` (`maxWaitCh`) could result in a memory
leak.

This change fixes this leak by doing a better job ensuring that waiters clean up
after themselves if they stop waiting prematurely. In doing so, it simplifies
the `waitingQueries` structure by changing it from a slice of channels and a
useless `*roachpb.QueryTxnRequest` (that exacerbated the leak and turned out to
be quite useful in debugging it) to a single refcounted channel. With this
structure, its straightforward for waiters to unregister prematurely and clean
up associated resources if necessary.

I could not reproduce the same behavior with `tpcc` after this change. I wasn't
able to get the size of `Queue.mu.queries` to grow without bound.

Release note (bug fix): Fix memory leak when contended queries timed out.
knz
base: make TestValidateAddrs portable
On (at least) osx the port name resolution error is different. This
patch modifies the test to catch that.

Release note: None
4 authors
Merge #29033 #29082
29033: backport-2.1: storage: improve consistency checker diff, add debug decode-key r=a-robinson a=tschottdorf

Backport 2/2 commits from #29012.

/cc @cockroachdb/release

---

Avoid spilling non-printable characters and generally garbage into the
logs.

Release note: None


29082: backport-2.1: one week of merge PRs r=knz,tschottdorf a=benesch

Backport:
  * 1/1 commits from "sql: put rendered setting value in event log entry" (#29014)
  * 2/2 commits from " sql,settings: make SET CLUSTER SETTING wait for value to update, disallow in txn" (#29063)
  * 1/1 commits from "storage: discard a unworthwhile merge TODO" (#28885)
  * 3/3 commits from "storage: gate merges behind a cluster setting " (#28865)
  * 1/1 commits from "storage: update the tscache appropriately after a merge" (#28793)
  * 1/1 commits from "storage: avoid stopping twice in merge test" (#28902)
  * 1/1 commits from "roachpb,storage: rename GetSnapshotForMerge to Subsume" (#28910)
  * 1/1 commits from "build: fix generation rules for settings.html" (#28884)
  * 1/1 commits from "storage: deflake TestStoreRangeMergeTimestampCacheCausality" (#28928)
  * 1/1 commits from "storage: check for in-progress merge before installing new lease" (#28894)
  * 6/7 commits from " storage: enable merge queue by default" (#28961)

Please see individual PRs for details.

/cc @cockroachdb/release


Co-authored-by: Tobias Schottdorf <tobias.schottdorf@gmail.com>
Co-authored-by: David Taylor <tinystatemachine@gmail.com>
Co-authored-by: Nikhil Benesch <nikhil.benesch@gmail.com>
craig[bot] and a-robinson
Merge #29049
29049: backport-2.1: util/log: Speed up vmodule support and warn about it in help text r=a-robinson a=a-robinson

Backport 2/2 commits from #29017.

/cc @cockroachdb/release

---

It's possible that with the second commit the help text change is no longer needed, but I'm curious what you think about that. I haven't tried re-running tpc-c 10k with this, so while I'm pretty sure this is a lot better I don't know how much of a performance hit there still is.


Co-authored-by: Alex Robinson <alexdwanerobinson@gmail.com>
craig[bot] and a-robinson
Merge #29048
29048: backport-2.1: storage: Add exponential backoff in relocate range retry loop r=a-robinson a=a-robinson

Backport 1/1 commits from #29013.

/cc @cockroachdb/release

---

Retrying snapshots as aggressively as we were before this change is not
helpful in any way.

Release note: None


Co-authored-by: Alex Robinson <alexdwanerobinson@gmail.com>
opt: fix LookupJoinDef interning, add tests
Fixing an omission I noticed in internLookupJoinDef and adding missing
tests for interning defs.

Release note: None
build: check out submodules before building UI protobufs
Building UI protobufs indirectly requires that submodules have been
checked out, as the Raft protobuf lives in the vendor submodule.

Fix #28593.

Release note: None
closedts/transport: avoid race when using stopper.RunWorker
Stopper.RunWorker cannot be called in code which can run concurrently
with Stopper.Stop. Use Stopper.RunAsyncTask instead.

Fix #28755.

Release note: None
storage: fix raft snapshots that span merges and splits
The code that handles Raft snapshots that span merges did not account
for snapshots that spanned merges AND splits. Handle this case by
allowing snapshot subsumption even when the snapshot's end key does not
exactly match the end of an existing replica. See the commits within the
patch for details.

Fix #29080.

Release note: None
storage: deflake TestStoreRangeMergeReadoptedBothFollowers
TestStoreRangeMergeReadoptedBothFollowers expects that attempting to
place merged range [a, c) onto a store that has orphaned pre-merge
replicas [a, b) and [b, c) fails with the following message:

    cannot apply snapshot: snapshot intersects existing range

In rare cases, the snapshot application could instead fail with:

    remote failed to apply snapshot for reason failed to apply snapshot: snapshot intersects existing range

The first message is generated by the canApplySnapshot check that occurs
when the reservation is made. The second message is generated by the
canApplySnapshot check that occurs after the snapshot is received.

The test, not unfairly, expects to only see the first message; it has
orphaned two intersecting replicas onto the receiving store, and those
intersecting replicas should fail the snapshot in the first check! It
turns out that, in rare cases, the snapshot could arrive at the
receiving store before the replicas were initialized. The snapshot would
slip through the first check, but receiving the snapshot would take some
time, and so the replicas would be initialized by the second check.

Teach the test to properly wait for the replicas on the receiving store
to be fully initialized before sending any snapshots.

Fix #29078.

Release note: None
storage: protect ComputeChecksum commands from replaying
Previously, a ComputeChecksum command could apply twice with the same
ID. Consider the following sequence of events:

  1. DistSender sends a ComputeChecksum request to a replica.
  2. The request is successfully evaluated and proposed, but a connection
     error occurs.
  3. DistSender retries the request, leaving the checksum ID unchanged!

This would result in two ComputeChecksum commands with the same checksum
ID in the Raft log. Somewhat amazingly, this typically wasn't
problematic. If all replicas were online and reasonably up-to-date,
they'd see the first ComputeChecksum command, compute its checksum, and
store it in the checksums map. When they saw the duplicated
ComputeChecksum command, they'd see that a checksum with that ID already
existed and ignore it. In effect, only the first ComputeChecksum command
for a given checksum ID mattered.

The problem occurred when one replica saw one ComputeChecksum command but
not the other. There were two ways this could occur. A replica could go
offline after computing the checksum the first time; when it came back
online, it would have an empty checksum map, and the checksum computed
for the second ComputeChecksum command would be recorded instead. Or a
replica could receive a snapshot that advanced it past one
ComputeChecksum but not the other. In both cases, the replicas could
spuriously fail a consistency check.

A very similar problem occurred with range merges because ComputeChecksum
requests are incorrectly ranged (see #29002). That means DistSender
might split a ComputeChecksum request in two. Consider what happens when
a consistency check occurs immediately after a merge: the
ComputeChecksum request is generated using the up-to-date, post-merge
descriptor, but DistSender might have the pre-merge descriptors cached,
and so it splits the batch in two. Both halves of the batch would get
routed to the same range, and both halves would have the same command
ID, resulting in the same duplicated ComputeChecksum command problem.

The fix for these problems is to assign the checksum ID when the
ComputeChecksum request is evaluated. If the request is retried, it will
be properly assigned a new checksum ID. Note that we don't need to worry
about reproposals causing duplicate commands, as the MaxLeaseIndex
prevents proposals from replay.

The version compatibility story here is straightforward. The
ReplicaChecksumVersion is bumped, so v2.0 nodes will turn
ComputeChecksum requests proposed by v2.1 nodes into a no-op, and
vice-versa. The consistency queue will spam some complaints into the log
about this--it will time out while collecting checksums--but this will
stop as soon as all nodes have been upgraded to the new version.†

Note that this commit takes the opportunity to migrate
storagebase.ReplicatedEvalResult.ComputeChecksum from
roachpb.ComputeChecksumRequest to a dedicated
storagebase.ComputeChecksum message. Separate types are more in line
with how the merge/split/change replicas triggers work and avoid
shipping unnecessary fields through Raft. Note that even though this
migration changes logic downstream of Raft, it's safe. v2.1 nodes will
turn any ComputeChecksum commands that were committed by v2.0 nodes into
no-ops, and vice-versa, but the only effect of this will be some
temporary consistency queue spam. As an added bonus, because we're
guaranteed that we'll never see duplicate v2.1-style ComputeChecksum
commands, we can properly fatal if we ever see a ComputeChecksum request
with a checksum ID that we've already computed.

† It would be possible to put the late-ID allocation behind a cluster
version to avoid the log spam, but that amounts to allowing v2.1 to
initiate known-buggy consistency checks. A bit of log spam seems
preferable.

Fix #28995.
storage: make ComputeChecksum a point request
ComputeChecksum was previously implemented as a range request, which
meant it could be split by DistSender, resulting in two ComputeChecksum
requests with identical IDs! If those split ranges get routed to the
same range (e.g. because the ranges were just merged), spurious checksum
failures could occur (#28995). Plus, the ComputeChecksum request would
not actually look at the range boundaries in the request header; it
always operated on the range's entire keyspace at the time the request
was applied.

The fix is simple: make ComputeChecksum a point request. There are no
version compatibility issues here; nodes with this commit are simply
smarter about routing ComputeChecksum requests to only one range.

Fix #29002.

Release note: None
opt: Streamline join name dup detection
Use a different algorithm for join name dup detection that does not
require allocating a map and using the slow FQString function.

Release note: None
opt: Improve statistics building perf
The existing statistics code uses two maps to store statistics. Since
most expressions end up having just a handful of column statistics,
these maps are an expensive way to store and look up stats. This commit
introduces a new dedicated data structure for maintaining and accessing
column statistics: ColStatsMap.

ColStatsMap uses inline storage to store the first 3 column statistics.
After that, it allocates a slice and constructs a lookup index. The
lookup index can handle both single and multi column statistics by using
a prefix tree stored in a single Go map[int64]int64. This cuts down on
allocations and also performs well.

Release note: None
storage: Up-replicate before removing replicas from dead stores
Fixes #25392, by preventing the following series of events:

1. Node x dies
2. We remove node x's replica of some range
3. Node y dies before we up-replicate, leaving the range unavailable (1
   out of 2 replicas dead)
4. Node x comes back online. It can't help the situation, because its
   replica was officially removed from the range.

Instead, we now up-replicate first before removing node x's replica.

Release note: None
craig[bot] and benesch
Merge #29126
29126: backport-2.1: another round of merge bug fixes r=tschottdorf a=benesch

Backport:
  * 1/1 commits from "storage: fix raft snapshots that span merges and splits" (#29083)
  * 1/1 commits from "storage: deflake TestStoreRangeMergeReadoptedBothFollowers" (#29084)
  * 1/1 commits from "storage: protect ComputeChecksum commands from replaying" (#29067)
  * 1/1 commits from "storage: make ComputeChecksum a point request" (#29079)

Please see individual PRs for details.

/cc @cockroachdb/release


Co-authored-by: Nikhil Benesch <nikhil.benesch@gmail.com>
craig[bot] and benesch
Merge #29125
29125: backport-2.1: closedts/transport: avoid race when using stopper.RunWorker r=tschottdorf a=benesch

Backport 1/1 commits from #28903.

/cc @cockroachdb/release

---

Stopper.RunWorker cannot be called in code which can run concurrently
with Stopper.Stop. Use Stopper.RunAsyncTask instead.

Fix #28755.

Release note: None


Co-authored-by: Nikhil Benesch <nikhil.benesch@gmail.com>
jobs: mark jobs as failed if they have no progress field
If the cluster has been upgraded to 2.1, any job that doesn't have
a progress field is from a 2.0 cluster, and we should not attempt to
resume it.

Release note (bug fix): Improve handling of jobs from old clusters.
craig[bot] and mjibson
Merge #29019
29019: release-2.1: jobs: mark jobs as failed if they have no progress field r=mjibson a=mjibson

Backport 1/1 commits from #28940.

/cc @cockroachdb/release

---

If the cluster has been upgraded to 2.1, any job that doesn't have
a progress field is from a 2.0 cluster, and we should not attempt to
resume it.

Release note (bug fix): Improve handling of jobs from old clusters.


Co-authored-by: Matt Jibson <matt.jibson@gmail.com>
4 authors
Merge #29077 #29103 #29121
29077: backport-2.1: storage,kv: remove some allocations r=jordanlewis a=jordanlewis

Backport 1/1 commits from #29075.

/cc @cockroachdb/release

---

Guard an expensive log.V in a log.ExpensiveLogEnabled.

Pass roachpb.Lease by value in one situation in which allocating it is
pointless.

cc @benesch

This seems to improve read-only kv performance by almost 5%!

Release note: None


29103: release-2.1: base: make TestValidateAddrs portable r=knz a=knz

Backport 1/1 commits from #29101.

/cc @cockroachdb/release

---

On (at least) osx the port name resolution error is different. This patch
modifies the test to catch that.

Release note: None


29121: release-2.1: opt: fix LookupJoinDef interning, add tests r=RaduBerinde a=RaduBerinde

Backport 1/1 commits from #29117.

/cc @cockroachdb/release

---

Fixing an omission I noticed in internLookupJoinDef and adding missing
tests for interning defs.

Release note: None


Co-authored-by: Jordan Lewis <jordanthelewis@gmail.com>
Co-authored-by: Raphael 'kena' Poss <knz@thaumogen.net>
Co-authored-by: Radu Berinde <radu@cockroachlabs.com>
opt: minor ColList cleanup
Making `ColList` a type instead of an alias, and moving `ColListToSet`
to a method on this type.

Release note: None
vendor: remove bin/pprof binary
This allows us to remove our vendored "github.com/google/pprof"
dependency, which in turn means we can remove the recently
added "github.com/chzyer/readline" dependency.

Release note: None
cli: Provide helpful error message if quit run before init
Previously it would log an error but then print "ok".

Release note (cli change): Improved error message printed when quit is
run on a node that has not yet been initialized.
distsqlrun: bugfixes/perf change to joinReader
Previously, joinReader used pretty-printed string keys to correlate its
input rows with its looked-up rows. This was incorrect and inefficient.

Now, it uses bytes keys like everything else. Also, fix a bug that
caused partial key creation from EncDatums on tables with interleaves to
panic and/or return incorrect results.

```
name                      old time/op    new time/op     delta
JoinReader/rows=16-24        134µs ± 1%       57µs ± 1%   -57.39%  (p=0.000 n=9+10)
JoinReader/rows=256-24      1.54ms ± 1%     0.42ms ± 1%   -72.43%  (p=0.000 n=10+9)
JoinReader/rows=4096-24     45.4ms ± 1%     24.5ms ± 1%   -46.14%  (p=0.000 n=8+10)
JoinReader/rows=65536-24     5.03s ± 1%      4.80s ± 1%    -4.64%  (p=0.000 n=9+10)

name                      old speed      new speed       delta
JoinReader/rows=16-24     2.87MB/s ± 1%   6.74MB/s ± 1%  +134.76%  (p=0.000 n=7+10)
JoinReader/rows=256-24    4.00MB/s ± 1%  14.52MB/s ± 1%  +262.83%  (p=0.000 n=10+10)
JoinReader/rows=4096-24   2.17MB/s ± 1%   4.02MB/s ± 1%   +85.64%  (p=0.000 n=8+10)
JoinReader/rows=65536-24   310kB/s ± 0%    327kB/s ± 2%    +5.48%  (p=0.000 n=10+10)

name                      old alloc/op   new alloc/op    delta
JoinReader/rows=16-24       40.4kB ± 1%     18.1kB ± 1%   -55.08%  (p=0.000 n=10+10)
JoinReader/rows=256-24       390kB ± 0%       93kB ± 1%   -76.07%  (p=0.000 n=8+9)
JoinReader/rows=4096-24     5.96MB ± 0%     1.25MB ± 1%   -78.99%  (p=0.000 n=9+9)
JoinReader/rows=65536-24    99.2MB ± 6%     23.2MB ±15%   -76.66%  (p=0.000 n=10+10)

name                      old allocs/op  new allocs/op   delta
JoinReader/rows=16-24          646 ± 0%        179 ± 0%   -72.37%  (p=0.000 n=10+10)
JoinReader/rows=256-24       8.77k ± 0%      1.33k ± 0%   -84.83%  (p=0.000 n=10+9)
JoinReader/rows=4096-24       139k ± 0%        20k ± 0%   -85.48%  (p=0.000 n=9+9)
JoinReader/rows=65536-24     2.24M ± 1%      0.33M ± 7%   -85.14%  (p=0.000 n=9+9)
```

Release note: None
craig[bot] and a-robinson
Merge #29152
29152: backport-2.1: cli: Provide helpful error message if quit run before init r=a-robinson a=a-robinson

Backport 1/1 commits from #29146.

/cc @cockroachdb/release

---

Fixes #27957.

Previously it would log an error but then print "ok".

Release note (cli change): Improved error message printed when quit is
run on a node that has not yet been initialized.

Running quit against an uninitialized node now looks like:

```
$ ./cockroach quit --insecure
Error: node cannot be shut down before it has been initialized
Failed running "quit"
```


Co-authored-by: Alex Robinson <alexdwanerobinson@gmail.com>
craig[bot] and danhhz
Merge #28923
28923: release-2.1: changefeedccl,roachtest: more changefeed introspection for debugging r=mrtracy a=danhhz

Backport 1/1 commits from #28853.

/cc @cockroachdb/release

Closes #28922.

---

Fix up the roachtests to use the high-water timestamp in
crdb_internal.jobs, now that it exists. Also lengthen the roachtest to get
a better sense of how variable the latencies are over a longer period.

Add a couple more metrics (changefeed.emit_nanos and changefeed.flushes)
that are helpful in debugging performance problems with the sink.
Timeseries storage is expensive, is this too many metrics?

Release note (enterprise change): Added additional monitoring metrics
for changefeeds.


Co-authored-by: Daniel Harrison <daniel.harrison@gmail.com>
knz
base: prefer IPv4 in mixed IPv4/IPv6 environments
CockroachDB has traditionally relied on name resolution by Go's
`net.Listen()` function. This code, for better or for worse, is
written to prefer IPv4 if the name provided resolves to both IPv4 and
IPv6 addresses.

When the CockroachDB code was recently changed to perform resolution
upfront, this behavior was not preserved and may have induced UX
surprise.

This patch recovers the pre-2.1 behavior by again preferring IPv4 if
both address types are available.

Release note (bug fix): CockroachDB will now again prefer using
an IPv4 listen address if a host name with both IPv4 and IPv6
addresses is provided to `--host`/`--listen-addr`/`--advertise`.
3 authors
Merge #29147 #29158
29147: release-2.1: vendor: update pprof r=nvanbenschoten a=nvanbenschoten

Backport 2/2 commits from #28834.

/cc @cockroachdb/release

---

Picks up:
- google/pprof#412
- google/pprof#414


29158: release-2.1: config: prefer IPv4 in mixed IPv4/IPv6 environments r=knz a=knz

Backport 1/1 commits from #29102.

/cc @cockroachdb/release

---

Fixes #28713.

CockroachDB has traditionally relied on name resolution by Go's
`net.Listen()` function. This code, for better or for worse, is
written to prefer IPv4 if the name provided resolves to both IPv4 and
IPv6 addresses.

When the CockroachDB code was recently changed to perform resolution
upfront, this behavior was not preserved and may have induced UX
surprise.

This patch recovers the pre-2.1 behavior by again preferring IPv4 if
both address types are available.

Release note (bug fix): CockroachDB will now again prefer using
an IPv4 listen address if a host name with both IPv4 and IPv6
addresses is provided to `--host`/`--listen-addr`/`--advertise`.


Co-authored-by: Nathan VanBenschoten <nvanbenschoten@gmail.com>
Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
opt: hide stats in logprops tests
Hiding stats in the logprops tests since stats have their own tests.

Release note: None
knz
cli: avoid warning two times about deprecated flags with --background
Prior to this patch, `cockroach start` would produce flag deprecation
warnings two times if `--background` was also specified.

This patch fixes that.

Release note: None
knz
cli/start: improve readiness signalling
Prior to this patch, `cockroach start` would only write the PID to the
file specified by `--pid-file` when the server was done
bootstrapping. This prevented other proceses to use the PID file as a
signal that a non-bootstrapped server has started listening (but may
not have bootstrapped yet). This patch changes the behavior to make
the PID available as soon as the server listens.

In contrast, the listening URL is still only written after the server
has done bootstrapping and is able to serve SQL sessions. This remains
the reliable way to wait until a server is ready to accept clients.

Also, prior to this patch, `cockroach start` would still print stuff
to the standard output after going to the background. This is poor UX
because at that point the shell has taken control of the terminal
again. This patch avoids outputting to the standard streams if running
in the background.

Release note (cli change): `cockroach start` now emits the PID of the
server process to the file specified with `--pid-file` as soon as it
is ready to accept network connections, but possibly before it is done
bootstrapping (i.e. `cockroach init` completes). To wait for SQL
readiness, use `--listen-url-file` instead.

Release note (bug fix): `cockroach start` now avoids printing messages
to standard output after it has detached to the background when
`--background` is specified.
craig[bot] and nvanbenschoten
Merge #29097 #29098 #29099
29097: backport-2.1: cli: remove "pretty" as display_format option r=nvanbenschoten a=nvanbenschoten

Backport 1/1 commits from #28968.

/cc @cockroachdb/release

---

This hasn't been an option since #28465.

Release note: None


29098: backport-2.1: storage: fix slow quota pool log r=nvanbenschoten a=nvanbenschoten

Backport 1/1 commits from #29054.

/cc @cockroachdb/release

---

"have been waiting 86 B attempting to acquire 1m0.000123123s of
proposal quota" didn't quite look right. The other log below didn't
look much better.

Release note: None


29099: backport-2.1: storage/txnwait: don't leak waiting queries in MaybeWaitForQuery r=nvanbenschoten a=nvanbenschoten

Backport 1/1 commits from #29025.

/cc @cockroachdb/release

---

Fixes #28849.

The theorized leak in #28849 was almost spot on. I instrumented the original
code and pointed 6 workload generators running `tpcc` with `wait=false` at a
single node (adding more workers to a single workload generator wasn't as good
at creating contention). I was pretty quickly able to see the size of
`Queue.mu.queries` grow on a few hot ranges due to context cancellation in
`MaybeWaitForQuery`. It turns out that both context cancellation and the
internal timeout in `MaybeWaitForQuery` (`maxWaitCh`) could result in a memory
leak.

This change fixes this leak by doing a better job ensuring that waiters clean up
after themselves if they stop waiting prematurely. In doing so, it simplifies
the `waitingQueries` structure by changing it from a slice of channels and a
useless `*roachpb.QueryTxnRequest` (that exacerbated the leak and turned out to
be quite useful in debugging it) to a single refcounted channel. With this
structure, it's straightforward for waiters to unregister prematurely and clean
up associated resources if necessary.

I could not reproduce the same behavior with `tpcc` after this change. I wasn't
able to get the size of `Queue.mu.queries` to grow without bound.

Release note (bug fix): Fix memory leak when contended queries time out.


Co-authored-by: Nathan VanBenschoten <nvanbenschoten@gmail.com>
Commits on Aug 28, 2018
craig[bot] and andy-kimball
Merge #29127
29127: release-2.1: opt: Increase performance of small queries r=andy-kimball a=andy-kimball

Backport 2/2 commits from #29026.

/cc @cockroachdb/release

---

This PR contains two commits that increase the performance of
small, simple queries:

- Improve statistics building perf
- Streamline join name dup detection

Together, these changes result in these bench test improvements:
```
name                      old time/op  new time/op  delta
Phases/kv-read/Explore    28.0µs ± 2%  26.1µs ± 3%  -6.94%  (p=0.008 n=5+5)
Phases/planning1/Explore  10.5µs ± 3%  10.3µs ± 2%    ~     (p=0.310 n=5+5)
Phases/planning2/Explore  31.2µs ± 2%  30.7µs ± 3%    ~     (p=0.222 n=5+5)
Phases/planning3/Explore  36.2µs ± 2%  34.3µs ± 3%  -5.28%  (p=0.008 n=5+5)
Phases/planning4/Explore  44.2µs ± 1%  41.5µs ± 2%  -6.00%  (p=0.008 n=5+5)
Phases/planning5/Explore  35.9µs ± 1%  34.1µs ± 2%  -5.10%  (p=0.008 n=5+5)
Phases/planning6/Explore  63.8µs ± 3%  59.3µs ± 3%  -7.12%  (p=0.008 n=5+5)
```


Co-authored-by: Andrew Kimball <andyk@cockroachlabs.com>
util: fix test failure with go 1.11
Fixing a crash in the crash reporting test, related to using an empty
`runtime.TypeAssertionError`.

Release note: None
craig[bot] and RaduBerinde
Merge #29143 #29159
29143: release-2.1: opt: minor ColList cleanup r=RaduBerinde a=RaduBerinde

Backport 1/1 commits from #29120.

/cc @cockroachdb/release

---

Making `ColList` a type instead of an alias, and moving `ColListToSet`
to a method on this type.

Release note: None


29159: release-2.1: opt: hide stats in logprops tests r=RaduBerinde a=RaduBerinde

Backport 1/1 commits from #29141.

/cc @cockroachdb/release

---

Hiding stats in the logprops tests since stats have their own tests.

Release note: None


Co-authored-by: Radu Berinde <radu@cockroachlabs.com>
distsqlrun: bump MinAcceptedVersion to 21
This change makes 2.0 nodes not schedule distsql work on 2.1 nodes. This
change is being made because we don't adequately test clusters in
a skewed version environment. This means we could have missed actual
breaking changes that should have bumped the min version but didn't. In
lieu of that testing, prevent cross version work from being done.

For example, we have already identified a number of IMPORT problems in
skewed version clusters that are best fixed by preventing the scheduling
of work from 2.0 onto 2.1 nodes. However it is also possible there are
other unknown breakages in distsql land.

This does mean that SQL workloads in a version upgrade environment will
experience performance loss if the entire cluster can't be used and data
has to be streamed to a gateway.

Release note (sql change): Prevent 2.0 nodes from scheduling distsql
work on 2.1 nodes.
craig[bot] and knz
Merge #29160
29160: release-2.1: cli: miscellaneous fixes r=knz a=knz

Backport 2/2 commits from #29107.

/cc @cockroachdb/release

---

Fixes #28775.




Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
knz
cli: unbreak the windows build
Release note: None
server: use a timer instead of a ticker for runtime stats gathering
A ticker attempts to signal its channel every interval duration, while
the timer pattern waits the interval duration between signals. For
runtime stats gathering, the call to `runtime.ReadMemStats()` can take
longer than the stats interval on large heaps.

See #27775

Release note: None
server: make RuntimeStatSampler field a pointer
RuntimeStatSampler is a large struct. Returning it by value is a
pessimization.

Release note: None
server: add a separate sampling loop for Go mem stats
`runtime.ReadMemStats` will block waiting for any running GC cycle to
finish. On a large heap this can take upwards of 10s which is in excess
of the default frequency that we sample runtime statistics (10s).

Fixes #27775

Release note: none
3 authors
Merge #29166 #29176
29166: release-2.1: util: fix test failure with go 1.11 r=RaduBerinde a=RaduBerinde

Backport 1/1 commits from #29155.

/cc @cockroachdb/release

---

Fixing a crash in the crash reporting test, related to using an empty
`runtime.TypeAssertionError`.

Release note: None


29176: release-2.1: cli: unbreak the windows build r=knz a=knz

Backport 1/1 commits from #29175.

/cc @cockroachdb/release

---

Release note: None


Co-authored-by: Radu Berinde <radu@cockroachlabs.com>
Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
3 authors
Merge #29129 #29177
29129: backport-2.1: storage: Up-replicate before removing replicas from dead stores r=a-robinson a=a-robinson

Backport 1/1 commits from #28875.

/cc @cockroachdb/release

---

Fixes #25392, by preventing the following series of events:

1. Node x dies
2. We remove node x's replica of some range
3. Node y dies before we up-replicate, leaving the range unavailable (1
   out of 2 replicas dead)
4. Node x comes back online. It can't help the situation, because its
   replica was officially removed from the range.

Instead, we now up-replicate first before removing node x's replica.

Release note: None


29177: release-2.1: server: add a separate sampling loop for Go mem stats r=tschottdorf a=petermattis

Backport 5/5 commits from #29061.

/cc @cockroachdb/release

---

`runtime.ReadMemStats` will block waiting for any running GC cycle to
finish. On a large heap this can take upwards of 10s which is in excess of
the default frequency that we sample runtime statistics (10s).

Fixes #27775

Release note: none


Co-authored-by: Alex Robinson <alexdwanerobinson@gmail.com>
Co-authored-by: Peter Mattis <petermattis@gmail.com>
craig[bot] and benesch
Merge #29124
29124: backport-2.1: build: check out submodules before building UI protobufs r=rolandcrosby a=benesch

Backport 1/1 commits from #28627.

/cc @cockroachdb/release

---

Building UI protobufs indirectly requires that submodules have been
checked out, as the Raft protobuf lives in the vendor submodule.

Fix #28593.

Release note: None


Co-authored-by: Nikhil Benesch <nikhil.benesch@gmail.com>
storage: cleanup applyTimestampCache
Stop pretending that the "bumped" returned value is meaningful if an
error is returned. It was unused and confusing.

Release note: None
storage: deflake TestStoreRangeMergeRaftSnapshot
This test expects to see the number of Raft snapshots applied to the
store increment after applying a Raft snapshot, but store metrics are
incremented a hair after the snapshot is successfully applied to disk.
Wrap the check in a test.SucceedsSoon so that we're protected if the
check races with the increment.

In the resulting code, the splitKeys variable was only used in two
places, so inline it to improve readability.

Release note: None
opt: error out on correlated Exists and Any subqueries
Adding missing checks for outer columns on Exists and Any subqueries.
These are cases where we couldn't even convert to an apply variant.

Fixes #28816.

Release note: None
Merge pull request #29168 from mjibson/backport2.1-28977
release-2.1: distsqlrun: bump MinAcceptedVersion to 21
storage: change the errors generated by BeginTxn replay protection
Background:
BeginTransaction requests are protected from "replays" (specifically,
they're prevented from executing after an EndTransaction) by a timestamp
cache entry populated by the EndTxn. Upon finding such an entry, the
replica used to return one of two errors: either a
TransactionReplayError (if the timestamp cache entry had our own txn
id in it), or a retriable TransactionRetryError (if the ts cache lost
information, or specifically after a lease transfer). The justification
why a retriable error is safe is because, if a client actually gets it
as a retriable error, then it wasn't a replay but something else and so
the txn hadn't committed before. Rationalizing, if the error is
received unchanged by the client, then it must be generated by the first
attempt to execute that BeginTxn; otherwise, if the error were generated
by a store-level re-evaluation, the store would turn it into an
AmbiguousResultError. If it were generated by a duplicate RPC sent by
the DistSender, then the DistSender would turn it into an
AmbiguousCommitError if it had also sent a commit (or, alternatively,
the DistSender might have given up on waiting for a response on this RPC
in which case the client doesn't get any error).

This patch gets rid of TransactionReplayError, and makes the replay
detection always generate a retryable TransactionAbortedError. This
makes the behavior uniform across the two cases; there's no need for one
to be retriable and the other not.
TransactionAbortedError has been chosen over TransactionRetryError
because it seems more honest - since there's no txn record, the client
might as well create a new transaction rather than bump the epoch of the
old one (since bumping the epoch is an optimization supposed to be used
when a txn record and some intents exist).

Besides the general sanity of handling the two cases uniformly, the case
where the timestamp cache has an entry with the specific id (the
previously non-retriable case) has become more common recently since the
policy for the TxnCoordSender "async rollbacks" has changed: as soon as
the heartbeat loop detects that the txn it's heartbeating has been
aborted, it sends a rollback. As strange as it sounds, this rollback can
race with the BeginTxn. A scenario is detailed here:
#28270 (comment)

Comments on TransactionAbortedError have been updated to read more like
instructions from the server to the client about what the client should
do (i.e. the create a new txn) rather than trying to specify the
situation in which the error was generated (which was already a bankrupt
attempt since the error was already generated in a number of distinct
circumstances). A reason enum was added to help a debugger distinguish
between the circumstances when the error was created.

Fixes #28270
Fixes #28231

Release note: None
opt: Add regression test cases
Add regression test cases derived from open Github cases, in order
to minimize the chance that the issues reappear in the future. In
addition, update the default `ExprView.String` method to not always
show fully-qualified names (only show them when necessary).

Release note: None
3 authors
Merge #29188 #29195
29188: backport2.1:  storage: change the errors generated by BeginTxn replay protection  r=andreimatei a=andreimatei

backport of #28988 
cc @cockroachdb/release 

Background:
BeginTransaction requests are protected from "replays" (specifically,
they're prevented from executing after an EndTransaction) by a timestamp
cache entry populated by the EndTxn. Upon finding such an entry, the
replica used to return one of two errors: either a
TransactionReplayError (if the timestamp cache entry had our own txn
id in it), or a retriable TransactionRetryError (if the ts cache lost
information, or specifically after a lease transfer). The justification
why a retriable error is safe is because, if a client actually gets it
as a retriable error, then it wasn't a replay but something else and so
the txn hadn't committed before. Rationalizing, if the error is
received unchanged by the client, then it must be generated by the first
attempt to execute that BeginTxn; otherwise, if the error were generated
by a store-level re-evaluation, the store would turn it into an
AmbiguousResultError. If it were generated by a duplicate RPC sent by
the DistSender, then the DistSender would turn it into an
AmbiguousCommitError if it had also sent a commit (or, alternatively,
the DistSender might have given up on waiting for a response on this RPC
in which case the client doesn't get any error).

This patch gets rid of TransactionReplayError, and makes the replay
detection always generate a retryable TransactionAbortedError. This
makes the behavior uniform across the two cases; there's no need for one
to be retriable and the other not.
TransactionAbortedError has been chosen over TransactionRetryError
because it seems more honest - since there's no txn record, the client
might as well create a new transaction rather than bump the epoch of the
old one (since bumping the epoch is an optimization supposed to be used
when a txn record and some intents exist).

Besides the general sanity of handling the two cases uniformly, the case
where the timestamp cache has an entry with the specific id (the
previously non-retriable case) has become more common recently since the
policy for the TxnCoordSender "async rollbacks" has changed: as soon as
the heartbeat loop detects that the txn it's heartbeating has been
aborted, it sends a rollback. As strange as it sounds, this rollback can
race with the BeginTxn. A scenario is detailed here:
#28270 (comment)

Comments on TransactionAbortedError have been updated to read more like
instructions from the server to the client about what the client should
do (i.e. create a new txn) rather than trying to specify the
situation in which the error was generated (which was already a bankrupt
attempt since the error was already generated in a number of distinct
circumstances). A reason enum was added to help a debugger distinguish
between the circumstances when the error was created.

Fixes #28270
Fixes #28231

Release note: None

29195: backport-2.1: storage: deflake TestStoreRangeMergeRaftSnapshot r=tschottdorf a=benesch

Backport 1/1 commits from #29133.

/cc @cockroachdb/release

---

This test expects to see the number of Raft snapshots applied to the
store increment after applying a Raft snapshot, but store metrics are
incremented a hair after the snapshot is successfully applied to disk.
Wrap the check in a test.SucceedsSoon so that we're protected if the
check races with the increment.

In the resulting code, the splitKeys variable was only used in two
places, so inline it to improve readability.

Fix #29108.

Release note: None


Co-authored-by: Andrei Matei <andrei@cockroachlabs.com>
Co-authored-by: Nikhil Benesch <nikhil.benesch@gmail.com>
cdc/roachtest: Add Kafka "Chaos" Test
Adds a new version of the CDC test which adds "chaos" to the kafka
service by periodically stopping and starting kafka while a changefeed
is still actively pushing to kafka.

This test currently fails, because this scenario is not supported at
all; a failure of the Kafka process will immediately (and permanently)
move the changefeed to a "failed" status. Once this is no longer the
case, we may want to expand the criteria for a successful recovery.

Release note: none.
Merge pull request #29210 from RaduBerinde/backport2.1-29182
release-2.1: opt: error out on correlated Exists and Any subqueries
cli: Fix support for deprecated --http-host flag
It looks like we accidentally stopped registering the --http-host flag
in the cli package's init function recently, which completely removed
support for --http-host rather than the intended effect of deprecating
(but continuing to allow) it.

Also make the handling of deprecated/alias flags more consistent with
each other while I'm here.

Release note (bug fix): Fix support for --http-host flag that was broken
in previous 2.1 beta releases.
security: relax checks on certificate fields.
Fixes #29185.

Remove all checks on certificate KeyUsage and ExtendedKeyUsage.

These have historically been so badly misused that Go does not even
check them. Per the blurb in
[x509/verify.go](https://github.com/golang/go/blob/master/src/crypto/x509/verify.go#L676):

```
  // KeyUsage status flags are ignored. From Engineering Security, Peter
  // Gutmann: A European government CA marked its signing certificates
as
  // being valid for encryption only, but no-one noticed. Another
  // European CA marked its signature keys as not being valid for
  // signatures. A different CA marked its own trusted root certificate
  // as being invalid for certificate signing. Another national CA
  // distributed a certificate to be used to encrypt data for the
  // country’s tax authority that was marked as only being usable for
  // digital signatures but not for encryption. Yet another CA reversed
  // the order of the bit flags in the keyUsage due to confusion over
  // encoding endianness, essentially setting a random keyUsage in
  // certificates that it issued. Another CA created a self-invalidating
  // certificate by adding a certificate policy statement stipulating
  // that the certificate had to be used strictly as specified in the
  // keyUsage, and a keyUsage containing a flag indicating that the RSA
  // encryption key could only be used for Diffie-Hellman key agreement.
```

Running cockroach (client/server) with absolutely no Key Usages works
just fine.

Release note (general change): remove checks on certificate key usages.
Merge pull request #29154 from jordanlewis/backport2.1-28980
backport-2.1: distsqlrun: bugfixes/perf change to joinReader
Commits on Aug 29, 2018
opt: flag to configure TestExternal data directory
Adding a flag that controls the path for `TestExternal`. This will be
used with running tests from a separate repository.

Release note: None
Merge pull request #29220 from a-robinson/backport2.1-29213
backport-2.1: cli: Fix support for deprecated --http-host flag
Merge pull request #29225 from RaduBerinde/backport2.1-29183
release-2.1: opt: flag to configure TestExternal data directory
craig[bot] and mrtracy
Merge #29216
29216: backport-2.1: cdc/roachtest: Add Kafka "Chaos" Test r=mrtracy a=mrtracy

Backport 1/1 commits from #29047.

/cc @cockroachdb/release

---

Adds a new version of the CDC test which adds "chaos" to the kafka
service by periodically stopping and starting kafka while a changefeed
is still actively pushing to kafka.

This test currently fails, because this scenario is not supported at
all; a failure of the Kafka process will immediately (and permanently)
move the changefeed to a "failed" status. Once this is no longer the
case, we may want to expand the criteria for a successful recovery.

Release note: none.


Co-authored-by: Matt Tracy <matt@cockroachlabs.com>
storage: deflake TestReplicateQueueRebalance
Disable the merge queue in this test as it interferes with the splits
that are performed manually.

Fixes #28563

Release note: None
storage: log Epoch lease only when changing hands
This should noticeably denoise the logs for clusters that have
lots of ranges. After restarting a node in such a cluster, a
long tail of these messages would crop up as the various queues
acquired leases on otherwise dormant ranges previously led by
the restarted node.

I think we'll need to do more to rein in these messages in
the future, but this is a good start.

Release note: None
craig[bot] and petermattis
Merge #29237
29237: backport-2.1: storage: deflake TestReplicateQueueRebalance r=tschottdorf a=benesch

Backport 1/1 commits from #29221.

/cc @cockroachdb/release

---

Temporarily disable the merge queue in this test.

Fixes #28563

Release note: None


Co-authored-by: Peter Mattis <petermattis@gmail.com>
opt: rework lookup join rules
The PushJoinThroughIndexJoin rules result in too many combinations
being considered. For example, if we are joining two tables with three
indexes each, we will consider joining all possible pairs of indexes,
followed by index joins as needed. The main purpose of these rules is
to generate lookup joins (via a subsequent application of a
GenerateLookupJoin rule). Note that this won't work at all anymore if
we stop generating index joins with non-constrained scans.

This change reworks the lookup join exploration rules to run off of
the "canonical" Scan operator and identify indexes for lookup joins
(as opposed to relying on the memo having Scan/IndexJoin expressions
for all indexes).

This actually simplifies the high-level rules because some of the
cases are handled internally by the exploration function.

The new rules construct lookup join groups so we need to implement
logical properties and stats for them.

Release note: None
craig[bot] and mberhault
Merge #29223
29223: release-2.1: security: relax checks on certificate fields. r=mberhault a=mberhault

Backport 1/1 commits from #29193.

/cc @cockroachdb/release

---

Fixes #29185.

Remove all checks on certificate KeyUsage and ExtendedKeyUsage.

These have historically been so badly misused that Go does not even
check them. Per the blurb in
[x509/verify.go](https://github.com/golang/go/blob/master/src/crypto/x509/verify.go#L676):

```
  // KeyUsage status flags are ignored. From Engineering Security, Peter
  // Gutmann: A European government CA marked its signing certificates
as
  // being valid for encryption only, but no-one noticed. Another
  // European CA marked its signature keys as not being valid for
  // signatures. A different CA marked its own trusted root certificate
  // as being invalid for certificate signing. Another national CA
  // distributed a certificate to be used to encrypt data for the
  // country’s tax authority that was marked as only being usable for
  // digital signatures but not for encryption. Yet another CA reversed
  // the order of the bit flags in the keyUsage due to confusion over
  // encoding endianness, essentially setting a random keyUsage in
  // certificates that it issued. Another CA created a self-invalidating
  // certificate by adding a certificate policy statement stipulating
  // that the certificate had to be used strictly as specified in the
  // keyUsage, and a keyUsage containing a flag indicating that the RSA
  // encryption key could only be used for Diffie-Hellman key agreement.
```

Running cockroach (client/server) with absolutely no Key Usages works
just fine.

Release note (general change): remove checks on certificate key usages.


Co-authored-by: marc <marc@cockroachlabs.com>
server: deflake TestRapidRestarts
In go1.10 and earlier, it was not safe to call `http.ServeMux.ServeHTTP`
concurrently with `http.ServeMux.Handle`. (This is fixed in go1.11). In
the interim, provide our own safeServeMux wrapper that provides proper
locking.

Fixes #29227

Release note: None
sql: correctly resolve schema mutations during TRUNCATE
related to #27687

Release note (sql change): fixes table descriptor corruption when
TRUNCATE TABLE is run while DROP COLUMN is being processed.
sql: mark schema change job as successful when table DROP/TRUNCATE
The job is modified in the same transaction as the user's
DROP/TRUNCATE transaction.

Release note (sql change): Fixes problem where when a DROP/TRUNCATE is
run while a schema change like CREATE INDEX is running, the corresponding
job would stay running indefinitely.
sql: add job id to ROLL BACK job for schema change
this will allow a user to know what original job
the roll back job is referring to.

fixes #28858

Release note: None
craig[bot] and petermattis
Merge #29259
29259: release-2.1: server: deflake TestRapidRestarts r=benesch a=petermattis

Backport 1/1 commits from #29230.

/cc @cockroachdb/release

---

In go1.10 and earlier, it was not safe to call `http.ServeMux.ServeHTTP`
concurrently with `http.ServeMux.Handle`. (This is fixed in go1.11). In
the interim, provide our own safeServeMux wrapper that provides proper
locking.

Fixes #29227

Release note: None


Co-authored-by: Peter Mattis <petermattis@gmail.com>
Merge pull request #29254 from RaduBerinde/backport2.1-28924
release-2.1: opt: rework lookup join rules
testutils/lint: add a nightly lint test
Move TestHelpURLs to a nightly lint test. It isn't terribly useful to be
failing CI builds due to flaky internet.

Release note: None
sql: fix crash caused by missing memory account
Previously, wrapped local planNodes within subqueries would not always
be properly hooked up with memory accounts, leading to crashes.

Release note: None
craig[bot] and vivekmenezes
Merge #29262
29262: backport-2.1: sql: fix two bugs with TRUNCATE r=vivekmenezes a=vivekmenezes

Backport 3/3 commits from #28721.

/cc @cockroachdb/release

---

sql: correctly resolve schema mutations during TRUNCATE
related to #27687

sql: mark schema change job as successful when table DROP/TRUNCATE
The job is modified in the same transaction as the user's DROP/TRUNCATE transaction.



Co-authored-by: Vivek Menezes <vivek@cockroachlabs.com>
sql: fix problem with TRUNCATE messing up SEQUENCE dependency
While truncating a table, the column dependency to a sequence
was being retained correctly, but the reference from the sequence
back to the table was not getting updated, resulting in the
sequence retaining a reference to the old TRUNCATED table.

fix #29010

Release note (sql change): Fixed problem with messing up schema
on running TRUNCATE on a table with a reference to a sequence.
3 authors
Merge #29263 #29277
29263: backport-2.1: sql: add job id to ROLL BACK job for schema change r=vivekmenezes a=vivekmenezes

Backport 1/1 commits from #28857.

/cc @cockroachdb/release

---

this will allow a user to know what original job
the roll back job is referring to.

Release note: None


29277: release-2.1: testutils/lint: add a nightly lint test r=benesch a=petermattis

Backport 1/1 commits from #29265.

/cc @cockroachdb/release

---

Move TestHelpURLs to a nightly lint test. It isn't terribly useful to be
failing CI builds due to flaky internet.

Release note: None


Co-authored-by: Vivek Menezes <vivek@cockroachlabs.com>
Co-authored-by: Peter Mattis <petermattis@gmail.com>
knz
base: fix TestValidateAddrs
The code prefers IPv4 now; make the test prefer IPv4 too.

Release note: None
3 authors
Merge #29292 #29304
29292: backport-2.1: sql: fix crash caused by missing memory account r=jordanlewis a=jordanlewis

Backport 1/1 commits from #29286.

/cc @cockroachdb/release

---

Previously, wrapped local planNodes within subqueries would not always
be properly hooked up with memory accounts, leading to crashes.

Fixes #29205.

Release note: None


29304: release-2.1: base: fix TestValidateAddrs r=knz a=knz

Backport 1/1 commits from #29287.

/cc @cockroachdb/release

---

Fixes #29186.

The code prefers IPv4 now; make the test prefer IPv4 too.
(This is not captured in CI because the failure is macOS-specific)

Release note: None


Co-authored-by: Jordan Lewis <jordanthelewis@gmail.com>
Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
changefeedccl: move metrics into their own file
No change, only code movement.

Release note: None
changefeedccl: add an experimental-sql sink
This is the [Table Sink] described by the RFC.

sqlSink mirrors the semantics offered by kafkaSink as closely as
possible, but writes to a SQL table (presumably in CockroachDB).
Currently only for testing.

Each emitted row or resolved timestamp is stored as a row in the table.
Each table gets 3 partitions. Similar to kafkaSink, the order between
two emits is only preserved if they are emitted to by the same node and
to the same partition.

Release note: None

[table sink]: https://github.com/cockroachdb/cockroach/blob/release-2.1/docs/RFCS/20180501_change_data_capture.md#sql-table-sink
craig[bot] and andy-kimball
Merge #29212
29212: release-2.1: opt: Add regression test cases r=andy-kimball a=andy-kimball

Backport 1/1 commits from #29119.

/cc @cockroachdb/release

---

Add regression test cases derived from open Github cases, in order
to minimize the chance that the issues reappear in the future. In
addition, update the default `ExprView.String` method to not always
show fully-qualified names (only show them when necessary).

Release note: None


Co-authored-by: Andrew Kimball <andyk@cockroachlabs.com>
Commits on Aug 30, 2018
gossip: replace tickers with timers
The previous code was only jittering the timer intervals once when the
tickers were created instead of on each interval.

Fixes #28973

Release note: None
gossip: propagate a connectivity map through gossip
Each node now periodically gossips the key `gossip-clients:<node-id>`
where the value is a comma separated list of nodes to which it has open
client connections. This key is gossipped at a fairly rapid frequency (2
secs) and with a low TTL (4 secs) so that nodes can have a very up to
date picture of the current connectivity of the gossip network. Note
that because it is only a single key being gossipped, the total data
volume this consumes will be very low in comparison to normal traffic or
to Raft heartbeats.

A future extension of this work would be for gossip to stop relying on
the sentinel key being gossipped and instead use a signal based on the
connectivity graph being fully connected.

Release note: None
roachtest: add gossip roachtest
The gossip roachtest starts a 9 node cluster and then randomly kills a
node and verifies that the gossip network stabilizes to the same state
on every node.

Release note: None
gossip,storage: adjust gossip timings
Reduce the sentinel gossip TTL and interval from 2m / 1m, to 4.5s /
2.25s. The sentinel key informs gossip whether it is connected to the
primary gossip network or a partition and thus needs a short TTL so that
partitions are fixed quickly. In particular, partitions need to resolve
faster than the expiration-based lease timeout (9s) or node liveness
will be adversely affected.

Reduce the gossip stall check interval from 30s to 2s. The stall check
interval also affects how quickly gossip partitions are noticed and
repaired. The stall check itself is very cheap, so this produces no load
on the system.

Release note (bug fix): Reduce the duration of partitions in the gossip
network when a node crashes in order to eliminate a cause of temporary
data unavailability.

Fixes #27731
gossip: do not invoke callbacks if an info's value is unchanged
Previously, gossip would invoke a callback whenever a new info that
matched the callback pattern was received, even if the info's value was
unchanged. There were a handful of gossip uses (the table-stats cache
and the store-pool) that were relying on this behavior which now opt-in
to it via the `gossip.Redundant` callback option.

Release note: None
craig[bot] and tschottdorf
Merge #29249
29249: backport-2.1: storage: log Epoch lease only when changing hands r=a-robinson a=tschottdorf

Backport 1/1 commits from #29113.

/cc @cockroachdb/release

---

This should noticeably denoise the logs for clusters that have
lots of ranges. After restarting a node in such a cluster, a
long tail of these messages would crop up as the various queues
acquired leases on otherwise dormant ranges previously led by
the restarted node.

I think we'll need to do more to rein in these messages in
the future, but this is a good start.

Release note: None


Co-authored-by: Tobias Schottdorf <tobias.schottdorf@gmail.com>
craig[bot] and tschottdorf
Merge #29250
29250: backport-2.1: workload: only change merge settings if they exist r=tschottdorf a=tschottdorf

Backport 1/1 commits from #29138.

/cc @cockroachdb/release

---

Fixes #29231.
Fixes #29232.

Release note: None


Co-authored-by: Tobias Schottdorf <tobias.schottdorf@gmail.com>
kv: silence spammy log
This can fill pages of logs when a cluster starts, especially when it
never manages to connect to Gossip.

Release note: None
roachtest: observe ctx cancellation in Run
Before this patch, roachprod invocations would not observe
ctx cancellation. Or rather they would, but due to the usual
obscure passing of Stdout into the child process of roachprod
the Run call would not return until the child had finished.
As a result, the test would continue running, which is annoying
and also costs money.

Also fixes up the handling of calling `c.t.Fatal` on a monitor
goroutine (using what is perhaps unspecified behavior of the
Go runtime). Anyway, the result is that you can do basically
whatever inside of a monitor and get away with it:

```go
m.Go(func(ctx context.Context) error {
	// Make sure the context cancellation works (not true prior to the PR
	// adding this test).
	return c.RunE(ctx, c.Node(1), "sleep", "2000")
})
m.Go(func(ctx context.Context) error {
	// This will call c.t.Fatal which also used to wreak havoc on the test
	// harness. Now it exits just fine (and all it took were some mean hacks).
	// Note how it will exit with stderr and stdout in the failure message,
	// which is extremely helpful.
	c.Run(ctx, c.Node(1), "echo foo && echo bar && notfound")
	return errors.New("impossible")
})
m.Wait()
```

now returns

```
--- FAIL: tpmc/w=1/nodes=3 (0.24s)
...,errgroup.go:58: /Users/tschottdorf/go/bin/roachprod run local:1 -- echo foo && echo bar && notfound returned:
	stderr:
	bash: notfound: command not found
	Error:  exit status 127

	stdout:
	foo
	bar
	: exit status 1
...,tpcc.go:661: Goexit() was called
FAIL
```

Release note: None
roachtest: don't use flags v2.0 doesn't know
Reverts a tiny part of #27800.

Fixes #29260.
Fixes #29261.

Release note: None
roachtest: disable merge queue via query that works on v2.0
We renamed the first column in 2.1.

Release note: None
roachtest: run mixed version tests against v2.0.5
v2.0.0 contains many bugs that we don't need to revisit.

Release note: None
roachtest: further clean up mixedVersion test
Remove spurious TODO, use a monitor instead of an errgroup, fix the TPCC
invocation which previously was barely doing anything.
roachtest: remove hack from version test
It's now running against v2.0.5 which does not require the workaround.

Release note: None
roachtest: skip queue test
We know that this is going to fail until we pick up #17229 again, which
is not going to happen during the stability period.

Release note: None
opt: fix building of aggregates with outer columns
This commit fixes a bug in which aggregates containing
outer columns were associated with the wrong scope.

For example, consider this query:

  SELECT (SELECT max(a.x) FROM b) FROM a;

Previously, the inner (subquery) scope was selected as the grouping scope
for max(a.x). However, this was incorrect. The outer scope should
have been selected instead.

In order to fix this issue, this commit makes significant changes
to the way queries are built by the optbuilder. It is difficult
to determine whether a particular select clause needs aggregation without
first building the arguments to the aggregates to see whether there are outer
columns. Therefore, this commit makes the following change: before any
expressions in the SELECT list are built, all aggregates in the SELECT,
DISTINCT ON, ORDER BY, and HAVING clauses are replaced with an aggregateInfo.
The aggregateInfo contains the function definition, fully built arguments, and
output column of the aggregate.

In order to avoid traversing the expression tree multiple times, the entire
semantic analysis and type checking phase is completed before
building any non-aggregates for these clauses.

This commit avoids allocating slices for analyzed expressions by
instead creating them as columns directly in the scope which will
use them. It later makes another pass through the columns to build them.

This and various other performance improvements make the fix for
correlated aggregates perform at parity with the master branch.

Release note: None
opt: test updates for building aggregates
This commit contains test updates from the previous commit.

Release note: None
opt: track column references and outer columns in optbuilder
This commit adds the option to track the set of column references in each
scalar expression built in the optbuilder. This new feature is used when
building aggregate arguments so that the scope for aggregates can be
determined without examining the memo structure. Additionally, this commit
adds logic to track the set of outer columns for each subquery so that
correlated subqueries can be checked for correctness. In order to
facilitate this, a colRefs attribute has been added to the aggregateInfo
struct, and an outerCols attribute has been added to the subquery struct.

Release note: None
craig[bot] and petermattis
Merge #29317
29317: release-2.1: adjust gossip timings to minimize gossip partitions r=tschottdorf a=petermattis

Backport:
  * 1/1 commits from "gossip: replace tickers with timers" (#29051)
  * 6/6 commits from "gossip: maintain a gossip connectivity map" (#28963)

Please see individual PRs for details.

/cc @cockroachdb/release


Co-authored-by: Peter Mattis <petermattis@gmail.com>
gossip: rework how infostore callbacks are run
Rather than starting a goroutine per call of `infostore.runCallbacks`,
now there is a single goroutine per infostore that runs the
callbacks. Callback invocation was already serialized by
`infostore.callbackMu`, so the per-runCallbacks goroutine was nothing
but a resource waste. Additionally, it was causing flakiness in
TestNetworkReachesEquilibrium when run under race because that test
would frequently hit the error "race: limit on 8192 simultaneously
alive goroutines is exceeded, dying". Instrumentation revealed that a
majority of the goroutines were from runCallbacks.

Fixes #29335

Release note: None
knz
base: fix TestValidateAddrs (cont.)
The fix in #29287 only fixed half of the test for macOS. This patch
fixes the other half.

Release note: None
craig[bot] and knz
Merge #29344
29344: release-2.1: base: fix TestValidateAddrs (cont.) r=knz a=knz

Backport 1/1 commits from #29342.

/cc @cockroachdb/release

---

The fix in #29287 only fixed half of the test for macOS. This patch
fixes the other half.

Release note: None


Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
sql: fix problems with reusing descriptor names
This change fixes two problems with descriptor name reuse
when a session ends after committing a schema change but before
executing it. The asynchronous execution of schema changes had
two bugs.
1. The table data GC delay was being incorrectly applied to
VIEW and SEQUENCE types.
2. A table that was dropped was put on the delayed deletion
queue without first checking if its name had been GC-ed.

fixes #28409
related to #27135

Release note (sql change): Fix problem with VIEW/TABLE name not
being recycled quickly after DROP VIEW/TABLE.
craig[bot] and petermattis
Merge #29343
29343: release-2.1: gossip: rework how infostore callbacks are run r=tschottdorf a=petermattis

Backport 1/1 commits from #29338.

/cc @cockroachdb/release

---

Rather than starting a goroutine per call of `infostore.runCallbacks`,
now there is a single goroutine per infostore that runs the
callbacks. Callback invocation was already serialized by
`infostore.callbackMu`, so the per-runCallbacks goroutine was nothing
but a resource waste. Additionally, it was causing flakiness in
TestNetworkReachesEquilibrium when run under race because that test
would frequently hit the error "race: limit on 8192 simultaneously
alive goroutines is exceeded, dying". Instrumentation revealed that a
majority of the goroutines were from runCallbacks.

Fixes #29335

Release note: None


Co-authored-by: Peter Mattis <petermattis@gmail.com>
sql: temporarily skip TestSplitAt
See #29169

Release note: None
craig[bot] and petermattis
Merge #29354
29354: release-2.1: sql: temporarily skip TestSplitAt r=arjunravinarayan a=petermattis

Backport 1/1 commits from #29347.

/cc @cockroachdb/release

---

See #29169

Release note: None


Co-authored-by: Peter Mattis <petermattis@gmail.com>
distsql: embed ProcessorBase in planNodeToRowSource
planNodeToRowSource had incorrect closing semantics. When closing itself
(in any case), it would call `ConsumerClosed` on `toDrain` and proceed to
drain it. The contract is that `Next` should never be called after
`ConsumerClosed`. The underlying issue here is that we are trying to
re-implement RowSource semantics. To solve this, and any other possible
issues, we embed ProcessorBase and reuse existing code.

Release note: None
distsql: add EXPLAIN ANALYZE tests
Release note: None
changefeedccl: stop checking that tracked spans haven't changed
This was apparently a bit of overeager defensiveness. There's no way for
this to happen currently (so it was impossible to test this error) and
the check is getting in the way of the enterprise distsql planning as
well as rangefeed-based changefeeds.

Release note: None
changefeedccl: implement enterprise planning
One ChangeAggregator processor is run on each node to watch the spans it
is a leaseholder for. These all feed into a single ChangeFrontier, which
assembles partial progress updates into changefeed-wide resolved
timestamps, which it then emits.

Planning for the sinkless version still places one ChangeAggregator (and
one ChangeFrontier) on the gateway node, since everything is funnelled
through a single pgwire connection anyway.

Closes #28843

Release note: None
craig[bot] and asubiotto
Merge #29363
29363: release-2.1: distsql: embed ProcessorBase in planNodeToRowSource r=asubiotto a=asubiotto

Backport 2/2 commits from #29267.

/cc @cockroachdb/release

---

    planNodeToRowSource had incorrect closing semantics. When closing itself
    (in any case), it would call `ConsumerClosed` on `toDrain` and proceed to
    drain it. The contract is that `Next` should never be called after
    `ConsumerClosed`. The underlying issue here is that we are trying to
    re-implement RowSource semantics. To solve this, and any other possible
    issues, we embed ProcessorBase and reuse existing code.

This change also adds an `EXPLAIN ANALYZE` test that was previously failing because of these closing semantics.


Co-authored-by: Alfonso Subiotto Marqués <alfonso@cockroachlabs.com>
3 authors
Merge #29314 #29330 #29368
29314: release-2.1: changefeedccl: add an experimental-sql sink r=mrtracy a=danhhz

Backport 2/2 commits from #29303.

/cc @cockroachdb/release

---

This is the [Table Sink] described by the RFC.

sqlSink mirrors the semantics offered by kafkaSink as closely as
possible, but writes to a SQL table (presumably in CockroachDB).
Currently only for testing.

Each emitted row or resolved timestamp is stored as a row in the table.
Each table gets 3 partitions. Similar to kafkaSink, the order between
two emits is only preserved if they are emitted by the same node and
to the same partition.

Release note: None

[table sink]: https://github.com/cockroachdb/cockroach/blob/release-2.1/docs/RFCS/20180501_change_data_capture.md#sql-table-sink


29330: roachtest,kv: assorted backports r=petermattis a=tschottdorf

b270b3c (Tobias Schottdorf, 16 hours ago)
   roachtest: skip queue test

   We know that this is going to fail until we pick up #17229 again, which is
   not going to happen during the stability period.

   Release note: None

8396c34 (Tobias Schottdorf, 17 hours ago)
   roachtest: remove hack from version test

   It's now running against v2.0.5 which does not require the workaround.

   Release note: None

db5f5cf (Tobias Schottdorf, 17 hours ago)
   roachtest: further clean up mixedVersion test

   Remove spurious TODO, use a monitor instead of an errgroup, fix the TPCC
   invocation which previously was barely doing anything.

cbad2da (Tobias Schottdorf, 18 hours ago)
   roachtest: run mixed version tests against v2.0.5

   v2.0.0 contains many bugs that we don't need to revisit.

   Release note: None

eab9ba8 (Tobias Schottdorf, 18 hours ago)
   roachtest: disable merge queue via query that works on v2.0

   We renamed the first column in 2.1.

   Release note: None

ddeea8a (Tobias Schottdorf, 18 hours ago)
   roachtest: don't use flags v2.0 doesn't know

   Reverts a tiny part of #27800.

   Fixes #29260. Fixes #29261.

   Release note: None

8e1108e (Tobias Schottdorf, 23 hours ago)
   roachtest: observe ctx cancellation in Run

   Before this patch, roachprod invocations would not observe ctx
   cancellation. Or rather they would, but due to the usual obscure passing of
   Stdout into the child process of roachprod the Run call would not return
   until the child had finished. As a result, the test would continue running,
   which is annoying and also costs money.

   Also fixes up the handling of calling `c.t.Fatal` on a monitor goroutine
   (using what is perhaps unspecified behavior of the Go runtime). Anyway, the
   result is that you can do basically whatever inside of a monitor and get
   away with it:

   ```go m.Go(func(ctx context.Context) error {
   // Make sure the context cancellation works (not true prior to the PR
   // adding this test).
   return c.RunE(ctx, c.Node(1), "sleep", "2000")
   }) m.Go(func(ctx context.Context) error {
   // This will call c.t.Fatal which also used to wreak havoc on the test
   // harness. Now it exits just fine (and all it took were some mean hacks).
   // Note how it will exit with stderr and stdout in the failure message,
   // which is extremely helpful.
   c.Run(ctx, c.Node(1), "echo foo && echo bar && notfound")
   return errors.New("impossible")
   }) m.Wait()
   ```

   now returns

   ```
   --- FAIL: tpmc/w=1/nodes=3 (0.24s)
   ...,errgroup.go:58: /Users/tschottdorf/go/bin/roachprod run local:1 -- echo
   foo && echo bar && notfound returned:
   stderr:
   bash: notfound: command not found
   Error:  exit status 127

   	stdout:
   foo
   bar
   : exit status 1
   ...,tpcc.go:661: Goexit() was called FAIL
   ```

   Release note: None

e66d6ff (Tobias Schottdorf, 2 days ago)
   kv: silence spammy log

   This can fill pages of logs when a cluster starts, especially when it never
   manages to connect to Gossip.

   Release note: None

2ad9657 (Tobias Schottdorf, 3 days ago)
   workload: only change merge settings if they exist

   Fixes #29112. Fixes #29111.

   Release note: None

29368: release-2.1: changefeedccl: implement enterprise planning r=mrtracy a=danhhz

Backport 2/2 commits from #28984.

/cc @cockroachdb/release

---

One ChangeAggregator processor is run on each node to watch the spans it
is a leaseholder for. These all feed into a single ChangeFrontier, which
assembles partial progress updates into changefeed-wide resolved
timestamps, which it then emits.

Planning for the sinkless version still places one ChangeAggregator (and
one ChangeFrontier) on the gateway node, since everything is funnelled
through a single pgwire connection anyway.

Closes #28843

Release note: None

This is under-tested, but fixing that requires paying down some tech debt that I'd much prefer to do as a followup.


Co-authored-by: Daniel Harrison <daniel.harrison@gmail.com>
Co-authored-by: Tobias Schottdorf <tobias.schottdorf@gmail.com>
ui: fix command queue page always crashing on load
Due to a subtle mistake in a refactoring of the Loading component, the
function passed in to the loading component on this page was not bound
properly to the component instance.

Fixes #28966

Release note: None
ui: don't show tick at 125% for charts in default range 0-100%
Percentage charts should show the range 0-100% on their y axis by
default, but they were showing 0-125%. This change ensures that an
additional axis tick is not added in this case.

Release note: None
ui: lint: fix small whitespace inconsistency
Unfortunately our linter doesn't yet catch these.

Release note: None
dt
telemetry: quantize feature usage counts too
These counts are similar to sql query run counts, so should be treated the same way.

Release note: none.
dt
sql, importccl: allow and use AS OF SYSTEM TIME in EXPORT's query
Ideally EXPORT is just like a normal SELECT, including any AS OF SYSTEM TIME, just with the output redirected to cloud storage.

Release note: none.
dt
importccl: allow mysql AUTO_INCREMENT without start val
for empty tables, mysqldump does not emit the AUTO_INCREMENT=X comment on the CREATE TABLE.
Previously we used this comment to determine if we'd make a sequence to allow auto_increment
default expressions on individual columns in the table definition.
However for empty tables, this meant those columns would then error since we thought we were
not going to support the sequence.

Instead, look for any auto_increments first to determine if we need the sequence, then handle
the individual column defs.

Release note: none.

Fixes #29300.
sql: correctly handle NULL arrays in generator funcs
Change both unnest and _pg_expandarray to return UnknownReturnType if
passed NULL as their first argument during type checking. Teach the type
checker that if a func returns UnknownReturnType after type checking,
then it is because we didn't have enough information to generate a
correct type, and so should produce an error immediately. If these
funcs are passed a NULL array during execution (after type checking),
they should still succeed.

Also fix evalAndReplaceSubqueryVisitor to correctly type expressions
during subquery replacement which allows the previous change to work
both in local and distsql execution.

Fixes #29239 Fixes #28561

Release note (bug fix): The functions `unnest` and `_pg_expandarray`
now return an error when called with NULL as their first argument.
storage: skip TestClosedTimestampCanServe
It won't be the first one I'm looking into.

Release note: None
cli: add hex option to debug keys
This was used in #29252 and I imagine I'll want to use it again whenever
we see the consistency checker fail in the future.

Release note: None
roachtest: fix flake in TestMonitor
Accidentally changed this in #29178.

Release note: None
roachtest: improve TestMonitor
Add two more test cases about the exit status of the `monitor`
invocation itself.

Release note: None
storage: remove test-only data race
The test was stopping the engine before the stopper, so the compactor
was sometimes able to use the engine while it was being closed.

Fixes #29302.

Release note: None
dt
sql: don't startExec planHook subplans
Sub-plans of hooked plans are never run locally, so they do not want to be started.
Sub-plans are only ever used as the basis for distsql planning which is run by the
hook functions directly.

Release note: none.
dt
Merge pull request #29381 from dt/backport2.1-29118
release-2.1: telemetry: quantize feature usage counts too
dt
Merge pull request #29382 from dt/backport2.1-29208
release-2.1: sql, importccl: allow and use AS OF SYSTEM TIME in EXPORT's query
dt
Merge pull request #29383 from dt/backport2.1-29306
release-2.1: importccl: allow mysql AUTO_INCREMENT without start val
dt
importccl: allow literal values in mysqldump DEFAULTs
Previously we were failing to type literal strings as string expressions, instead thinking they were func or vars.
Wrap mysql's StrVals in quotes before handing them to the parser to have them correctly handled as literals.

Release note: none.

Fixes #29301.
Fixes #29309.
storage: Update range/lease counts in store pool upon transfers
We were already doing this for other stats such as qps, but had left out
the range and lease counts themselves. It's worth keeping those up to
date as well.

Release note: None
storage: Remove unnecessary log.Infos from AdminTransferLease
It looks like I left these in accidentally in
18d3bfe. They're not really spammy
since AdminTransferLease isn't frequently used, but they're also not
needed.

Release note: None
Merge pull request #29385 from mjibson/backport2.1-29244
release-2.1: sql: correctly handle NULL arrays in generator funcs
Merge pull request #29136 from a-robinson/backport2.1-28975
backport-2.1: storage: Track renewable expiration-based leases in map instead of chan
dt
Merge pull request #29392 from dt/backport2.1-29218
release-2.1: sql: don't startExec planHook subplans
dt
Merge pull request #29394 from dt/backport2.1-29315
release-2.1: importccl: allow literal values in mysqldump DEFAULTs
Merge pull request #29395 from a-robinson/backport2.1-29387
backport-2.1: storage: Remove unnecessary log.Infos from AdminTransferLease
Merge pull request #29393 from a-robinson/backport2.1-29370
backport-2.1: storage: Update range/lease counts in store pool upon transfers
Merge pull request #29376 from vilterp/backport2.1-29316
release-2.1: ui: fix command queue page always crashing on load
Merge pull request #29377 from vilterp/backport2.1-29360
release-2.1: ui: don't show 125% axis label on percentage chart
Commits on Aug 31, 2018
sql: fix flaky TestLeaseRenewedPeriodically
I changed GossipUpdateEvent as well which is strictly not needed
but there is a chance without it there will continue to be
some flakiness.

This fix essentially moves the check for release counts
further up before lease acquisition. This fixes a race
where after lease acquisition the periodic timer would
fire, acquire/release some leases.

fixes #28771

Release note: None
Merge pull request #29352 from vivekmenezes/backport2.1-29116
backport-2.1: sql: fix problems with reusing descriptor names
sql: quickly determine constants during normalization
Prior to this commit, the SQL normalization code had worst case
O(n^2) performance in the depth of the scalar expression. In rare
cases, the normalization code would traverse the entire expression
tree multiple times to determine if an expression was constant.

This is overkill, since normalization happens bottom-up, and
therefore constants are evaluated bottom-up. As a result, it's only
necessary to check if an expression's children are const Datums to
determine whether the expression itself is constant (in addition to
the existing checks that the expression itself is a not a variable
or impure function).

This commit creates a new Visitor called `fastIsConstVisitor`, which
determines whether an expression is constant by traversing at most
two levels of the tree.

Release note: None
gossip: don't allow loopback infos
When adding infos from a remote node, skip any info which originated on
the local node.

Release note: None
sql: fix panic when normalizing ->(NULL::STRING)
Prior to this commit, when (NULL::STRING) was on the right-hand side
of a -> operator, the normalization code caused a panic:

interface conversion: tree.Datum is tree.dNull, not *tree.DString

This was due to the fact that Eval returns dNull instead of a *DString
when evaluating NULL::STRING, but the code in ComparisonExpr.normalize was
expecting a *DString.

This commit adds a check to make sure that we still have a string after
Eval is called.

This bug was uncovered because opt/exec/execbuilder had some tests in
the inverted_index test file that were not present in
logictest/testdata/planner_test/inverted_index, which contained an expression
with ->(NULL::STRING). These tests have been added as regression tests
in planner_test.

Fixes #29399

Release note (bug fix): Fixed a panic with SQL statements containing
"->(NULL::STRING)".
craig[bot] and tschottdorf
Merge #29390
29390: backport-2.1: storage, roachtest, cli: assorted backports r=petermattis a=tschottdorf

storage: remove test-only data race

   The test was stopping the engine before the stopper, so the compactor was
   sometimes able to use the engine while it was being closed.

   Fixes #29302.

   roachtest: improve TestMonitor

   Add two more test cases about the exit status of the `monitor` invocation
   itself.

   roachtest: fix flake in TestMonitor

   Accidentally changed this in #29178.

   cli: add hex option to debug keys

   This was used in #29252 and I imagine I'll want to use it again whenever we
   see the consistency checker fail in the future.

   storage: skip TestClosedTimestampCanServe

   It won't be the first one I'm looking into.


Release note: None

Co-authored-by: Tobias Schottdorf <tobias.schottdorf@gmail.com>
craig[bot] and rytaft
Merge #29337
29337: release-2.1: opt: fix building of aggregates with outer columns r=rytaft a=rytaft

Backport 3/3 commits from #28774.

/cc @cockroachdb/release

---

This commit fixes a bug in which aggregates containing
outer columns were associated with the wrong scope.

For example, consider this query:
```
  SELECT (SELECT max(a.x) FROM b) FROM a;
```
Previously, the inner (subquery) scope was selected as the grouping scope
for `max(a.x)`. However, this was incorrect. The outer scope should
have been selected instead.

In order to fix this issue, this commit makes significant changes
to the way queries are built by the `optbuilder`. It is difficult
to determine whether a particular select clause needs aggregation without
first building the arguments to the aggregates to see whether there are outer
columns. Therefore, this commit makes the following change: before any
expressions in the `SELECT` list are built, all aggregates in the `SELECT`,
`DISTINCT ON`, `ORDER BY`, and `HAVING` clauses are replaced with an `aggregateInfo`.
The `aggregateInfo` contains the function definition, fully built arguments, and
output column of the aggregate.

In order to avoid traversing the expression tree multiple times, the entire
semantic analysis and type checking phase is completed before
building any non-aggregates for these clauses.

This commit avoids allocating slices for analyzed expressions by
instead creating them as columns directly in the scope which will
use them. It later makes another pass through the columns to build them.
    
This and various other performance improvements make the fix for
correlated aggregates perform at parity with the master branch.

Master branch:
```
name                       time/op
Phases/kv-read/OptBuild    18.8µs ± 4%
Phases/planning1/OptBuild  8.06µs ± 3%
Phases/planning2/OptBuild  20.3µs ± 5%
Phases/planning3/OptBuild  22.0µs ± 3%
Phases/planning4/OptBuild  24.1µs ± 4%
Phases/planning5/OptBuild  22.2µs ± 8%
Phases/planning6/OptBuild  36.8µs ± 2%
```
Correlated aggs:
```
name                       time/op
Phases/kv-read/OptBuild    18.4µs ± 3%
Phases/planning1/OptBuild  7.92µs ± 1%
Phases/planning2/OptBuild  20.6µs ± 4%
Phases/planning3/OptBuild  22.2µs ± 4%
Phases/planning4/OptBuild  24.1µs ± 3%
Phases/planning5/OptBuild  21.4µs ± 1%
Phases/planning6/OptBuild  36.3µs ± 2%
```

Fixes #12703

Release note: None


Co-authored-by: Rebecca Taft <becca@cockroachlabs.com>
sql: don't remove batch limit in local lookup join
Previously, the local lookup join node was created with an underlying
scan with an explicitly disabled batch limit. This is the wrong thing to
do, since that scan will be used as a full table scan - it needs a batch
limit to prevent OOM errors.

Release note: None
dt
importccl: add decompression during schema reading too
We added support for compressed IMPORT input but only during the row-reading.
With the addition of the schema-reading pass, we need to use the same decompression there too.

This also checks in bzip and gzip testdata examples for MySQL so that we can test these in unit tests more easily.

Release note: none.
craig[bot] and rytaft
Merge #29411
29411: release-2.1: sql: quickly determine constants during normalization r=rytaft a=rytaft

Backport 1/1 commits from #29289.

/cc @cockroachdb/release

---

Prior to this commit, the SQL normalization code had worst case
O(n^2) performance in the depth of the scalar expression. In rare
cases, the normalization code would traverse the entire expression
tree multiple times to determine if an expression was constant.

This is overkill, since normalization happens bottom-up, and
therefore constants are evaluated bottom-up. As a result, it's only
necessary to check if an expression's children are const Datums to
determine whether the expression itself is constant (in addition to
the existing checks that the expression itself is a not a variable
or impure function).

This commit creates a new Visitor called `fastIsConstVisitor`, which
determines whether an expression is constant by traversing at most
two levels of the tree.

Fixes #26417 

Release note: None


Co-authored-by: Rebecca Taft <becca@cockroachlabs.com>
distsql: don't modify distsql planner's metadataTestTolerance
This was done unnecessarily and resulted in races since the distsql
planner can be used concurrently.

Fixes #28999

Release note: None
Merge pull request #29422 from jordanlewis/backport2.1-29397
backport-2.1: sql: don't remove batch limit in local lookup join
dt
vendor: bump vitess
This picks up fixes for mysqldump output with indexes with no name or parens after current_timestamp.

These do not appear in the default output of mysqldump  Ver 10.13 Distrib 5.7.22, so our current testdata does not contain them.

Release note: none.
sql: fix panic with JSON array ops and nulls
Fixes #28811.

Release note (bug fix): Fix a panic with JSON values and operations that
use arrays.
craig[bot] and petermattis
Merge #29413
29413: release-2.1: gossip: infoStore.mostDistant should never return the local node-id r=a-robinson a=petermattis

Backport 2/2 commits from #29398.

/cc @cockroachdb/release

---

Fixes #28517

Release note: None


Co-authored-by: Peter Mattis <petermattis@gmail.com>
server: fix a confusing start message
Include both bootstrapped (existing) and empty (new) engines in the
start message. Previously the message only included bootstrapped
engines which was confusing when a node first joined a cluster.

See #27599

Release note: None
Merge pull request #29432 from justinj/backport2.1-28830
backport-2.1: sql: fix panic with JSON array ops and nulls
craig[bot] and petermattis
Merge #29434
29434: release-2.1: server: fix a confusing start message r=a-robinson a=petermattis

Backport 1/1 commits from #29412.

/cc @cockroachdb/release

---

Include both bootstrapped (existing) and empty (new) engines in the
start message. Previously the message only included bootstrapped
engines which was confusing when a node first joined a cluster.

See #27599

Release note: None


Co-authored-by: Peter Mattis <petermattis@gmail.com>
craig[bot] and vivekmenezes
Merge #28965
28965: backport-2.1: sql: fix flaky TestLeaseRenewedPeriodically r=vivekmenezes a=vivekmenezes

Backport 1/1 commits from #28954.

/cc @cockroachdb/release

---

I changed GossipUpdateEvent as well which is strictly not needed
but there is a chance without it there will continue to be
some flakiness.

This fix essentially moves the check for release counts
further up before lease acquisition. This fixes a race
where after lease acquisition the periodic timer would
fire, acquire/release some leases.

fixes #28771

Release note: None


Co-authored-by: Vivek Menezes <vivek@cockroachlabs.com>
knz
cli: periodically flush csv/tsv output
The "sinkless" version of changefeeds continuously streams back
results to the user over pgwire. Prior to this patch, this data could
not be consumed effectively with `cockroach sql` using the tsv/csv
output, because the tsv/csv formatter buffers rows internally.

This patch makes tsv/csv output in `cockroach sql` an effective way to
consume changefeeds by ensuring an upper bound on the time rows stay
buffered inside the formatter. The flush period is fixed to 5 seconds.

For context, all the other formatters except for `table` are
line-buffered and thus flush on every row. `table` is a world of its
own which buffers *all* the rows until the query is complete, and that
is unlikely to change any time soon, so this patch doesn't touch that
either.

Release note (cli change): The `csv` and `tsv` formats for `cockroach`
commands that output result rows now buffer data for a maximum of 5
seconds. This makes it possible to e.g. view SQL changefeeds
interactively with `cockroach sql` and `cockroach demo`.
knz
cli: avoid deprecation warnings for deprecated flags
Suggested/recommended by @a-robinson.

The flags `--host`, `--advertise-host`, etc are now deprecated, but
there is no cost in continuing to support them. Also users migrating
from previous versions are not losing anything (or missing out) by
continuing to use them. Forcing a warning to appear when they are used
does not bring any tangible benefit and risks creating operator
fatigue.

So this patch removes the warning (but keeps the deprecated flags
hidden, so that new users are guided to the new flags).

Release note: None
dt
Merge pull request #29430 from dt/backport2.1-29351
release-2.1: vendor: bump vitess
knz
deps: fork go-yaml to get Nikhil's patch
We want to use go-yaml/yaml#381
which is not yet merged.

Release note: None
knz
deps: unbreak the windows build
... by reimporting github.com/shirou/w32.

Release note: None
client: remove a bad assertion
We were asserting that a txn closure doesn't declare success if the txn
is marked as ABORTED. Some sort of error is generally expected in that
case.
Unfortunately, the assertion is naive. The proto can be marked as
ABORTED async by the heartbeat loop, in which case the client only finds
out on the imminent commit attempt.

Fix #28743

Release note: bug fix: fix a rare crash with the message "no err but
aborted txn proto"
Commits on Sep 01, 2018
opt: process FKs in test catalog
The test catalog currently ignores foreign keys. This is a problem
because FKs sometimes result in automatic indexes being created, so
our tests can be missing indexes that would exist in a real setting
(e.g. see TPC-H).

This change adds code to resolve FKs and add indexes as necessary.

Release note: None
craig[bot] and knz
Merge #29445
29445: release-2.1: cli: periodically flush csv/tsv output r=knz a=knz

Backport 1/1 commits from #28688.

/cc @cockroachdb/release

---

Fixes #28654.

The "sinkless" version of changefeeds continuously streams back
results to the user over pgwire. Prior to this patch, this data could
not be consumed effectively with `cockroach sql` using the tsv/csv
output, because the tsv/csv formatter buffers rows internally.

This patch makes tsv/csv output in `cockroach sql` an effective way to
consume changefeeds by ensuring an upper bound on the time rows stay
buffered inside the formatter. The flush period is fixed to 5 seconds.

For context, all the other formatters except for `table` are
line-buffered and thus flush on every row. `table` is a world of its
own which buffers *all* the rows until the query is complete, and that
is unlikely to change any time soon, so this patch doesn't touch that
either.

Release note (cli change): The `csv` and `tsv` formats for `cockroach`
commands that output result rows now buffer data for a maximum of 5
seconds. This makes it possible to e.g. view SQL changefeeds
interactively with `cockroach sql` and `cockroach demo`.


Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
craig[bot] and knz
Merge #29448
29448: release-2.1: cli: avoid deprecation warnings for deprecated flags r=knz a=knz

Backport 1/1 commits from #29446.

/cc @cockroachdb/release

---

Suggested/recommended by @a-robinson.

The flags `--host`, `--advertise-host`, etc are now deprecated, but
there is no cost in continuing to support them. Also users migrating
from previous versions are not losing anything (or missing out) by
continuing to use them. Forcing a warning to appear when they are used
does not bring any tangible benefit and risks creating operator
fatigue.

So this patch removes the warning (but keeps the deprecated flags
hidden, so that new users are guided to the new flags).

Release note: None


Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
Commits on Sep 02, 2018
craig[bot] and vivekmenezes
Merge #29294
29294: backport-2.1: sql: fix problem with TRUNCATE messing up SEQUENCE dependency r=vivekmenezes a=vivekmenezes

Backport 1/1 commits from #29148.

/cc @cockroachdb/release

---

While truncating a table, the column dependency to a sequence
was being retained correctly, but the reference from the sequence
back to the table was not getting updated, resulting in the
sequence retaining a reference to the old TRUNCATED table.

fix #29010

Release note (sql change): Fixed problem with messing up schema
on running TRUNCATE on a table with a reference to a sequence.


Co-authored-by: Vivek Menezes <vivek@cockroachlabs.com>
craig[bot] and andreimatei
Merge #29456
29456: release-2.1: client: remove a bad assertion r=andreimatei a=andreimatei

Backport 1/1 commits from #29442.

/cc @cockroachdb/release

---

We were asserting that a txn closure doesn't declare success if the txn
is marked as ABORTED. Some sort of error is generally expected in that
case.
Unfortunately, the assertion is naive. The proto can be marked as
ABORTED async by the heartbeat loop, in which case the client only finds
out on the imminent commit attempt.

Fix #28743

Release note: bug fix: fix a rare crash with the message "no err but
aborted txn proto"


Co-authored-by: Andrei Matei <andrei@cockroachlabs.com>
Commits on Sep 03, 2018
craig[bot] and asubiotto
Merge #29426
29426: release-2.1: distsql: don't modify distsql planner's metadataTestTolerance r=asubiotto a=asubiotto

Backport 1/1 commits from #29375.

/cc @cockroachdb/release

---

This was done unnecessarily and resulted in races since the distsql
planner can be used concurrently.

Fixes #28999

Release note: None


Co-authored-by: Alfonso Subiotto Marqués <alfonso@cockroachlabs.com>
Commits on Sep 04, 2018
kv: rip out Transport.GetPending and batchClient.pending
These aren't needed anymore.

Release note: None
kv: rip out clientPendingMu from grpcTransport
This no longer needs to be thread-safe.

Release note: None
kv: rip out nodeID from batchClient
This state is already in ReplicaDescriptor.

Release note: None
kv: rip out Transport.Close
This was not needed anymore. This should provide a small
but real perf win because we can avoid the context.WithCancel
call for every RPC.

Release note: None
engine: use Txn.Timestamp instead of OrigTimestamp for LogLogicalOp
The `OrigTimestamp` was being used to give a timestamp to the
txn in the logical op log because that is the timestamp the
intent is written at. However, the intent can't actually
commit until its `Timestamp`. This was causing issues with
closed timestamps, which forwards the `Txn.Timestamp` but don't
affect the `OrigTimestamp`. Without this change we were hitting
an assertion in the rangefeed package where a new intent looked
like it was below the resolved timestamp.

Release note: None
rangefeed: add release valve to logical op consumption
This change adds a new `EventChanTimeout` configuration to
`rangefeed.Processor`. The config specifies the maximum duration
that `ConsumeLogicalOps` and `ForwardClosedTS` will block on the
Processor's input channel. Once the duration is exceeded, these
methods will stop the processor with an error and tear it down.
This prevents slow consumers from making callers of the two methods
block.

The first method is used downstream of Raft, where blocking due to
slow rangefeed consumers for an unbounded amount of time is unacceptable.
It may require that we resize the processor's input buffer.

A TODO is left about reducing the impact of a single slow consumer
on an entire rangefeed.Processor. This can be left as a future
improvement.

Release note: None
rangefeed: remove unsafe RangeFeedValue contract
The previous contract for `rangefeed.Stream.Send` was that its
events were unsafe for use after it returned, mandating a clone
if they were stored somewhere. This added extra complication and
will force local RangeFeed rpcs to perform clones even when they're
not necessary. Remove this "optimization" and strengthen the contract.

Release note: None
kv: teach DistSender about RangeFeeds
This commit adds a RangeFeed method to DistSender and teaches it how
to split rangefeeds across ranges and how to manage their individual
lifecycle events.

Release note: None
changefeedccl: use RangeFeeds when cluster setting enabled
This commit adjusts the changefeed poller to use RangeFeed
requests when the corresponding cluster setting is enabled.

Release note: None
rangefeed: small perf-related changes
This PR includes two small perf tweaks that came up
while running cdc/w=100/nodes=3/init=true.

Release note: None
changefeedccl: use ConnResultsBufferBytes in other tests
These seemed to get stuck without this.

Release note: None
storage: only allow rangefeeds with epoch-based leases
Ranges with expiration-based leases will not function properly
with closed timestamps, which means that we can't establish a
resolved timestamp on ranges that use them. Because of this,
we disallow rangefeeds on these ranges entirely.

This restriction may be lifted in the future.

Release note: None
storage: hook closed timestamps into rangefeed
This change hooks up closed timestamps into rangefeed. It does
so by creating a new closed timestamp subscription and notifying
all ranges with active rangefeed when new closed timestamp entries
arrive. This triggers each rangefeed to check for the maximum closed
timestamp on its range and use this to attempt to move its resolved
timestamp forward.

Eventually this will need some testing tied in with the closedts
package. That will have to wait until the closedts package is set
up to be more easily used in tests from the storage package (right
now `NoopContainer`s are used in tests everywhere). For now, we can
wait for integration testing with cdc.

Release note: None
craig[bot] and nvanbenschoten
Merge #29500
29500: release-2.1: backport 7 rangefeed PRs r=nvanbenschoten a=nvanbenschoten

Backport:
  * 5/5 commits from "kv: give Transport a haircut" (#28855)
  * 1/1 commits from "engine: use Txn.Timestamp instead of OrigTimestamp for LogLogicalOp" (#28970)
  * 1/1 commits from "rangefeed: add release valve to logical op consumption" (#29076)
  * 3/3 commits from "kv: teach DistSender about RangeFeeds, use for changefeeds" (#28912)
  * 1/1 commits from "rangefeed: small perf-related changes" (#29134)
  * 2/2 commits from "kv: truncate RangeFeed span to range descriptor " (#29219)
  * 2/2 commits from "storage: hook closed timestamps into rangefeed" (#28974)

Please see individual PRs for details.

All cherry-picks were clean.

/cc @cockroachdb/release


Co-authored-by: Nathan VanBenschoten <nvanbenschoten@gmail.com>
dt
Merge pull request #29423 from dt/backport2.1-29364
release-2.1: importccl: add decompression during schema reading too
Merge pull request #29457 from RaduBerinde/backport2.1-29415
release-2.1: opt: process FKs in test catalog
rpc: allow dialing connections to be cancelled
Use DialContext instead of Dial so the dialing connection will respond
to cancellation and clean up in a prompt manner when the stopper is
quiesced. Fixes leaked goroutine flakes in the gossip package when run
with `make stress`.

Release note: None
roachtest: fix Status call
`test.Status()` does not take a format string. It is equivalent to
`fmt.Sprint()`.

Release note: None
knz
cli/interactive_tests: deflake test_exec_log.tcl
The tests were not properly waiting for the SQL statements to complete
executing before going on to observe the execution log. This patch
ensures they wait enough.

Release note: None
knz
cli/interactive_tests: deflake test_missing_log_output.tcl
`cockroach quit` has been reporting warnings during normal operation
since the dawn of time, for example when it switches from a soft to a
hard shutdown. A test was incorrectly asserting that the command
wouldn't log anything during normal operation, or merely some warning
in the grpc package. This patch removes the incorrect assertions.

Release note: None
knz
cli: deflake TestNodeStatus
The live byte expected maxima are growing over time. This syncs the
test with current common observed values and gives us some head space.

Release note: None
3 authors
Merge #29414 #29443
29414: release-2.1: sql: fix panic when normalizing ->(NULL::STRING) r=rytaft a=rytaft

Backport 1/1 commits from #29404.

/cc @cockroachdb/release

---

Prior to this commit, when `(NULL::STRING)` was on the right-hand side
of a `->` operator, the normalization code caused a panic:
```
interface conversion: tree.Datum is tree.dNull, not *tree.DString
```
This was due to the fact that `Eval` returns `dNull` instead of a `*DString`
when evaluating `NULL::STRING`, but the code in `ComparisonExpr.normalize` was
expecting a `*DString`.

This commit adds a check to make sure that we still have a string after
`Eval` is called.

This bug was uncovered because `opt/exec/execbuilder` had some tests in
the `inverted_index` test file that were not present in
`logictest/testdata/planner_test/inverted_index`, which contained an expression
with `->(NULL::STRING)`. These tests have been added as regression tests
in `planner_test`.

Fixes #29399

Release note (bug fix): Fixed a panic with SQL statements containing
"->(NULL::STRING)".


29443: release-2.1: sync vendor with master r=knz a=knz

Backport:
  * 1/1 commits from "deps: fork go-yaml to get Nikhil's patch" (#29349)
  * 1/1 commits from "deps: unbreak the windows build" (#29428)

Please see individual PRs for details.

/cc @cockroachdb/release


Co-authored-by: Rebecca Taft <becca@cockroachlabs.com>
Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
opt,sql: run eval tests through optimizer
We have all these scalar tests, might as well run them through the
optimizer to make sure it's representing them faithfully.

Release note: None
4 authors
Merge #29512 #29513 #29516 #29521
29512: release-2.1: rpc: allow dialing connections to be cancelled r=tschottdorf a=petermattis

Backport 1/1 commits from #29034.

/cc @cockroachdb/release

---

Use DialContext instead of Dial so the dialing connection will respond
to cancellation and clean up in a prompt manner when the stopper is
quiesced. Fixes leaked goroutine flakes in the gossip package when run
with `make stress`.

Release note: None


29513: release-2.1: *: minor fixes for go1.11 warnings r=tschottdorf a=petermattis

Backport 2/2 commits from #29062.

/cc @cockroachdb/release

---

Not really necessary, but I'm going to be backporting more roachtest changes and I don't want the roachtest commit in this PR to cause a conflict.

29516: release-2.1: assorted deflakes r=knz a=knz

Backport:
  * 1/1 commits from "cli/interactive_tests: deflake test_exec_log.tcl" (#29452)
  * 1/1 commits from "cli/interactive_tests: deflake test_missing_log_output.tcl" (#29453)
  * 1/1 commits from "cli: deflake TestNodeStatus" (#29454)

Please see individual PRs for details.

/cc @cockroachdb/release


29521: backport-2.1: opt,sql: run eval tests through optimizer r=justinj a=justinj

Backport 1/1 commits from #29066.

/cc @cockroachdb/release

---

We have all these scalar tests, might as well run them through the
optimizer to make sure it's representing them faithfully.

Release note: None


Co-authored-by: Peter Mattis <petermattis@gmail.com>
Co-authored-by: Raphael 'kena' Poss <knz@cockroachlabs.com>
Co-authored-by: Justin Jaffray <justin@cockroachlabs.com>
Merge pull request #29502 from benesch/backport2.1-29284
release-2.1: kv: disable merge queue for TestTxnCoordSenderRetries
Commits on Sep 09, 2018
c-deps: bump rocksdb to pick up segfault fix
Bump RocksDB to pick up cockroachdb/rocksdb#12, which fixes a memory
error that was causing an adriatic node to segfault during startup. It
is likely that this bug could also cause corruption, though no such
corruption has been observed.

Release note: None
Showing 659 changed files with 24,242 additions and 13,872 deletions.
View
@@ -1 +1,2 @@
*.pb.* -diff
*.golden -diff
View

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
View
@@ -4,7 +4,6 @@ required = [
"github.com/cockroachdb/stress",
"github.com/golang/dep/cmd/dep",
"github.com/golang/lint/golint",
"github.com/google/pprof",
"github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway",
"github.com/jteeuwen/go-bindata/go-bindata",
"github.com/kisielk/errcheck",
@@ -68,7 +67,7 @@ ignored = [
[[constraint]]
name = "vitess.io/vitess"
source = "https://github.com/cockroachdb/vitess"
branch = "no-flag-on-delete"
branch = "no-flag-names-parens"
# The master version of go.uuid has an incompatible interface and (as
# of 2018-06-06) a serious bug. Don't upgrade without making sure
@@ -84,6 +83,12 @@ ignored = [
name = "github.com/Azure/azure-storage-blob-go"
branch = "master"
# We want https://github.com/go-yaml/yaml/pull/381
[[constraint]]
name = "gopkg.in/yaml.v2"
source = "https://github.com/cockroachdb/yaml"
branch = "v2-encoding-style"
# github.com/docker/docker depends on a few functions not included in the
# latest release: reference.{FamiliarName,ParseNormalizedNamed,TagNameOnly}.
#
View
@@ -288,7 +288,6 @@ bin/.bootstrap: $(GITHOOKS) Gopkg.lock | bin/.submodules-initialized
./vendor/github.com/cockroachdb/crlfmt \
./vendor/github.com/cockroachdb/stress \
./vendor/github.com/golang/lint/golint \
./vendor/github.com/google/pprof \
./vendor/github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway \
./vendor/github.com/jteeuwen/go-bindata/go-bindata \
./vendor/github.com/kisielk/errcheck \
@@ -301,6 +300,7 @@ bin/.bootstrap: $(GITHOOKS) Gopkg.lock | bin/.submodules-initialized
./vendor/golang.org/x/tools/cmd/stringer
touch $@
.SECONDARY: bin/.submodules-initialized
bin/.submodules-initialized:
ifneq ($(GIT_DIR),)
git submodule update --init --recursive
@@ -714,14 +714,10 @@ all: build
.PHONY: c-deps
c-deps: $(C_LIBS_CCL)
build-mode = build -o $(build-output)
build-mode = build -o $@
go-install: build-mode = install
$(COCKROACH): build-output = $(COCKROACH)
$(COCKROACHOSS): build-output = $(COCKROACHOSS)
$(COCKROACHSHORT): build-output = $(COCKROACHSHORT)
$(COCKROACH) go-install generate: pkg/ui/distccl/bindata.go
$(COCKROACHOSS): BUILDTARGET = ./pkg/cmd/cockroach-oss
@@ -779,7 +775,8 @@ buildshort: ## Build the CockroachDB binary without the admin UI.
build: $(COCKROACH)
buildoss: $(COCKROACHOSS)
buildshort: $(COCKROACHSHORT)
build buildoss buildshort: $(DOCGEN_TARGETS) $(if $(is-cross-compile),,$(SETTINGS_DOC_PAGE))
build buildoss buildshort: $(DOCGEN_TARGETS)
build buildshort: $(if $(is-cross-compile),,$(SETTINGS_DOC_PAGE))
# For historical reasons, symlink cockroach to cockroachshort.
# TODO(benesch): see if it would break anyone's workflow to remove this.
@@ -913,7 +910,7 @@ lint: bin/returncheck
@# packages. In Go 1.10, only 'go vet' recompiles on demand. For details:
@# https://groups.google.com/forum/#!msg/golang-dev/qfa3mHN4ZPA/X2UzjNV1BAAJ.
$(xgo) build -i -v $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LINKFLAGS)' $(PKG)
$(xgo) test ./pkg/testutils/lint -v $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LINKFLAGS)' -run 'TestLint/$(TESTS)'
$(xgo) test ./pkg/testutils/lint -v $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LINKFLAGS)' -run 'Lint/$(TESTS)'
.PHONY: lintshort
lintshort: override TAGS += lint
@@ -1079,7 +1076,7 @@ bin/.cpp_ccl_protobuf_sources: $(PROTOC) $(CPP_PROTOS_CCL)
.SECONDARY: $(UI_JS_OSS) $(UI_JS_CCL)
$(UI_JS_CCL): $(JS_PROTOS_CCL)
$(UI_JS_OSS) $(UI_JS_CCL): $(GW_PROTOS) pkg/ui/yarn.installed
$(UI_JS_OSS) $(UI_JS_CCL): $(GW_PROTOS) pkg/ui/yarn.installed | bin/.submodules-initialized
# Add comment recognized by reviewable.
echo '// GENERATED FILE DO NOT EDIT' > $@
$(PBJS) -t static-module -w es6 --strict-long --keep-case --path pkg --path ./vendor/github.com --path $(GOGO_PROTOBUF_PATH) --path $(COREOS_PATH) --path $(GRPC_GATEWAY_GOOGLEAPIS_PATH) $(filter %.proto,$^) >> $@
@@ -1285,9 +1282,10 @@ bin/.docgen_functions: bin/docgen
docgen functions docs/generated/sql --quiet
touch $@
$(SETTINGS_DOC_PAGE): $(build-output)
@$(build-output) gen settings-list --format=html > $@.tmp
@mv -f $@.tmp $@
settings-doc-gen := $(if $(filter buildshort,$(MAKECMDGOALS)),$(COCKROACHSHORT),$(COCKROACH))
$(SETTINGS_DOC_PAGE): $(settings-doc-gen)
@$(settings-doc-gen) gen settings-list --format=html > $@
optgen-defs := pkg/sql/opt/ops/*.opt
optgen-norm-rules := pkg/sql/opt/norm/rules/*.opt
View
@@ -152,7 +152,6 @@ define VALID_VARS
bindir
bins
build-mode
build-output
cmake-flags
configure-flags
cyan
@@ -176,6 +175,7 @@ define VALID_VARS
optgen-package
optgen-xform-rules
prefix
settings-doc-gen
sig
space
sse

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
Oops, something went wrong.

No commit comments for this range