From 6c0245cfc1baa734f0219252707b4a92475b0a20 Mon Sep 17 00:00:00 2001 From: Bob Vawter Date: Mon, 13 Dec 2021 08:56:26 -0500 Subject: [PATCH] cdc-sink: Overhaul This change overhauls the cdc-sink code to split it into well-defined packages and APIs. Notable functional changes: A cdc-sink endpoint now operates on an entire database at once, since this is the use-case that has been most prominent in discussions. An "immediate" mode is supported, which applies incoming data without waiting for resolved timestamps. This is intended for use when backfilling large datasets or if a high-volume changefeed must catch up after an outage. It is not expected to be the default configuration for cdc-sink. The cdc-sink code can now detect and recover from a limited amount of structural schema drift between the source and target databases. The schema for each target database is held in memory and refreshed from time to time. Drift is checked during resolved-timestamp flushes, which will effectively pause a changefeed until the stored payloads are at least structurally-compatible with the target tables. Notes to reviewers: The sinktypes package defines interfaces for the major moving parts of the revised cdc-sink code base. At present, each interface is implemented by a single type, but having small APIs has been useful in identifying the independent parts of cdc-sink. Similarly, the package structure may be overly fine-grained in favor of identifying specific, small portions of code that compose easily. A comment at the top of most files indicates where code was repackaged from. Recommended package review order: sinktypes, cdc/handler, backend/mutation, backend/apply, backend/timestamp --- .github/workflows/tests.yaml | 11 +- go.mod | 25 +- go.sum | 52 +- internal/backend/apply/apply.go | 300 +++ internal/backend/apply/apply_test.go | 416 +++++ internal/backend/apply/factory.go | 73 + internal/backend/mutation/factory.go | 62 + internal/backend/mutation/store.go | 159 ++ internal/backend/mutation/store_test.go | 88 + internal/backend/schemawatch/coldata.go | 86 + internal/backend/schemawatch/coldata_test.go | 133 ++ internal/backend/schemawatch/watcher.go | 202 +++ internal/backend/schemawatch/watcher_test.go | 99 + internal/backend/schemawatch/watchers.go | 83 + internal/backend/sinktest/context.go | 45 + internal/backend/sinktest/info.go | 69 + internal/backend/sinktest/sinktest.go | 171 ++ internal/backend/timestamp/store.go | 85 + internal/backend/timestamp/store_test.go | 52 + internal/frontend/cdc/handler.go | 188 ++ internal/frontend/cdc/handler_test.go | 142 ++ internal/frontend/cdc/ndjson_url.go | 84 + internal/frontend/cdc/resolved_url.go | 94 + internal/frontend/cdc/url_test.go | 42 + internal/frontend/server/integration_test.go | 107 ++ internal/frontend/server/server.go | 153 ++ internal/frontend/server/wrapper.go | 41 + internal/sinktypes/sinktypes.go | 114 ++ internal/util/batches/batches.go | 104 ++ internal/util/hlc/hlc.go | 87 + internal/util/hlc/hlc_test.go | 66 + internal/util/ident/ident.go | 110 ++ internal/util/ident/ident_test.go | 82 + util.go => internal/util/retry/retry.go | 45 +- main.go | 196 +- main_test.go | 1712 ------------------ resolved_table.go | 105 -- resolved_table_test.go | 190 -- sink.go | 329 ---- sink_table.go | 199 -- sink_table_test.go | 307 ---- sinks.go | 161 -- sql.go | 137 -- sql_test.go | 80 - url.go | 127 -- url_test.go | 28 - 46 files changed, 3651 insertions(+), 3590 deletions(-) create mode 100644 internal/backend/apply/apply.go create mode 100644 
internal/backend/apply/apply_test.go
 create mode 100644 internal/backend/apply/factory.go
 create mode 100644 internal/backend/mutation/factory.go
 create mode 100644 internal/backend/mutation/store.go
 create mode 100644 internal/backend/mutation/store_test.go
 create mode 100644 internal/backend/schemawatch/coldata.go
 create mode 100644 internal/backend/schemawatch/coldata_test.go
 create mode 100644 internal/backend/schemawatch/watcher.go
 create mode 100644 internal/backend/schemawatch/watcher_test.go
 create mode 100644 internal/backend/schemawatch/watchers.go
 create mode 100644 internal/backend/sinktest/context.go
 create mode 100644 internal/backend/sinktest/info.go
 create mode 100644 internal/backend/sinktest/sinktest.go
 create mode 100644 internal/backend/timestamp/store.go
 create mode 100644 internal/backend/timestamp/store_test.go
 create mode 100644 internal/frontend/cdc/handler.go
 create mode 100644 internal/frontend/cdc/handler_test.go
 create mode 100644 internal/frontend/cdc/ndjson_url.go
 create mode 100644 internal/frontend/cdc/resolved_url.go
 create mode 100644 internal/frontend/cdc/url_test.go
 create mode 100644 internal/frontend/server/integration_test.go
 create mode 100644 internal/frontend/server/server.go
 create mode 100644 internal/frontend/server/wrapper.go
 create mode 100644 internal/sinktypes/sinktypes.go
 create mode 100644 internal/util/batches/batches.go
 create mode 100644 internal/util/hlc/hlc.go
 create mode 100644 internal/util/hlc/hlc_test.go
 create mode 100644 internal/util/ident/ident.go
 create mode 100644 internal/util/ident/ident_test.go
 rename util.go => internal/util/retry/retry.go (54%)
 delete mode 100644 main_test.go
 delete mode 100644 resolved_table.go
 delete mode 100644 resolved_table_test.go
 delete mode 100644 sink.go
 delete mode 100644 sink_table.go
 delete mode 100644 sink_table_test.go
 delete mode 100644 sinks.go
 delete mode 100644 sql.go
 delete mode 100644 sql_test.go
 delete mode 100644 url.go
 delete mode 100644 url_test.go

diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 9cbb04db..8d3e02f2 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -35,6 +35,8 @@ jobs:
       matrix:
         cockroachdb: [ v20.2, v21.1, v21.2 ]
     runs-on: ubuntu-latest
+    env:
+      COVER_OUT: coverage-${{ matrix.cockroachdb }}.out
     steps:
       - uses: actions/checkout@v2
@@ -50,8 +52,15 @@
       - name: Go Tests
        env:
          COCKROACH_DEV_LICENSE: ${{ secrets.COCKROACH_DEV_LICENSE }}
-        run: go test -v ./...
+        run: go test -v -race -coverpkg=./internal/... -covermode=atomic -coverprofile=${{ env.COVER_OUT }} ./...
- name: Stop CockroachDB + if: ${{ always() }} working-directory: .github run: docker-compose down + + - name: Upload coverage + uses: codecov/codecov-action@v2 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ${{ env.COVER_OUT }} diff --git a/go.mod b/go.mod index 1c669727..b2b5479b 100644 --- a/go.mod +++ b/go.mod @@ -3,28 +3,31 @@ module github.com/cockroachdb/cdc-sink go 1.17 require ( - github.com/jackc/pgconn v1.10.0 - github.com/jackc/pgtype v1.8.1 - github.com/jackc/pgx/v4 v4.13.0 + github.com/jackc/pgconn v1.10.1 + github.com/jackc/pgtype v1.9.1 + github.com/jackc/pgx/v4 v4.14.1 github.com/pkg/errors v0.9.1 github.com/stretchr/testify v1.7.0 - golang.org/x/lint v0.0.0-20190930215403-16217165b5de - golang.org/x/net v0.0.0-20211005215030-d2e5035098b3 - honnef.co/go/tools v0.0.1-2019.2.3 + golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 + golang.org/x/net v0.0.0-20211209124913-491a49abca63 + honnef.co/go/tools v0.2.2 ) require ( - github.com/BurntSushi/toml v0.3.1 // indirect + github.com/BurntSushi/toml v0.4.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect github.com/jackc/pgio v1.0.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect - github.com/jackc/pgproto3/v2 v2.1.1 // indirect + github.com/jackc/pgproto3/v2 v2.2.0 // indirect github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b // indirect - github.com/jackc/puddle v1.1.4 // indirect + github.com/jackc/puddle v1.2.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 // indirect + golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b // indirect + golang.org/x/mod v0.5.1 // indirect + golang.org/x/sys v0.0.0-20211214150614-024a26f5d6e2 // indirect golang.org/x/text v0.3.7 // indirect - golang.org/x/tools v0.0.0-20200103221440-774c71fcf114 // indirect + golang.org/x/tools v0.1.8 // indirect + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect ) diff --git a/go.sum b/go.sum index 3b2a7aa1..3b85ca9e 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v0.4.1 h1:GaI7EiDXDRfa8VshkTj7Fym7ha+y8/XxIgD2okUIjLw= +github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= @@ -28,6 +30,8 @@ github.com/jackc/pgconn v1.9.0/go.mod h1:YctiPyvzfU11JFxoXokUOOKQXQmDMoJL9vJzHH8 github.com/jackc/pgconn v1.9.1-0.20210724152538-d89c8390a530/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= github.com/jackc/pgconn v1.10.0 h1:4EYhlDVEMsJ30nNj0mmgwIUXoq7e9sMJrVC2ED6QlCU= github.com/jackc/pgconn v1.10.0/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= +github.com/jackc/pgconn v1.10.1 h1:DzdIHIjG1AxGwoEEqS+mGsURyjt4enSmqzACXvVzOT8= +github.com/jackc/pgconn v1.10.1/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE= github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8= github.com/jackc/pgmock 
v0.0.0-20190831213851-13a1b77aafa2/go.mod h1:fGZlG77KXmcq05nJLRkk0+p82V8B8Dw8KN2/V9c/OAE= @@ -45,6 +49,8 @@ github.com/jackc/pgproto3/v2 v2.0.0-rc3.0.20190831210041-4c03ce451f29/go.mod h1: github.com/jackc/pgproto3/v2 v2.0.6/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= github.com/jackc/pgproto3/v2 v2.1.1 h1:7PQ/4gLoqnl87ZxL7xjO0DR5gYuviDCZxQJsUlFW1eI= github.com/jackc/pgproto3/v2 v2.1.1/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= +github.com/jackc/pgproto3/v2 v2.2.0 h1:r7JypeP2D3onoQTCxWdTpCtJ4D+qpKr0TxvoyMhZ5ns= +github.com/jackc/pgproto3/v2 v2.2.0/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b h1:C8S2+VttkHFdOOCXJe+YGfa4vHYwlt4Zx+IVXQ97jYg= github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b/go.mod h1:vsD4gTJCa9TptPL8sPkXrLZ+hDuNrZCnj29CQpr4X1E= github.com/jackc/pgtype v0.0.0-20190421001408-4ed0de4755e0/go.mod h1:hdSHsc1V01CGwFsrv11mJRHWJ6aifDLfdV3aVjFF0zg= @@ -53,17 +59,24 @@ github.com/jackc/pgtype v0.0.0-20190828014616-a8802b16cc59/go.mod h1:MWlu30kVJrU github.com/jackc/pgtype v1.8.1-0.20210724151600-32e20a603178/go.mod h1:C516IlIV9NKqfsMCXTdChteoXmwgUceqaLfjg2e3NlM= github.com/jackc/pgtype v1.8.1 h1:9k0IXtdJXHJbyAWQgbWr1lU+MEhPXZz6RIXxfR5oxXs= github.com/jackc/pgtype v1.8.1/go.mod h1:LUMuVrfsFfdKGLw+AFFVv6KtHOFMwRgDDzBt76IqCA4= +github.com/jackc/pgtype v1.9.1 h1:MJc2s0MFS8C3ok1wQTdQxWuXQcB6+HwAm5x1CzW7mf0= +github.com/jackc/pgtype v1.9.1/go.mod h1:LUMuVrfsFfdKGLw+AFFVv6KtHOFMwRgDDzBt76IqCA4= github.com/jackc/pgx/v4 v4.0.0-20190420224344-cc3461e65d96/go.mod h1:mdxmSJJuR08CZQyj1PVQBHy9XOp5p8/SHH6a0psbY9Y= github.com/jackc/pgx/v4 v4.0.0-20190421002000-1b8f0016e912/go.mod h1:no/Y67Jkk/9WuGR0JG/JseM9irFbnEPbuWV2EELPNuM= github.com/jackc/pgx/v4 v4.0.0-pre1.0.20190824185557-6972a5742186/go.mod h1:X+GQnOEnf1dqHGpw7JmHqHc1NxDoalibchSk9/RWuDc= github.com/jackc/pgx/v4 v4.12.1-0.20210724153913-640aa07df17c/go.mod h1:1QD0+tgSXP7iUjYm9C1NxKhny7lq6ee99u/z+IHFcgs= github.com/jackc/pgx/v4 v4.13.0 h1:JCjhT5vmhMAf/YwBHLvrBn4OGdIQBiFG6ym8Zmdx570= github.com/jackc/pgx/v4 v4.13.0/go.mod h1:9P4X524sErlaxj0XSGZk7s+LD0eOyu1ZDUrrpznYDF0= +github.com/jackc/pgx/v4 v4.14.1 h1:71oo1KAGI6mXhLiTMn6iDFcp3e7+zon/capWjl2OEFU= +github.com/jackc/pgx/v4 v4.14.1/go.mod h1:RgDuE4Z34o7XE92RpLsvFiOEfrAUT0Xt2KxvX73W06M= github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.4 h1:5Ey/o5IfV7dYX6Znivq+N9MdK1S18OJI5OJq6EAAADw= github.com/jackc/puddle v1.1.4/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= +github.com/jackc/puddle v1.2.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= +github.com/jackc/puddle v1.2.1 h1:gI8os0wpRXFd4FiAY2dWiqRK037tjj3t7rKFeO4X5iw= +github.com/jackc/puddle v1.2.1/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -107,6 +120,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod 
h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -130,18 +145,34 @@ golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= +golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b h1:QAqMVf3pSa6eeTsuklijukjXBlj7Es2QQplab+/RbQ4= +golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b h1:Wh+f8QHJXR411sJR8/vRBTZ7YapZaRvUcLFFJhusH0k= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.5.1 h1:OJxoQ/rynoF0dcCdI7cLPktw/hR2cueqYfjm43oqK38= +golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20211005215030-d2e5035098b3 h1:G64nFNerDErBd2KdvHvIn3Ee6ccUQBTfhDZEO0DccfU= golang.org/x/net v0.0.0-20211005215030-d2e5035098b3/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211209124913-491a49abca63 
h1:iocB37TsdFuN6IBRZ+ry36wrkoV51/tl5vOWqkcPGvY= +golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -152,9 +183,13 @@ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211214150614-024a26f5d6e2 h1:oJg+vmWs1UY4oSg6n1drFSkU2Nc48mxtz5qhA0HaG0I= +golang.org/x/sys v0.0.0-20211214150614-024a26f5d6e2/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -171,12 +206,20 @@ golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190823170909-c4a336ef6a2f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200103221440-774c71fcf114 h1:DnSr2mCsxyCE6ZgIkmcWUQY2R5cH/6wL7eIxEmQOMSE= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200410194907-79a7a3126eef h1:RHORRhs540cYZYrzgU2CPUyykkwZM78hGdzocOo9P8A= +golang.org/x/tools v0.0.0-20200410194907-79a7a3126eef/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= +golang.org/x/tools v0.1.8 h1:P1HhGGuLW4aAclzjtmJdf0mJOjVUZUzOTqkAkWL+l6w= +golang.org/x/tools v0.1.8/go.mod 
h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= @@ -187,5 +230,8 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +honnef.co/go/tools v0.0.1-2020.1.6 h1:W18jzjh8mfPez+AwGLxmOImucz/IFjpNlrKVnaj2YVc= +honnef.co/go/tools v0.0.1-2020.1.6/go.mod h1:pyyisuGw24ruLjrr1ddx39WE0y9OooInRzEYLhQB2YY= +honnef.co/go/tools v0.2.2 h1:MNh1AVMyVX23VUHE2O27jm6lNj3vjO5DexS4A1xvnzk= +honnef.co/go/tools v0.2.2/go.mod h1:lPVVZ2BS5TfnjLyizF7o7hv7j9/L+8cZY2hLyjP9cGY= diff --git a/internal/backend/apply/apply.go b/internal/backend/apply/apply.go new file mode 100644 index 00000000..ff135b70 --- /dev/null +++ b/internal/backend/apply/apply.go @@ -0,0 +1,300 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package apply contains code for applying mutations to tables. +package apply + +// This file contains code repackaged from sink.go. + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "log" + "sort" + "strings" + "sync" + "time" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/batches" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/jackc/pgx/v4" + "github.com/pkg/errors" +) + +// apply will upsert mutations and deletions into a target table. 
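+// It caches the target table's column metadata and precomputes the
+// delete and upsert statements, rebuilding them whenever the schema
+// watcher delivers updated column data.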
+type apply struct {
+	cancel context.CancelFunc
+	target ident.Table
+
+	mu struct {
+		sync.RWMutex
+		columns []sinktypes.ColData
+		pks     []sinktypes.ColData
+
+		sql struct {
+			// DELETE FROM t WHERE ("pk0", "pk1") IN (SELECT $1::INT8, $2::STRING)
+			delete string
+			// UPSERT INTO t ("pk0", "pk1") SELECT $1::INT8, $2::STRING
+			upsert string
+		}
+	}
+}
+
+var _ sinktypes.Applier = (*apply)(nil)
+
+// newApply constructs an apply by inspecting the target table.
+func newApply(w sinktypes.Watcher, target ident.Table,
+) (_ *apply, cancel func(), _ error) {
+	ch, cancel, err := w.Watch(target)
+	if err != nil {
+		return nil, cancel, err
+	}
+
+	a := &apply{cancel: cancel, target: target}
+	// Wait for the initial column data to be loaded.
+	select {
+	case colData := <-ch:
+		a.refreshUnlocked(colData)
+	case <-time.After(10 * time.Second):
+		return nil, cancel, errors.Errorf("column data timeout for %s", target)
+	}
+
+	// Background routine to keep the column data refreshed.
+	go func() {
+		for {
+			colData, open := <-ch
+			if !open {
+				return
+			}
+			a.refreshUnlocked(colData)
+			log.Printf("refreshed schema for table %s", a.target)
+		}
+	}()
+
+	return a, cancel, nil
+}
+
+// Apply applies the mutations to the target table.
+func (a *apply) Apply(
+	ctx context.Context, tx sinktypes.Batcher, muts []sinktypes.Mutation,
+) error {
+	deletes, r := batches.Mutation()
+	defer r()
+	upserts, r := batches.Mutation()
+	defer r()
+
+	a.mu.RLock()
+	defer a.mu.RUnlock()
+
+	if len(a.mu.columns) == 0 {
+		return errors.Errorf("no ColumnData available for %s", a.target)
+	}
+
+	for i := range muts {
+		if muts[i].Delete() {
+			deletes = append(deletes, muts[i])
+			if len(deletes) == cap(deletes) {
+				if err := a.deleteLocked(ctx, tx, deletes); err != nil {
+					return err
+				}
+				deletes = deletes[:0]
+			}
+		} else {
+			upserts = append(upserts, muts[i])
+			if len(upserts) == cap(upserts) {
+				if err := a.upsertLocked(ctx, tx, upserts); err != nil {
+					return err
+				}
+				upserts = upserts[:0]
+			}
+		}
+	}
+
+	if err := a.deleteLocked(ctx, tx, deletes); err != nil {
+		return err
+	}
+	return a.upsertLocked(ctx, tx, upserts)
+}
+
+func (a *apply) deleteLocked(
+	ctx context.Context, db sinktypes.Batcher, muts []sinktypes.Mutation,
+) error {
+	if len(muts) == 0 {
+		return nil
+	}
+
+	batch := &pgx.Batch{}
+
+	for i := range muts {
+		dec := json.NewDecoder(bytes.NewReader(muts[i].Key))
+		dec.UseNumber()
+
+		args := make([]interface{}, 0, len(a.mu.pks))
+		if err := dec.Decode(&args); err != nil {
+			return errors.WithStack(err)
+		}
+
+		if len(args) != len(a.mu.pks) {
+			return errors.Errorf(
+				"schema drift detected: "+
+					"inconsistent number of key columns: "+
+					"received %d expect %d: "+
+					"key %s@%s",
+				len(args), len(a.mu.pks), string(muts[i].Key), muts[i].Time)
+		}
+
+		batch.Queue(a.mu.sql.delete, args...)
+ } + + res := db.SendBatch(ctx, batch) + defer res.Close() + + for i, j := 0, batch.Len(); i < j; i++ { + _, err := res.Exec() + if err != nil { + return errors.Wrap(err, a.mu.sql.delete) + } + } + + return nil +} + +func (a *apply) upsertLocked( + ctx context.Context, db sinktypes.Batcher, muts []sinktypes.Mutation, +) error { + if len(muts) == 0 { + return nil + } + + batch := &pgx.Batch{} + + for i := range muts { + dec := json.NewDecoder(bytes.NewReader(muts[i].Data)) + dec.UseNumber() + + temp := make(map[string]interface{}) + if err := dec.Decode(&temp); err != nil { + return errors.WithStack(err) + } + + args := make([]interface{}, 0, len(a.mu.columns)) + for _, col := range a.mu.columns { + rawColName := col.Name.Raw() + decoded, ok := temp[rawColName] + delete(temp, rawColName) + if col.Ignored { + continue + } + // We're not going to worry about missing columns in the + // mutation to be applied unless it's a PK. If other new + // columns have been added to the target table, the source + // table might not have them yet. + if !ok && col.Primary { + return errors.Errorf( + "schema drift detected in %s: "+ + "missing PK column %s: "+ + "key %s@%s", + a.target, rawColName, + string(muts[i].Key), muts[i].Time) + } + args = append(args, decoded) + } + batch.Queue(a.mu.sql.upsert, args...) + + // If new columns have been added in the source table, but not + // in the destination, we want to error out. + if len(temp) != 0 { + var unexpected []string + for k := range temp { + unexpected = append(unexpected, k) + } + sort.Strings(unexpected) + return errors.Errorf( + "schema drift detected in %s: "+ + "unexpected columns %v: "+ + "key %s@%s", + a.target, unexpected, string(muts[i].Key), muts[i].Time) + } + } + + res := db.SendBatch(ctx, batch) + defer res.Close() + + for i, j := 0, batch.Len(); i < j; i++ { + if _, err := res.Exec(); err != nil { + return errors.Wrap(err, a.mu.sql.upsert) + } + } + return nil +} + +// refreshUnlocked updates the apply with new column information. +func (a *apply) refreshUnlocked(colData []sinktypes.ColData) { + a.mu.Lock() + defer a.mu.Unlock() + + var delete, upsert strings.Builder + lastPkColumn := 0 + + _, _ = fmt.Fprintf(&delete, "DELETE FROM %s WHERE (", a.target) + _, _ = fmt.Fprintf(&upsert, "UPSERT INTO %s (", a.target) + for i := range colData { + if colData[i].Ignored { + continue + } + if colData[i].Primary { + if i > 0 { + lastPkColumn = i + delete.WriteString(", ") + } + delete.WriteString(colData[i].Name.String()) + } + if i > 0 { + upsert.WriteString(", ") + } + upsert.WriteString(colData[i].Name.String()) + } + delete.WriteString(") IN (SELECT ") + upsert.WriteString(") SELECT ") + for i := range colData { + if colData[i].Ignored { + continue + } + if colData[i].Primary { + if i > 0 { + delete.WriteString(", ") + } + _, _ = fmt.Fprintf(&delete, "$%d::%s", i+1, colData[i].Type) + } + if i > 0 { + upsert.WriteString(", ") + } + + // The GEO types need some additional help to convert them from + // the JSON-style representations that we get. 
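+			// For example (a sketch rather than verbatim output), a
+			// table t (pk INT8 PRIMARY KEY, geom GEOMETRY) yields:
+			//   UPSERT INTO t (pk, geom) SELECT $1::INT8, st_geomfromgeojson($2::jsonb)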
+ switch colData[i].Type { + case "GEOGRAPHY": + _, _ = fmt.Fprintf(&upsert, "st_geogfromgeojson($%d::jsonb)", i+1) + case "GEOMETRY": + _, _ = fmt.Fprintf(&upsert, "st_geomfromgeojson($%d::jsonb)", i+1) + default: + _, _ = fmt.Fprintf(&upsert, "$%d::%s", i+1, colData[i].Type) + } + } + delete.WriteString(")") + + a.mu.columns = colData + a.mu.pks = colData[:lastPkColumn+1] + a.mu.sql.delete = delete.String() + a.mu.sql.upsert = upsert.String() +} diff --git a/internal/backend/apply/apply_test.go b/internal/backend/apply/apply_test.go new file mode 100644 index 00000000..ddb13090 --- /dev/null +++ b/internal/backend/apply/apply_test.go @@ -0,0 +1,416 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package apply + +import ( + "encoding/json" + "fmt" + "testing" + + "github.com/cockroachdb/cdc-sink/internal/backend/schemawatch" + "github.com/cockroachdb/cdc-sink/internal/backend/sinktest" + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/batches" + "github.com/stretchr/testify/assert" +) + +// This test inserts and deletes rows from a trivial table. +func TestApply(t *testing.T) { + a := assert.New(t) + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + watchers, cancel := schemawatch.NewWatchers(dbInfo.Pool()) + defer cancel() + + type Payload struct { + Pk0 int `json:"pk0"` + Pk1 string `json:"pk1"` + } + tbl, err := sinktest.CreateTable(ctx, dbName, + "CREATE TABLE %s (pk0 INT, pk1 STRING, PRIMARY KEY (pk0,pk1))") + if !a.NoError(err) { + return + } + + watcher, err := watchers.Get(ctx, dbName) + if !a.NoError(err) { + return + } + + app, cancel, err := newApply(watcher, tbl.Name()) + if !a.NoError(err) { + return + } + defer cancel() + + t.Log(app.mu.sql.delete) + t.Log(app.mu.sql.upsert) + + t.Run("smoke", func(t *testing.T) { + a := assert.New(t) + count := 3 * batches.Size() + adds := make([]sinktypes.Mutation, count) + dels := make([]sinktypes.Mutation, count) + for i := range adds { + p := Payload{Pk0: i, Pk1: fmt.Sprintf("X%dX", i)} + bytes, err := json.Marshal(p) + a.NoError(err) + adds[i] = sinktypes.Mutation{Data: bytes} + + bytes, err = json.Marshal([]interface{}{p.Pk0, p.Pk1}) + a.NoError(err) + dels[i] = sinktypes.Mutation{Key: bytes} + } + + // Verify insertion + a.NoError(app.Apply(ctx, dbInfo.Pool(), adds)) + ct, err := tbl.RowCount(ctx) + a.Equal(count, ct) + a.NoError(err) + + // Verify that they can be deleted. 
+ a.NoError(app.Apply(ctx, dbInfo.Pool(), dels)) + ct, err = tbl.RowCount(ctx) + a.Equal(0, ct) + a.NoError(err) + }) + + // Verify unexpected incoming column + t.Run("unexpected", func(t *testing.T) { + a := assert.New(t) + if err := app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{ + { + Data: []byte(`{"pk0":1, "pk1":0, "no_good":true}`), + }, + }); a.Error(err) { + t.Log(err.Error()) + a.Contains(err.Error(), "unexpected columns [no_good]") + } + }) + + t.Run("missing_key_upsert", func(t *testing.T) { + a := assert.New(t) + if err := app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{ + { + Data: []byte(`{"pk0":1}`), + }, + }); a.Error(err) { + t.Log(err.Error()) + a.Contains(err.Error(), "missing PK column pk1") + } + }) + + t.Run("missing_key_delete_too_few", func(t *testing.T) { + a := assert.New(t) + if err := app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{ + { + Key: []byte(`[1]`), + }, + }); a.Error(err) { + t.Log(err.Error()) + a.Contains(err.Error(), "received 1 expect 2") + } + }) + + t.Run("missing_key_delete_too_many", func(t *testing.T) { + a := assert.New(t) + if err := app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{ + { + Key: []byte(`[1, 2, 3]`), + }, + }); a.Error(err) { + t.Log(err.Error()) + a.Contains(err.Error(), "received 3 expect 2") + } + }) +} + +// This is a smoke test, copied from main_test.go to ensure that +// all supported data types can be applied. It works by creating +// a test table for each type and using CRDB's built-in to_jsonb() +// function to create a payload. +func TestAllDataTypes(t *testing.T) { + testcases := []struct { + name string + columnType string + columnValue string + indexable bool + }{ + {`string_array`, `STRING[]`, `{"sky","road","car"}`, false}, + {`string_array_null`, `STRING[]`, ``, false}, + {`int_array`, `INT[]`, `{1,2,3}`, false}, + {`int_array_null`, `INT[]`, ``, false}, + {`serial_array`, `SERIAL[]`, `{148591304110702593,148591304110702594,148591304110702595}`, false}, + {`serial_array_null`, `SERIAL[]`, ``, false}, + {`bit`, `VARBIT`, `10010101`, true}, + {`bit_null`, `VARBIT`, ``, false}, + {`bool`, `BOOL`, `true`, true}, + {`bool_array`, `BOOL[]`, `{true, false, true}`, false}, + {`bool_null`, `BOOL`, ``, false}, + {`bytes`, `BYTES`, `b'\141\061\142\062\143\063'`, true}, + {`collate`, `STRING COLLATE de`, `'a1b2c3' COLLATE de`, true}, + {`collate_null`, `STRING COLLATE de`, ``, false}, + {`date`, `DATE`, `2016-01-25`, true}, + {`date_null`, `DATE`, ``, false}, + {`decimal`, `DECIMAL`, `1.2345`, true}, + {`decimal_null`, `DECIMAL`, ``, false}, + {`float`, `FLOAT`, `1.2345`, true}, + {`float_null`, `FLOAT`, ``, false}, + {`geography`, `GEOGRAPHY`, `0101000020E6100000000000000000F03F0000000000000040`, false}, + {`geometry`, `GEOMETRY`, `010100000075029A081B9A5DC0F085C954C1F84040`, false}, + {`inet`, `INET`, `192.168.0.1`, true}, + {`inet_null`, `INET`, ``, false}, + {`int`, `INT`, `12345`, true}, + {`int_null`, `INT`, ``, false}, + {`interval`, `INTERVAL`, `2h30m30s`, true}, + {`interval_null`, `INTERVAL`, ``, false}, + { + `jsonb`, + `JSONB`, + ` + { + "string": "Lola", + "bool": true, + "number": 547, + "float": 123.456, + "array": [ + "lola", + true, + 547, + 123.456, + [ + "lola", + true, + 547, + 123.456 + ], + { + "string": "Lola", + "bool": true, + "number": 547, + "float": 123.456, + "array": [ + "lola", + true, + 547, + 123.456, + [ + "lola", + true, + 547, + 123.456 + ] + ] + } + ], + "map": { + "string": "Lola", + "bool": true, + "number": 547, + "float": 123.456, + "array": [ + "lola", + true, + 547, + 
123.456, + [ + "lola", + true, + 547, + 123.456 + ], + { + "string": "Lola", + "bool": true, + "number": 547, + "float": 123.456, + "array": [ + "lola", + true, + 547, + 123.456, + [ + "lola", + true, + 547, + 123.456 + ] + ] + } + ] + } + } + `, + false, + }, + {`jsonb_null`, `JSONB`, ``, false}, + {`serial`, `SERIAL`, `148591304110702593`, true}, + // serial cannot be null + {`string`, `STRING`, `a1b2c3`, true}, + {`string_null`, `STRING`, ``, false}, + {`string_escape`, `STRING`, `a1\b/2?c"3`, true}, + {`time`, `TIME`, `01:23:45.123456`, true}, + {`time_null`, `TIME`, ``, false}, + {`timestamp`, `TIMESTAMP`, `2016-01-25 10:10:10`, true}, + {`timestamp_null`, `TIMESTAMP`, ``, false}, + {`timestamptz`, `TIMESTAMPTZ`, `2016-01-25 10:10:10-05:00`, true}, + {`timestamptz_null`, `TIMESTAMPTZ`, ``, false}, + {`uuid`, `UUID`, `7f9c24e8-3b12-4fef-91e0-56a2d5a246ec`, true}, + {`uuid_null`, `UUID`, ``, false}, + } + + a := assert.New(t) + + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + watchers, cancel := schemawatch.NewWatchers(dbInfo.Pool()) + defer cancel() + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + a := assert.New(t) + + // Place the PK index on the data type under test, if allowable. + var create string + if tc.indexable { + create = fmt.Sprintf("CREATE TABLE %%s (val %s primary key, k int)", tc.columnType) + } else { + create = fmt.Sprintf("CREATE TABLE %%s (k int primary key, val %s)", tc.columnType) + } + + tbl, err := sinktest.CreateTable(ctx, dbName, create) + if !a.NoError(err) { + return + } + + watcher, err := watchers.Get(ctx, dbName) + if !a.NoError(err) { + return + } + + if !a.NoError(watcher.Refresh(ctx, dbInfo.Pool())) { + return + } + + app, cancel, err := newApply(watcher, tbl.Name()) + if !a.NoError(err) { + return + } + defer cancel() + + t.Log(app.mu.sql.delete) + t.Log(app.mu.sql.upsert) + + var jsonValue string + if tc.columnValue == "" { + jsonValue = "null" + } else { + q := fmt.Sprintf("SELECT to_json($1::%s)::string", tc.columnType) + if !a.NoError(dbInfo.Pool().QueryRow(ctx, q, tc.columnValue).Scan(&jsonValue)) { + return + } + } + t.Log(jsonValue) + + mut := sinktypes.Mutation{ + Data: []byte(fmt.Sprintf(`{"k":1,"val":%s}`, jsonValue)), + } + a.NoError(app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{mut})) + + var jsonFound string + a.NoError(dbInfo.Pool().QueryRow(ctx, + fmt.Sprintf("SELECT ifnull(to_json(val)::string, 'null') FROM %s", tbl), + ).Scan(&jsonFound)) + a.Equal(jsonValue, jsonFound) + }) + } +} + +// Ensure that if stored computed columns are present, we don't +// try to write to them and that we correctly ignore those columns +// in incoming payloads. 
+func TestVirtualColumns(t *testing.T) { + a := assert.New(t) + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + watchers, cancel := schemawatch.NewWatchers(dbInfo.Pool()) + defer cancel() + + type Payload struct { + A int `json:"a"` + B int `json:"b"` + C int `json:"c"` + X int `json:"x,omitempty"` + } + tbl, err := sinktest.CreateTable(ctx, dbName, + "CREATE TABLE %s (a INT, b INT, c INT AS (a + b) STORED, PRIMARY KEY (a,b))") + if !a.NoError(err) { + return + } + + watcher, err := watchers.Get(ctx, dbName) + if !a.NoError(err) { + return + } + + app, cancel, err := newApply(watcher, tbl.Name()) + if !a.NoError(err) { + return + } + defer cancel() + + t.Log(app.mu.sql.delete) + t.Log(app.mu.sql.upsert) + + t.Run("computed-is-ignored", func(t *testing.T) { + a := assert.New(t) + p := Payload{A: 1, B: 2, C: 3} + bytes, err := json.Marshal(p) + a.NoError(err) + muts := []sinktypes.Mutation{{Data: bytes}} + + a.NoError(app.Apply(ctx, dbInfo.Pool(), muts)) + }) + + t.Run("unknown-still-breaks", func(t *testing.T) { + a := assert.New(t) + p := Payload{A: 1, B: 2, C: 3, X: -1} + bytes, err := json.Marshal(p) + a.NoError(err) + muts := []sinktypes.Mutation{{Data: bytes}} + + err = app.Apply(ctx, dbInfo.Pool(), muts) + if a.Error(err) { + a.Contains(err.Error(), "unexpected columns") + } + }) +} diff --git a/internal/backend/apply/factory.go b/internal/backend/apply/factory.go new file mode 100644 index 00000000..12c990b9 --- /dev/null +++ b/internal/backend/apply/factory.go @@ -0,0 +1,73 @@ +package apply + +import ( + "context" + "sync" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/ident" +) + +// factory vends singleton instance of apply. +type factory struct { + watchers sinktypes.Watchers + mu struct { + sync.RWMutex + cleanup []func() + instances map[ident.Table]*apply + } +} + +var _ sinktypes.Appliers = (*factory)(nil) + +// New returns an instance of sinktypes.Appliers. +func New(watchers sinktypes.Watchers) (_ sinktypes.Appliers, cancel func()) { + f := &factory{watchers: watchers} + f.mu.instances = make(map[ident.Table]*apply) + return f, func() { + f.mu.Lock() + defer f.mu.Unlock() + for _, fn := range f.mu.cleanup { + fn() + } + f.mu.cleanup = nil + f.mu.instances = nil + } +} + +// Get returns a memoized instance of the Applier for the table. 
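+// It takes a read-locked fast path for existing instances; on a miss,
+// createUnlocked re-checks the map under the exclusive lock so that
+// concurrent callers for the same table share a single apply. A
+// hypothetical call, given an Appliers from New:
+//   app, err := appliers.Get(ctx, ident.NewTable(db, ident.Public, ident.New("t")))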
+func (f *factory) Get(
+	ctx context.Context, table ident.Table,
+) (sinktypes.Applier, error) {
+	if ret := f.getUnlocked(table); ret != nil {
+		return ret, nil
+	}
+	return f.createUnlocked(ctx, table)
+}
+
+func (f *factory) createUnlocked(
+	ctx context.Context, table ident.Table,
+) (*apply, error) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	if ret := f.mu.instances[table]; ret != nil {
+		return ret, nil
+	}
+	watcher, err := f.watchers.Get(ctx, table.Database())
+	if err != nil {
+		return nil, err
+	}
+	ret, cancel, err := newApply(watcher, table)
+	if err == nil {
+		f.mu.cleanup = append(f.mu.cleanup, cancel)
+		f.mu.instances[table] = ret
+	}
+	return ret, err
+}
+
+func (f *factory) getUnlocked(table ident.Table) *apply {
+	f.mu.RLock()
+	defer f.mu.RUnlock()
+	return f.mu.instances[table]
+}
diff --git a/internal/backend/mutation/factory.go b/internal/backend/mutation/factory.go
new file mode 100644
index 00000000..288c37b9
--- /dev/null
+++ b/internal/backend/mutation/factory.go
@@ -0,0 +1,62 @@
+package mutation
+
+import (
+	"context"
+	"sync"
+
+	"github.com/cockroachdb/cdc-sink/internal/sinktypes"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/jackc/pgx/v4/pgxpool"
+)
+
+type factory struct {
+	db        *pgxpool.Pool
+	stagingDB ident.Ident
+
+	mu struct {
+		sync.RWMutex
+		instances map[ident.Table]*store
+	}
+}
+
+var _ sinktypes.MutationStores = (*factory)(nil)
+
+// New returns an instance of sinktypes.MutationStores that stores
+// temporary data in the given SQL database.
+func New(db *pgxpool.Pool, stagingDB ident.Ident) sinktypes.MutationStores {
+	f := &factory{
+		db:        db,
+		stagingDB: stagingDB,
+	}
+	f.mu.instances = make(map[ident.Table]*store)
+	return f
+}
+
+// Get returns a memoized instance of a store for the given table.
+func (f *factory) Get(ctx context.Context, target ident.Table) (sinktypes.MutationStore, error) {
+	if ret := f.getUnlocked(target); ret != nil {
+		return ret, nil
+	}
+	return f.createUnlocked(ctx, target)
+}
+
+func (f *factory) createUnlocked(ctx context.Context, table ident.Table) (*store, error) {
+	f.mu.Lock()
+	defer f.mu.Unlock()
+
+	if ret := f.mu.instances[table]; ret != nil {
+		return ret, nil
+	}
+
+	ret, err := newStore(ctx, f.db, f.stagingDB, table)
+	if err == nil {
+		f.mu.instances[table] = ret
+	}
+	return ret, err
+}
+
+func (f *factory) getUnlocked(table ident.Table) *store {
+	f.mu.RLock()
+	defer f.mu.RUnlock()
+	return f.mu.instances[table]
+}
diff --git a/internal/backend/mutation/store.go b/internal/backend/mutation/store.go
new file mode 100644
index 00000000..122396b2
--- /dev/null
+++ b/internal/backend/mutation/store.go
@@ -0,0 +1,159 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package mutation defines a means of storing and retrieving mutations
+// to be applied to a table.
+package mutation
+
+// The code in this file is reworked from sink_table.go.
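+//
+// Staged mutations are kept in a mangled table in the staging
+// database. For example, assuming a staging database named _cdc_sink,
+// mutations destined for targetdb.public.mytable would be staged in
+// _cdc_sink.public._targetdb_public_mytable; see newStore below.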
+ +import ( + "context" + "fmt" + "log" + "strings" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/batches" + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/jackc/pgtype/pgxtype" + "github.com/jackc/pgx/v4" + "github.com/pkg/errors" +) + +// store implements a storage and retrieval mechanism for staging +// Mutation instances. +type store struct { + // The staging table that holds the mutations. + stage ident.Table + + // Compute SQL fragments exactly once on startup. + sql struct { + drain string // drain rows from the staging table + store string // store mutations + } +} + +var _ sinktypes.MutationStore = (*store)(nil) + +// newStore constructs a new mutation store that will track pending +// mutations to be applied to the given target table. +func newStore( + ctx context.Context, db pgxtype.Querier, stagingDB ident.Ident, target ident.Table, +) (*store, error) { + mangledName := "_" + strings.Join( + []string{target.Database().Raw(), target.Schema().Raw(), target.Table().Raw()}, "_") + stage := ident.NewTable(stagingDB, ident.Public, ident.New(mangledName)) + + if err := retry.Execute(ctx, db, fmt.Sprintf(` +CREATE TABLE IF NOT EXISTS %s ( + nanos INT NOT NULL, + logical INT NOT NULL, + key STRING NOT NULL, + mut JSONB NOT NULL, + PRIMARY KEY (nanos, logical, key) +)`, stage)); err != nil { + return nil, err + } + + s := &store{stage: stage} + + s.sql.drain = fmt.Sprintf(drainTemplate, stage) + s.sql.store = fmt.Sprintf(putTemplate, stage) + + return s, nil +} + +const drainTemplate = ` +WITH d AS (DELETE FROM %s +WHERE (nanos, logical) BETWEEN ($1, $2) AND ($3, $4) +RETURNING nanos, logical, key, mut) +SELECT DISTINCT ON (key) nanos, logical, key, mut FROM d +ORDER BY key ASC, nanos DESC, logical DESC +` + +// Drain dequeues mutations between the given timestamps. +func (s *store) Drain( + ctx context.Context, tx pgxtype.Querier, prev, next hlc.Time, +) ([]sinktypes.Mutation, error) { + var ret []sinktypes.Mutation + err := retry.Retry(ctx, func(ctx context.Context) error { + rows, err := tx.Query(ctx, s.sql.drain, + prev.Nanos(), prev.Logical(), next.Nanos(), next.Logical(), + ) + if err != nil { + return err + } + defer rows.Close() + + // Clear any previous loop, but save the backing array. + ret = ret[:0] + for rows.Next() { + var mut sinktypes.Mutation + var nanos int64 + var logical int + if err := rows.Scan(&nanos, &logical, &mut.Key, &mut.Data); err != nil { + return err + } + mut.Time = hlc.New(nanos, logical) + ret = append(ret, mut) + } + return nil + }) + return ret, errors.Wrapf(err, "drain %s [%s, %s]", s.stage, prev, next) +} + +// Arrays of JSONB aren't implemented +// https://github.com/cockroachdb/cockroach/issues/23468 +const putTemplate = `UPSERT INTO %s (nanos, logical, key, mut) VALUES ($1, $2, $3, $4)` + +// Store stores some number of Mutations into the database. 
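+// The input is split into chunks of at most batches.Size() mutations;
+// putOne writes each chunk in a single pgx batch round-trip.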
+func (s *store) Store( + ctx context.Context, db sinktypes.Batcher, mutations []sinktypes.Mutation, +) error { + return batches.Batch(len(mutations), func(begin, end int) error { + return s.putOne(ctx, db, mutations[begin:end]) + }) +} + +func (s *store) putOne( + ctx context.Context, db sinktypes.Batcher, mutations []sinktypes.Mutation, +) error { + batch := &pgx.Batch{} + + for i := range mutations { + var jsonText string + if mutations[i].Delete() { + jsonText = "null" + } else { + jsonText = string(mutations[i].Data) + } + + batch.Queue(s.sql.store, + mutations[i].Time.Nanos(), + mutations[i].Time.Logical(), + string(mutations[i].Key), + jsonText) + } + + res := db.SendBatch(ctx, batch) + defer res.Close() + + for i, j := 0, batch.Len(); i < j; i++ { + if _, err := res.Exec(); err != nil { + return errors.Wrap(err, s.sql.store) + } + } + + log.Printf("staged %d entries for %s", len(mutations), s.stage) + return nil +} diff --git a/internal/backend/mutation/store_test.go b/internal/backend/mutation/store_test.go new file mode 100644 index 00000000..1e26b051 --- /dev/null +++ b/internal/backend/mutation/store_test.go @@ -0,0 +1,88 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package mutation + +import ( + "fmt" + "testing" + + "github.com/cockroachdb/cdc-sink/internal/backend/sinktest" + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/batches" + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/stretchr/testify/assert" +) + +// TestPutAndDrain will insert and dequeue a batch of Mutations. +func TestPutAndDrain(t *testing.T) { + a := assert.New(t) + ctx, dbInfo, cancel := sinktest.Context() + a.NotEmpty(dbInfo.Version()) + defer cancel() + + targetDB, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + factory := New(dbInfo.Pool(), ident.StagingDB) + + dummyTarget := ident.NewTable( + targetDB, ident.Public, ident.New("target")) + + s, err := factory.Get(ctx, dummyTarget) + if !a.NoError(err) { + return + } + a.NotNil(s) + + stagingTable := s.(*store).stage + + // Cook test data. + total := 3 * batches.Size() + muts := make([]sinktypes.Mutation, total) + for i := range muts { + muts[i] = sinktypes.Mutation{ + Data: []byte(fmt.Sprintf(`{"pk": %d}`, i)), + Key: []byte(fmt.Sprintf(`[%d]`, i)), + Time: hlc.New(int64(1000*i), i), + } + } + + // Insert. + a.NoError(s.Store(ctx, dbInfo.Pool(), muts)) + + // Sanity-check table. + count, err := sinktest.GetRowCount(ctx, dbInfo.Pool(), stagingTable) + a.NoError(err) + a.Equal(total, count) + + // Ensure that data insertion is idempotent. + a.NoError(s.Store(ctx, dbInfo.Pool(), muts)) + + // Sanity-check table. + count, err = sinktest.GetRowCount(ctx, dbInfo.Pool(), stagingTable) + a.NoError(err) + a.Equal(total, count) + + // Dequeue. + ret, err := s.Drain(ctx, dbInfo.Pool(), + hlc.Zero(), hlc.New(int64(1000*total+1), 0)) + a.NoError(err) + a.Len(ret, total) + + // Should be empty now. 
+ count, err = sinktest.GetRowCount(ctx, dbInfo.Pool(), stagingTable) + a.NoError(err) + a.Equal(0, count) +} diff --git a/internal/backend/schemawatch/coldata.go b/internal/backend/schemawatch/coldata.go new file mode 100644 index 00000000..372fb364 --- /dev/null +++ b/internal/backend/schemawatch/coldata.go @@ -0,0 +1,86 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package schemawatch + +import ( + "context" + "fmt" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/jackc/pgtype/pgxtype" +) + +func colSliceEqual(a, b []sinktypes.ColData) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +// Retrieve the primary key columns in their index-order, then append +// any remaining non-generated columns. +const sqlColumnsQuery = ` +WITH +pks AS ( + SELECT column_name, seq_in_index FROM [SHOW INDEX FROM %[1]s] + WHERE index_name = 'primary' AND NOT storing), +cols AS ( + SELECT column_name, data_type, generation_expression != '' AS ignored + FROM [SHOW COLUMNS FROM %[1]s]), +ordered AS ( + SELECT column_name, min(ifnull(pks.seq_in_index, 2048)) AS seq_in_index FROM + cols LEFT JOIN pks USING (column_name) + GROUP BY column_name) +SELECT cols.column_name, pks.seq_in_index IS NOT NULL, cols.data_type, cols.ignored +FROM cols +JOIN ordered USING (column_name) +LEFT JOIN pks USING (column_name) +ORDER BY ordered.seq_in_index, cols.column_name +` + +// getColumns returns the column names for the primary key columns in +// their index-order, followed by all other columns that should be +// mutated. +func getColumns( + ctx context.Context, tx pgxtype.Querier, table ident.Table, +) ([]sinktypes.ColData, error) { + stmt := fmt.Sprintf(sqlColumnsQuery, table) + + var columns []sinktypes.ColData + err := retry.Retry(ctx, func(ctx context.Context) error { + rows, err := tx.Query(ctx, stmt) + if err != nil { + return err + } + defer rows.Close() + + // Clear from previous loop. + columns = columns[:0] + for rows.Next() { + var column sinktypes.ColData + var name string + if err := rows.Scan(&name, &column.Primary, &column.Type, &column.Ignored); err != nil { + return err + } + column.Name = ident.New(name) + columns = append(columns, column) + } + return nil + }) + return columns, err +} diff --git a/internal/backend/schemawatch/coldata_test.go b/internal/backend/schemawatch/coldata_test.go new file mode 100644 index 00000000..b7b56e87 --- /dev/null +++ b/internal/backend/schemawatch/coldata_test.go @@ -0,0 +1,133 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package schemawatch + +// This file contains code repackaged from sql_test.go. 
+ +import ( + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/cdc-sink/internal/backend/sinktest" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/stretchr/testify/assert" +) + +func TestGetColumns(t *testing.T) { + a := assert.New(t) + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + // Create the test db + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + type testcase struct { + tableSchema string + primaryKeys []string + dataCols []string + } + testcases := []testcase{ + { + "a INT", + []string{"rowid"}, + []string{"a"}, + }, + { + "a INT PRIMARY KEY", + []string{"a"}, + nil, + }, + { + "a INT, b INT, PRIMARY KEY (a,b)", + []string{"a", "b"}, + nil, + }, + { + "a INT, b INT, PRIMARY KEY (b,a)", + []string{"b", "a"}, + nil, + }, + { + "a INT, b INT, c INT, PRIMARY KEY (b,a,c)", + []string{"b", "a", "c"}, + nil, + }, + { + "a INT, b INT, q INT, c INT, r INT, PRIMARY KEY (b,a,c)", + []string{"b", "a", "c"}, + []string{"q", "r"}, + }, + { + "a INT, b INT, r INT, c INT, q INT, PRIMARY KEY (b,a,c) USING HASH WITH BUCKET_COUNT = 8", + []string{"ignored_crdb_internal_a_b_c_shard_8", "b", "a", "c"}, + []string{"q", "r"}, + }, + // Ensure that computed columns are ignored. + { + tableSchema: "a INT, b INT, " + + "c INT AS (a + b) STORED, " + + "PRIMARY KEY (a,b)", + primaryKeys: []string{"a", "b"}, + dataCols: []string{"ignored_c"}, + }, + } + + // Virtual columns not supported before v21.1 + if !strings.Contains(dbInfo.Version(), "v20.2.") { + testcases = append(testcases, + testcase{ + tableSchema: "a INT, b INT, " + + "c INT AS (a + b) STORED, " + + "d INT AS (a + b) VIRTUAL, " + + "PRIMARY KEY (a,b)", + primaryKeys: []string{"a", "b"}, + dataCols: []string{"ignored_c", "ignored_d"}, + }, + ) + } + + for i, test := range testcases { + t.Run(fmt.Sprintf("%d:%s", i, test.tableSchema), func(t *testing.T) { + a := assert.New(t) + + tableName := ident.NewTable(dbName, ident.Public, ident.Newf("test_%d", i)) + if !a.NoError(retry.Execute(ctx, dbInfo.Pool(), + fmt.Sprintf(`CREATE TABLE %s ( %s )`, tableName, test.tableSchema))) { + return + } + colData, err := getColumns(ctx, dbInfo.Pool(), tableName) + if !a.NoError(err) { + return + } + var primaryKeys, dataCols []string + for i := range colData { + a.NotEmpty(colData[i].Type) + name := colData[i].Name.Raw() + if colData[i].Ignored { + name = "ignored_" + name + } + if colData[i].Primary { + primaryKeys = append(primaryKeys, name) + } else { + dataCols = append(dataCols, name) + } + } + a.Equal(test.primaryKeys, primaryKeys) + a.Equal(test.dataCols, dataCols) + }) + } +} diff --git a/internal/backend/schemawatch/watcher.go b/internal/backend/schemawatch/watcher.go new file mode 100644 index 00000000..b40c9f6a --- /dev/null +++ b/internal/backend/schemawatch/watcher.go @@ -0,0 +1,202 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package schemawatch contains code to allow the schema of a target +// database to be queried and monitored. 
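+//
+// A typical flow, as exercised by the tests in this repository:
+//
+//   watchers, cancel := schemawatch.NewWatchers(pool)
+//   defer cancel()
+//   watcher, err := watchers.Get(ctx, dbName)
+//   ch, cancelWatch, err := watcher.Watch(table) // receives []sinktypes.ColData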
+package schemawatch
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"log"
+	"sync"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/sinktypes"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/cockroachdb/cdc-sink/internal/util/retry"
+	"github.com/jackc/pgtype/pgxtype"
+	"github.com/pkg/errors"
+)
+
+// RefreshDelay controls how often a Watcher will refresh its schema.
+var RefreshDelay = flag.Duration("schemaRefresh", time.Minute,
+	"how often to scan for schema changes")
+
+// dbSchema is a simplified representation of a SQL database's schema.
+type dbSchema map[ident.Table][]sinktypes.ColData
+
+// A Watcher maintains an internal cache of a database's schema,
+// allowing callers to receive notifications of schema changes.
+type Watcher struct {
+	// All goroutines used by Watch use this as a parent context.
+	background context.Context
+	dbName     ident.Ident
+	delay      time.Duration
+
+	mu struct {
+		sync.RWMutex
+		cond sync.Cond // Conditional on the RLocker
+		data dbSchema
+	}
+
+	sql struct {
+		tables string
+	}
+}
+
+var _ sinktypes.Watcher = (*Watcher)(nil)
+
+// newWatcher constructs a new Watcher to monitor the table schema in the
+// named database. The returned Watcher will internally refresh
+// until the cancel callback is executed.
+func newWatcher(
+	ctx context.Context, tx pgxtype.Querier, dbName ident.Ident,
+) (_ *Watcher, cancel func(), _ error) {
+	background, cancel := context.WithCancel(context.Background())
+
+	w := &Watcher{
+		background: background,
+		delay:      *RefreshDelay,
+		dbName:     dbName,
+	}
+	w.mu.cond.L = w.mu.RLocker()
+	w.sql.tables = fmt.Sprintf(tableTemplate, dbName)
+
+	// Initial data load to sanity-check and make ready.
+	data, err := w.getTables(ctx, tx)
+	if err != nil {
+		cancel()
+		return nil, nil, err
+	}
+	w.mu.data = data
+
+	go func() {
+		for {
+			select {
+			case <-background.Done():
+				return
+			case <-time.After(w.delay):
+			}
+
+			if err := w.Refresh(background, tx); err != nil {
+				log.Printf("could not refresh table data: %v", err)
+			}
+		}
+	}()
+
+	return w, cancel, nil
+}
+
+// Refresh immediately refreshes the Watcher's internal cache. This
+// is intended for use by tests.
+func (w *Watcher) Refresh(ctx context.Context, tx pgxtype.Querier) error {
+	data, err := w.getTables(ctx, tx)
+	if err != nil {
+		return err
+	}
+
+	w.mu.Lock()
+	w.mu.data = data
+	w.mu.Unlock()
+	w.mu.cond.Broadcast()
+	return nil
+}
+
+// Snapshot returns the latest known schema for the target database.
+func (w *Watcher) Snapshot() map[ident.Table][]sinktypes.ColData {
+	w.mu.RLock()
+	defer w.mu.RUnlock()
+
+	ret := make(map[ident.Table][]sinktypes.ColData, len(w.mu.data))
+	for name, cols := range w.mu.data {
+		ret[name] = append([]sinktypes.ColData(nil), cols...)
+	}
+	return ret
+}
+
+// Watch will send updated column data for the given table until the
+// watch is canceled. The requested table must already be known to the
+// Watcher.
+func (w *Watcher) Watch(table ident.Table) (_ <-chan []sinktypes.ColData, cancel func(), _ error) {
+	w.mu.RLock()
+	defer w.mu.RUnlock()
+	if _, ok := w.mu.data[table]; !ok {
+		return nil, nil, errors.Errorf("unknown table %s", table)
+	}
+
+	ctx, cancel := context.WithCancel(w.background)
+	ch := make(chan []sinktypes.ColData, 1)
+
+	go func() {
+		defer close(ch)
+
+		// All code below is read-locked, so we can't miss updates.
+		w.mu.cond.L.Lock()
+		defer w.mu.cond.L.Unlock()
+
+		var last []sinktypes.ColData
+		for {
+			next, ok := w.mu.data[table]
+			// Respond to context cancellation or dropping the table.
+			if !ok || ctx.Err() != nil {
+				return
+			}
+
+			// We're read-locked, so this isn't hugely critical.
+			if !colSliceEqual(last, next) {
+				select {
+				case <-ctx.Done():
+					return
+				case ch <- next:
+					last = next
+				default:
+					log.Fatal("ColData watcher excessively behind")
+				}
+			}
+
+			w.mu.cond.Wait()
+		}
+	}()
+	return ch, cancel, nil
+}
+
+const tableTemplate = `SELECT schema_name, table_name FROM [SHOW TABLES FROM %s]`
+
+func (w *Watcher) getTables(
+	ctx context.Context, tx pgxtype.Querier,
+) (dbSchema, error) {
+	var ret dbSchema
+	err := retry.Retry(ctx, func(ctx context.Context) error {
+		rows, err := tx.Query(ctx, w.sql.tables)
+		if err != nil {
+			return err
+		}
+		defer rows.Close()
+
+		ret = make(dbSchema)
+		for rows.Next() {
+			var schema, table string
+			if err := rows.Scan(&schema, &table); err != nil {
+				return err
+			}
+			tbl := ident.NewTable(w.dbName, ident.New(schema), ident.New(table))
+			cols, err := getColumns(ctx, tx, tbl)
+			if err != nil {
+				return err
+			}
+			ret[tbl] = cols
+		}
+		return nil
+	})
+
+	return ret, errors.Wrap(err, w.sql.tables)
+}
diff --git a/internal/backend/schemawatch/watcher_test.go b/internal/backend/schemawatch/watcher_test.go
new file mode 100644
index 00000000..4ac03f24
--- /dev/null
+++ b/internal/backend/schemawatch/watcher_test.go
@@ -0,0 +1,99 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package schemawatch
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/backend/sinktest"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/cockroachdb/cdc-sink/internal/util/retry"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestWatch(t *testing.T) {
+	a := assert.New(t)
+
+	// Override the delay to exercise the background goroutine.
+	*RefreshDelay = time.Second
+	defer func() { *RefreshDelay = time.Minute }()
+
+	ctx, dbInfo, cancel := sinktest.Context()
+	defer cancel()
+
+	dbName, cancel, err := sinktest.CreateDB(ctx)
+	if !a.NoError(err) {
+		return
+	}
+	defer cancel()
+
+	// Bootstrap column.
+	tblInfo, err := sinktest.CreateTable(ctx, dbName, "CREATE TABLE %s (pk INT PRIMARY KEY)")
+	if !a.NoError(err) {
+		return
+	}
+
+	w, cancel, err := newWatcher(ctx, dbInfo.Pool(), dbName)
+	if !a.NoError(err) {
+		return
+	}
+	defer cancel()
+
+	ch, cancel, err := w.Watch(tblInfo.Name())
+	if !a.NoError(err) {
+		return
+	}
+	defer cancel()
+
+	select {
+	case <-time.After(10 * time.Second):
+		a.FailNow("timed out waiting for channel data")
+	case data := <-ch:
+		if a.Len(data, 1) {
+			a.Equal("pk", data[0].Name.Raw())
+		}
+	}
+
+	// Add a column and expect to see it.
+	if !a.NoError(retry.Execute(ctx, dbInfo.Pool(),
+		fmt.Sprintf("ALTER TABLE %s ADD COLUMN v STRING", tblInfo.Name()))) {
+		return
+	}
+
+	select {
+	case <-time.After(10 * time.Second):
+		a.FailNow("timed out waiting for channel data")
+	case data := <-ch:
+		if a.Len(data, 2) {
+			a.Equal("pk", data[0].Name.Raw())
+			a.Equal("v", data[1].Name.Raw())
+		}
+	}
+
+	// Expect the channel to close if the table is dropped.
+	if !a.NoError(tblInfo.DropTable(ctx)) {
+		return
+	}
+	select {
+	case <-time.After(10 * time.Second):
+		a.FailNow("timed out waiting for channel close")
+	case _, open := <-ch:
+		a.False(open)
+	}
+
+	// Check that we error out quickly on unknown tables.
+	ch, cancel, err = w.Watch(ident.NewTable(dbName, ident.Public, ident.New("blah")))
+	a.Nil(ch)
+	a.Nil(cancel)
+	a.Error(err)
+}
diff --git a/internal/backend/schemawatch/watchers.go b/internal/backend/schemawatch/watchers.go
new file mode 100644
index 00000000..3c1b2191
--- /dev/null
+++ b/internal/backend/schemawatch/watchers.go
@@ -0,0 +1,82 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package schemawatch
+
+import (
+	"context"
+	"sync"
+
+	"github.com/cockroachdb/cdc-sink/internal/sinktypes"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/jackc/pgx/v4/pgxpool"
+)
+
+// Watchers is a memoizing factory for Watcher instances.
+type Watchers struct {
+	pool *pgxpool.Pool
+	mu   struct {
+		sync.RWMutex
+		cancels []func()
+		data    map[ident.Ident]*Watcher
+	}
+}
+
+var _ sinktypes.Watchers = (*Watchers)(nil)
+
+// NewWatchers creates a Watchers factory.
+func NewWatchers(pool *pgxpool.Pool) (_ *Watchers, cancel func()) {
+	w := &Watchers{pool: pool}
+	w.mu.data = make(map[ident.Ident]*Watcher)
+	return w, w.close
+}
+
+// Get creates or returns a memoized Watcher for the given database.
+func (w *Watchers) Get(ctx context.Context, db ident.Ident) (sinktypes.Watcher, error) {
+	if ret := w.getUnlocked(db); ret != nil {
+		return ret, nil
+	}
+	return w.createUnlocked(ctx, db)
+}
+
+// close destroys all Watcher instances associated with the factory.
+func (w *Watchers) close() {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	for _, cancel := range w.mu.cancels {
+		cancel()
+	}
+	w.mu.cancels = nil
+	w.mu.data = make(map[ident.Ident]*Watcher)
+}
+
+func (w *Watchers) createUnlocked(ctx context.Context, db ident.Ident) (*Watcher, error) {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+
+	if ret := w.mu.data[db]; ret != nil {
+		return ret, nil
+	}
+
+	ret, cancel, err := newWatcher(ctx, w.pool, db)
+	if err != nil {
+		return nil, err
+	}
+
+	w.mu.cancels = append(w.mu.cancels, cancel)
+	w.mu.data[db] = ret
+	return ret, nil
+}
+
+func (w *Watchers) getUnlocked(db ident.Ident) *Watcher {
+	w.mu.RLock()
+	defer w.mu.RUnlock()
+	return w.mu.data[db]
+}
diff --git a/internal/backend/sinktest/context.go b/internal/backend/sinktest/context.go
new file mode 100644
index 00000000..62767dbf
--- /dev/null
+++ b/internal/backend/sinktest/context.go
@@ -0,0 +1,45 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package sinktest
+
+import (
+	"context"
+	"flag"
+	"time"
+)
+
+var caseTimeout = flag.Duration(
+	"caseTimeout",
+	2*time.Minute,
+	"raise this value when debugging to allow individual tests to run longer",
+)
+
+// key is a typesafe context key used by Context().
+type key struct{}
+
+// Context returns a per-test Context which has a common timeout
+// behavior and global connection pool. This method will panic if
+// the database could not be created.
+func Context() (context.Context, *DBInfo, context.CancelFunc) {
+	ctx, cancel := context.WithTimeout(context.Background(), *caseTimeout)
+	db, err := bootstrap(ctx)
+	if err != nil {
+		panic(err)
+	}
+	ctx = context.WithValue(ctx, key{}, db)
+	return ctx, db, cancel
+}
+
+// DB returns the database associated with the Context.
+func DB(ctx context.Context) *DBInfo {
+	info, _ := ctx.Value(key{}).(*DBInfo)
+	return info
+}
diff --git a/internal/backend/sinktest/info.go b/internal/backend/sinktest/info.go
new file mode 100644
index 00000000..913072d7
--- /dev/null
+++ b/internal/backend/sinktest/info.go
@@ -0,0 +1,69 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package sinktest
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/cockroachdb/cdc-sink/internal/util/retry"
+	"github.com/jackc/pgx/v4/pgxpool"
+)
+
+// DBInfo encapsulates metadata and a connection to a database.
+type DBInfo struct {
+	db      *pgxpool.Pool
+	version string
+}
+
+// Pool returns the underlying database connection.
+func (di DBInfo) Pool() *pgxpool.Pool { return di.db }
+
+// Version returns the database version.
+func (di DBInfo) Version() string { return di.version }
+
+// TableInfo provides a named table and a means to access it.
+type TableInfo struct {
+	*DBInfo
+	name ident.Table
+}
+
+// NewTableInfo constructs a TableInfo using the given name.
+func NewTableInfo(db *DBInfo, name ident.Table) TableInfo {
+	return TableInfo{db, name}
+}
+
+// DeleteAll deletes (not TRUNCATEs) all rows in the table.
+func (ti TableInfo) DeleteAll(ctx context.Context) error {
+	return retry.Execute(ctx, ti.db, fmt.Sprintf("DELETE FROM %s WHERE true", ti.name))
+}
+
+// DropTable drops the table if it exists.
+func (ti TableInfo) DropTable(ctx context.Context) error {
+	return retry.Execute(ctx, ti.db, fmt.Sprintf("DROP TABLE IF EXISTS %s", ti.name))
+}
+
+// Exec executes a single SQL statement. The sql string must include
+// a single string substitution marker to receive the table name.
+func (ti TableInfo) Exec(ctx context.Context, sql string, args ...interface{}) error {
+	return retry.Execute(ctx, ti.Pool(), fmt.Sprintf(sql, ti.Name()), args...)
+}
+
+// Name returns the table name.
+func (ti TableInfo) Name() ident.Table { return ti.name }
+
+// RowCount returns the number of rows in the table.
+func (ti TableInfo) RowCount(ctx context.Context) (int, error) {
+	return GetRowCount(ctx, ti.db, ti.Name())
+}
+
+func (ti TableInfo) String() string { return ti.name.String() }
diff --git a/internal/backend/sinktest/sinktest.go b/internal/backend/sinktest/sinktest.go
new file mode 100644
index 00000000..54af1579
--- /dev/null
+++ b/internal/backend/sinktest/sinktest.go
@@ -0,0 +1,179 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package sinktest contains code to assist in writing tests.
+package sinktest
+
+import (
+	"context"
+	"flag"
+	"fmt"
+	"log"
+	"math/rand"
+	"os"
+	"sync"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/cockroachdb/cdc-sink/internal/util/retry"
+	"github.com/jackc/pgtype/pgxtype"
+	"github.com/jackc/pgx/v4"
+	"github.com/jackc/pgx/v4/pgxpool"
+	"github.com/pkg/errors"
+)
+
+var connString = flag.String("testConnect",
+	"postgresql://root@localhost:26257/defaultdb?sslmode=disable&experimental_enable_hash_sharded_indexes=true",
+	"the connection string to use for testing")
+
+var globalDBInfo struct {
+	sync.Mutex
+	*DBInfo
+}
+
+func bootstrap(ctx context.Context) (*DBInfo, error) {
+	globalDBInfo.Lock()
+	defer globalDBInfo.Unlock()
+
+	if globalDBInfo.DBInfo != nil {
+		return globalDBInfo.DBInfo, nil
+	}
+
+	if !flag.Parsed() {
+		flag.Parse()
+	}
+
+	// Seed the generator used to pick random database and table names.
+	rand.Seed(time.Now().UnixNano())
+
+	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
+	defer cancel()
+
+	pool, err := pgxpool.Connect(ctx, *connString)
+	if err != nil {
+		return nil, errors.Wrap(err, "could not open database connection")
+	}
+	globalDBInfo.DBInfo = &DBInfo{db: pool}
+
+	if lic, ok := os.LookupEnv("COCKROACH_DEV_LICENSE"); ok {
+		if err := retry.Execute(ctx, pool,
+			"SET CLUSTER SETTING cluster.organization = $1",
+			"Cockroach Labs - Production Testing",
+		); err != nil {
+			return nil, errors.Wrap(err, "could not set cluster.organization")
+		}
+		if err := retry.Execute(ctx, pool,
+			"SET CLUSTER SETTING enterprise.license = $1", lic,
+		); err != nil {
+			return nil, errors.Wrap(err, "could not set enterprise.license")
+		}
+	}
+
+	if err := retry.Execute(ctx, pool,
+		"SET CLUSTER SETTING kv.rangefeed.enabled = true"); err != nil {
+		return nil, errors.Wrap(err, "could not enable rangefeeds")
+	}
+
+	if err := retry.Retry(ctx, func(ctx context.Context) error {
+		return pool.QueryRow(ctx, "SELECT version()").Scan(&globalDBInfo.version)
+	}); err != nil {
+		return nil, errors.Wrap(err, "could not determine cluster version")
+	}
+
+	return globalDBInfo.DBInfo, nil
+}
+
+// CreateDB creates a new testing SQL DATABASE whose lifetime is bounded
+// by that of the associated context, which must be derived from the
+// Context() method in this package.
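+//
+// A typical test sequence (a sketch that mirrors the tests in this
+// change) looks like:
+//
+//   ctx, dbInfo, cancel := sinktest.Context()
+//   defer cancel()
+//   dbName, cancel, err := sinktest.CreateDB(ctx)
+//   // handle err, then defer cancel()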
+func CreateDB(ctx context.Context) (dbName ident.Ident, cancel func(), _ error) {
+	db := DB(ctx).Pool()
+	dbNum := rand.Intn(10000)
+	name := ident.New(fmt.Sprintf("_test_db_%d", dbNum))
+
+	cancel = func() {
+		err := retry.Execute(ctx, db, fmt.Sprintf("DROP DATABASE IF EXISTS %s CASCADE", name))
+		log.Printf("dropped database %s %v", name, err)
+	}
+
+	// Ensure that the base database exists
+	if err := retry.Execute(ctx, db, fmt.Sprintf(
+		"CREATE DATABASE IF NOT EXISTS %s", ident.StagingDB)); err != nil {
+		return name, cancel, errors.WithStack(err)
+	}
+
+	if err := retry.Execute(ctx, db, fmt.Sprintf(
+		"CREATE DATABASE IF NOT EXISTS %s", name)); err != nil {
+		return name, cancel, errors.WithStack(err)
+	}
+
+	if err := retry.Execute(ctx, db, fmt.Sprintf(
+		`ALTER DATABASE %s CONFIGURE ZONE USING gc.ttlseconds = 600`, name)); err != nil {
+		return name, cancel, errors.WithStack(err)
+	}
+
+	return name, cancel, nil
+}
+
+// CreateTable creates a test table and returns a unique name. The
+// schemaSpec parameter must have exactly one %s substitution
+// parameter, which receives the fully-qualified table name.
+func CreateTable(ctx context.Context, dbName ident.Ident, schemaSpec string) (TableInfo, error) {
+	var table ident.Table
+	db := DB(ctx)
+	if db == nil {
+		return TableInfo{}, errors.New("no database in context")
+	}
+
+outer:
+	for {
+		// Pick a random name for the test table.
+		tableNum := rand.Intn(10000)
+		tableName := ident.New(fmt.Sprintf("_test_table_%d", tableNum))
+
+		// Ensure the name is not already in use.
+		var actualTableName string
+		err := retry.Retry(ctx, func(ctx context.Context) error {
+			return db.Pool().QueryRow(ctx,
+				fmt.Sprintf("SELECT table_name FROM [SHOW TABLES FROM %s] WHERE table_name = $1", dbName),
+				tableName.Raw(),
+			).Scan(&actualTableName)
+		})
+		switch err {
+		case pgx.ErrNoRows:
+			table = ident.NewTable(dbName, ident.Public, tableName)
+			break outer
+		case nil:
+			continue
+		default:
+			return TableInfo{}, errors.WithStack(err)
+		}
+	}
+
+	err := retry.Execute(ctx, db.Pool(), fmt.Sprintf(schemaSpec, table))
+	return TableInfo{db, table}, errors.WithStack(err)
+}
+
+// GetRowCount returns the number of rows in the table.
+func GetRowCount(ctx context.Context, db pgxtype.Querier, name ident.Table) (int, error) {
+	var count int
+	err := retry.Retry(ctx, func(ctx context.Context) error {
+		return db.QueryRow(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", name)).Scan(&count)
+	})
+	return count, err
+}
diff --git a/internal/backend/timestamp/store.go b/internal/backend/timestamp/store.go
new file mode 100644
index 00000000..885541b3
--- /dev/null
+++ b/internal/backend/timestamp/store.go
@@ -0,0 +1,92 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package timestamp implements a simple key-timestamp store.
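+//
+// A minimal sketch of intended use, where pool stands in for a
+// caller-provided *pgxpool.Pool:
+//
+//   s, err := timestamp.New(ctx, pool, timestamp.DefaultTable)
+//   // handle err
+//   prev, err := s.Swap(ctx, pool, "my-key", hlc.New(nanos, logical))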
+package timestamp
+
+// The code in this file is adapted from resolved_table.go
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/cockroachdb/cdc-sink/internal/sinktypes"
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/cockroachdb/cdc-sink/internal/util/retry"
+	"github.com/jackc/pgtype/pgxtype"
+	"github.com/jackc/pgx/v4"
+	"github.com/pkg/errors"
+)
+
+// DefaultTable is a default table name to pass to New.
+var DefaultTable = ident.NewTable(ident.StagingDB, ident.Public, ident.New("_timestamps"))
+
+// store implements a simple key/value store for HLC timestamps.
+type store struct {
+	sql struct {
+		swap string
+	}
+}
+
+var _ sinktypes.TimeSwapper = (*store)(nil)
+
+// New constructs a store using the specified table for storage.
+func New(
+	ctx context.Context, tx pgxtype.Querier, target ident.Table,
+) (sinktypes.TimeSwapper, error) {
+	if err := retry.Execute(ctx, tx, fmt.Sprintf(`
+CREATE TABLE IF NOT EXISTS %s (
+key STRING NOT NULL PRIMARY KEY,
+nanos INT8 NOT NULL,
+logical INT8 NOT NULL
+)
+`, target)); err != nil {
+		return nil, errors.WithStack(err)
+	}
+
+	ret := &store{}
+	ret.sql.swap = fmt.Sprintf(swapTemplate, target)
+
+	return ret, nil
+}
+
+const swapTemplate = `
+WITH u AS (UPSERT INTO %[1]s (nanos, logical, key) VALUES ($1, $2, $3) RETURNING 0)
+SELECT nanos, logical FROM %[1]s WHERE key=$3`
+
+// Swap updates the value associated with the key, returning the
+// previous value.
+func (s *store) Swap(
+	ctx context.Context, db pgxtype.Querier, key string, value hlc.Time,
+) (hlc.Time, error) {
+	var nanos int64
+	var logical int
+	err := retry.Retry(ctx, func(ctx context.Context) error {
+		return db.QueryRow(
+			ctx,
+			s.sql.swap,
+			value.Nanos(),
+			value.Logical(),
+			key).Scan(&nanos, &logical)
+	})
+	// No rows means that we haven't seen this key before.
+	if errors.Is(err, pgx.ErrNoRows) {
+		return hlc.Zero(), nil
+	}
+	return hlc.New(nanos, logical), errors.Wrap(err, s.sql.swap)
+}
diff --git a/internal/backend/timestamp/store_test.go b/internal/backend/timestamp/store_test.go
new file mode 100644
index 00000000..192a272f
--- /dev/null
+++ b/internal/backend/timestamp/store_test.go
@@ -0,0 +1,52 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package timestamp
+
+import (
+	"testing"
+
+	"github.com/cockroachdb/cdc-sink/internal/backend/sinktest"
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSwap(t *testing.T) {
+	a := assert.New(t)
+	ctx, dbInfo, cancel := sinktest.Context()
+	a.NotEmpty(dbInfo.Version())
+	defer cancel()
+
+	targetDB, cancel, err := sinktest.CreateDB(ctx)
+	if !a.NoError(err) {
+		return
+	}
+	defer cancel()
+
+	s, err := New(ctx, dbInfo.Pool(), DefaultTable)
+	if !a.NoError(err) {
+		return
+	}
+
+	const count = 10
+	prev := hlc.Zero()
+	for i := 0; i <= count; i++ {
+		next := hlc.New(int64(1000*i), i)
+		found, err := s.Swap(ctx, dbInfo.Pool(), targetDB.Raw(), next)
+		if !a.NoError(err) {
+			return
+		}
+		a.Equal(prev, found)
+		prev = next
+	}
+
+	a.Equal(int64(1000*count), prev.Nanos())
+	a.Equal(count, prev.Logical())
+}
diff --git a/internal/frontend/cdc/handler.go b/internal/frontend/cdc/handler.go
new file mode 100644
index 00000000..0a020e5a
--- /dev/null
+++ b/internal/frontend/cdc/handler.go
@@ -0,0 +1,195 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package cdc contains a http.Handler which can receive
+// webhook events from a CockroachDB CDC changefeed.
+package cdc
+
+import (
+	"bufio"
+	"context"
+	"io"
+	"log"
+	"net/http"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/sinktypes"
+	"github.com/cockroachdb/cdc-sink/internal/util/batches"
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/cockroachdb/cdc-sink/internal/util/retry"
+	"github.com/jackc/pgx/v4/pgxpool"
+	"github.com/pkg/errors"
+)
+
+// This file contains code repackaged from main.go
+
+// Handler is an http.Handler for processing webhook requests
+// from a CockroachDB changefeed.
+type Handler struct {
+	Appliers  sinktypes.Appliers       // Update tables within TargetDb.
+	Immediate bool                     // If true, apply mutations immediately.
+	Pool      *pgxpool.Pool            // Access to the target cluster.
+	Stores    sinktypes.MutationStores // Record incoming json blobs.
+	Swapper   sinktypes.TimeSwapper    // Tracks named timestamps.
+	Watchers  sinktypes.Watchers       // Schema data.
+}
+
+func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second)
+	defer cancel()
+
+	sendErr := func(err error) {
+		if err == nil {
+			http.Error(w, "OK", http.StatusOK)
+			return
+		}
+		http.Error(w, err.Error(), http.StatusBadRequest)
+		log.Printf("ERROR %s:\n%v", r.RequestURI, err)
+	}
+
+	// Is it an ndjson url?
+	if ndjson, err := parseNdjsonURL(r.RequestURI); err == nil {
+		sendErr(h.ndjson(ctx, ndjson, r.Body))
+	} else if resolved, err := parseResolvedURL(r.RequestURI); err == nil {
+		sendErr(h.resolved(ctx, resolved))
+	} else {
+		http.NotFound(w, r)
+	}
+}
+
+// ndjson parses an incoming block of ndjson files and stores the
+// associated Mutations. This assumes that the underlying
+// MutationStore will store duplicate values in an idempotent manner,
+// should the request fail partway through.
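+//
+// Each non-empty input line is expected to resemble the following
+// (representative values):
+//
+//   { "after" : { "pk" : 42, "v" : 99 }, "key" : [ 42 ], "updated" : "1.0" }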
+func (h *Handler) ndjson(ctx context.Context, u ndjsonURL, r io.Reader) error {
+	muts, release := batches.Mutation()
+	defer release()
+
+	target, err := ident.Relative(ident.New(u.targetDB), u.targetTable)
+	if err != nil {
+		return err
+	}
+
+	// In immediate mode, we want to apply the mutations immediately.
+	// The CDC feed guarantees in-order delivery for individual rows.
+	var flush func() error
+	if h.Immediate {
+		applier, err := h.Appliers.Get(ctx, target)
+		if err != nil {
+			return err
+		}
+		flush = func() error {
+			err := applier.Apply(ctx, h.Pool, muts)
+			muts = muts[:0]
+			return err
+		}
+	} else {
+		store, err := h.Stores.Get(ctx, target)
+		if err != nil {
+			return err
+		}
+		flush = func() error {
+			err := store.Store(ctx, h.Pool, muts)
+			muts = muts[:0]
+			return err
+		}
+	}
+
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		buf := scanner.Bytes()
+		if len(buf) == 0 {
+			continue
+		}
+		mut, err := parseMutation(buf)
+		if err != nil {
+			return err
+		}
+		muts = append(muts, mut)
+		if len(muts) == cap(muts) {
+			if err := flush(); err != nil {
+				return err
+			}
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return err
+	}
+	return flush()
+}
+
+// resolved acts upon a resolved timestamp message.
+func (h *Handler) resolved(ctx context.Context, r resolvedURL) error {
+	if h.Immediate {
+		return nil
+	}
+	targetDB := ident.New(r.targetDB)
+
+	return retry.Retry(ctx, func(ctx context.Context) error {
+		tx, err := h.Pool.Begin(ctx)
+		if err != nil {
+			return err
+		}
+		defer tx.Rollback(ctx)
+
+		watcher, err := h.Watchers.Get(ctx, targetDB)
+		if err != nil {
+			return err
+		}
+		schema := watcher.Snapshot()
+
+		// Prepare to merge data.
+		stores := make([]sinktypes.MutationStore, 0, len(schema))
+		appliers := make([]sinktypes.Applier, 0, len(schema))
+		for table := range schema {
+			store, err := h.Stores.Get(ctx, table)
+			if err != nil {
+				return err
+			}
+			stores = append(stores, store)
+
+			applier, err := h.Appliers.Get(ctx, table)
+			if err != nil {
+				return err
+			}
+			appliers = append(appliers, applier)
+		}
+
+		prev, err := h.Swapper.Swap(ctx, tx, "_resolved_"+targetDB.Raw(), r.timestamp)
+		if err != nil {
+			return err
+		}
+
+		if hlc.Compare(r.timestamp, prev) < 0 {
+			return errors.Errorf(
+				"resolved timestamp went backwards: received %s, had %s",
+				r.timestamp, prev)
+		}
+
+		for i := range stores {
+			muts, err := stores[i].Drain(ctx, tx, prev, r.timestamp)
+			if err != nil {
+				return err
+			}
+
+			if err := appliers[i].Apply(ctx, tx, muts); err != nil {
+				return err
+			}
+		}
+
+		return tx.Commit(ctx)
+	})
+}
diff --git a/internal/frontend/cdc/handler_test.go b/internal/frontend/cdc/handler_test.go
new file mode 100644
index 00000000..38112434
--- /dev/null
+++ b/internal/frontend/cdc/handler_test.go
@@ -0,0 +1,142 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package cdc
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/cockroachdb/cdc-sink/internal/backend/apply"
+	"github.com/cockroachdb/cdc-sink/internal/backend/mutation"
+	"github.com/cockroachdb/cdc-sink/internal/backend/schemawatch"
+	"github.com/cockroachdb/cdc-sink/internal/backend/sinktest"
+	"github.com/cockroachdb/cdc-sink/internal/backend/timestamp"
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestHandler(t *testing.T) {
+	t.Run("deferred", func(t *testing.T) { testHandler(t, false) })
+	t.Run("immediate", func(t *testing.T) { testHandler(t, true) })
+}
+
+func testHandler(t *testing.T, immediate bool) {
+	t.Helper()
+	a := assert.New(t)
+
+	ctx, dbInfo, cancel := sinktest.Context()
+	defer cancel()
+
+	dbName, cancel, err := sinktest.CreateDB(ctx)
+	if !a.NoError(err) {
+		return
+	}
+	defer cancel()
+
+	tableInfo, err := sinktest.CreateTable(ctx, dbName,
+		`CREATE TABLE %s (pk INT PRIMARY KEY, v INT NOT NULL)`)
+	if !a.NoError(err) {
+		return
+	}
+
+	swapper, err := timestamp.New(ctx, dbInfo.Pool(), ident.Resolved)
+	if !a.NoError(err) {
+		return
+	}
+
+	watchers, cancel := schemawatch.NewWatchers(dbInfo.Pool())
+	defer cancel()
+
+	appliers, cancel := apply.New(watchers)
+	defer cancel()
+
+	h := &Handler{
+		Appliers:  appliers,
+		Immediate: immediate,
+		Pool:      dbInfo.Pool(),
+		Stores:    mutation.New(dbInfo.Pool(), ident.StagingDB),
+		Swapper:   swapper,
+		Watchers:  watchers,
+	}
+
+	t.Run("smoke", func(t *testing.T) {
+		a := assert.New(t)
+
+		a.NoError(h.ndjson(ctx,
+			ndjsonURL{
+				targetDB:    dbName.Raw(),
+				targetTable: tableInfo.Name().Table().Raw(),
+			},
+			strings.NewReader(`
+{ "after" : { "pk" : 42, "v" : 99 }, "key" : [ 42 ], "updated" : "1.0" }
+{ "after" : { "pk" : 99, "v" : 42 }, "key" : [ 99 ], "updated" : "1.0" }
+`)))
+
+		a.NoError(h.resolved(ctx, resolvedURL{
+			targetDB:  dbName.Raw(),
+			timestamp: hlc.New(2, 0),
+		}))
+
+		ct, err := tableInfo.RowCount(ctx)
+		a.NoError(err)
+		a.Equal(2, ct)
+
+		// Now, delete the data.
+
+		a.NoError(h.ndjson(ctx,
+			ndjsonURL{
+				targetDB:    dbName.Raw(),
+				targetTable: tableInfo.Name().Table().Raw(),
+			},
+			strings.NewReader(`
+{ "after" : null, "key" : [ 42 ], "updated" : "3.0" }
+{ "key" : [ 99 ], "updated" : "3.0" }
+`)))
+
+		a.NoError(h.resolved(ctx, resolvedURL{
+			targetDB:  dbName.Raw(),
+			timestamp: hlc.New(5, 0),
+		}))
+
+		ct, err = tableInfo.RowCount(ctx)
+		a.NoError(err)
+		a.Equal(0, ct)
+	})
+
+	t.Run("empty-ndjson", func(t *testing.T) {
+		a := assert.New(t)
+		a.NoError(h.ndjson(ctx,
+			ndjsonURL{
+				targetDB:    dbName.Raw(),
+				targetTable: tableInfo.Name().Table().Raw(),
+			},
+			strings.NewReader("")))
+	})
+
+	t.Run("resolved-goes-backwards", func(t *testing.T) {
+		a := assert.New(t)
+
+		a.NoError(h.resolved(ctx, resolvedURL{
+			targetDB:  dbName.Raw(),
+			timestamp: hlc.New(50, 0),
+		}))
+		err := h.resolved(ctx, resolvedURL{
+			targetDB:  dbName.Raw(),
+			timestamp: hlc.New(25, 0),
+		})
+		if immediate {
+			a.NoError(err)
+		} else if a.Error(err) {
+			a.Contains(err.Error(), "backwards")
+		}
+	})
+}
diff --git a/internal/frontend/cdc/ndjson_url.go b/internal/frontend/cdc/ndjson_url.go
new file mode 100644
index 00000000..49af84ea
--- /dev/null
+++ b/internal/frontend/cdc/ndjson_url.go
@@ -0,0 +1,84 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package cdc
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"regexp"
+
+	"github.com/cockroachdb/cdc-sink/internal/sinktypes"
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/pkg/errors"
+)
+
+// See https://www.cockroachlabs.com/docs/stable/create-changefeed.html#general-file-format
+// Example: /target//2020-04-02/202004022058072107140000000000000-56087568dba1e6b8-1-72-00000000-test_table-1.ndjson
+// Format is: /[endpoint]/[date]/[timestamp]-[uniquer]-[topic]-[schema-id]
+var (
+	ndjsonRegex       = regexp.MustCompile(`/(?P<target>[^/]*)/(?P<date>\d{4}-\d{2}-\d{2})/(?P<timestamp>.+)-(?P<topic>[^-]+)-(?P<schema_id>[^-]+).ndjson$`)
+	ndjsonEndpointIdx = ndjsonRegex.SubexpIndex("target")
+	ndjsonTopicIdx    = ndjsonRegex.SubexpIndex("topic")
+)
+
+// ndjsonURL contains all the parsed info from an ndjson url.
+type ndjsonURL struct {
+	targetDB    string
+	targetTable string
+}
+
+func parseNdjsonURL(url string) (ndjsonURL, error) {
+	match := ndjsonRegex.FindStringSubmatch(url)
+	if match == nil {
+		return ndjsonURL{}, fmt.Errorf("can't parse url %s", url)
+	}
+
+	return ndjsonURL{
+		targetDB:    match[ndjsonEndpointIdx],
+		targetTable: match[ndjsonTopicIdx],
+	}, nil
+}
+
+// parseMutation takes a single line from an ndjson and extracts enough
+// information to be able to persist it to the staging table.
+func parseMutation(rawBytes []byte) (sinktypes.Mutation, error) {
+	var payload struct {
+		After   json.RawMessage `json:"after"`
+		Key     json.RawMessage `json:"key"`
+		Updated string          `json:"updated"`
+	}
+
+	// Large numbers are not turned into strings, so the UseNumber option for
+	// the decoder is required.
+	dec := json.NewDecoder(bytes.NewReader(rawBytes))
+	dec.UseNumber()
+	if err := dec.Decode(&payload); err != nil {
+		return sinktypes.Mutation{}, err
+	}
+
+	if payload.Updated == "" {
+		return sinktypes.Mutation{},
+			errors.New("CREATE CHANGEFEED must specify the 'WITH updated' option")
+	}
+
+	// Parse the timestamp into nanos and logical.
+	ts, err := hlc.Parse(payload.Updated)
+	if err != nil {
+		return sinktypes.Mutation{}, err
+	}
+
+	return sinktypes.Mutation{
+		Time: ts,
+		Data: payload.After,
+		Key:  payload.Key,
+	}, nil
+}
diff --git a/internal/frontend/cdc/resolved_url.go b/internal/frontend/cdc/resolved_url.go
new file mode 100644
index 00000000..860296bf
--- /dev/null
+++ b/internal/frontend/cdc/resolved_url.go
@@ -0,0 +1,98 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package cdc
+
+// This file contains code repackaged from url.go.
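+//
+// For example, the resolved timestamp 202004042351304139680000000000456
+// decomposes into the wall time 20200404235130 (UTC), 413968000
+// nanoseconds, and a logical counter of 456.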
+
+import (
+	"fmt"
+	"regexp"
+	"strconv"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/pkg/errors"
+)
+
+// Example: /test.sql/2020-04-04/202004042351304139680000000000000.RESOLVED
+// Format is: /[target]/[date]/[timestamp].RESOLVED
+var (
+	resolvedRegex = regexp.MustCompile(
+		`^/(?P<target>.*)/(?P<date>\d{4}-\d{2}-\d{2})/(?P<timestamp>\d{33}).RESOLVED$`)
+	resolvedTargetIdx    = resolvedRegex.SubexpIndex("target")
+	resolvedTimestampIdx = resolvedRegex.SubexpIndex("timestamp")
+)
+
+// resolvedURL contains all the parsed info from a resolved-timestamp url.
+type resolvedURL struct {
+	targetDB  string
+	timestamp hlc.Time
+}
+
+func parseResolvedURL(url string) (resolvedURL, error) {
+	match := resolvedRegex.FindStringSubmatch(url)
+	if len(match) != resolvedRegex.NumSubexp()+1 {
+		return resolvedURL{}, fmt.Errorf("can't parse url %s", url)
+	}
+
+	resolved := resolvedURL{
+		targetDB: match[resolvedTargetIdx],
+	}
+
+	tsText := match[resolvedTimestampIdx]
+	if len(tsText) != 33 {
+		return resolvedURL{}, errors.Errorf(
+			"expected timestamp to be 33 characters long, got %d: %s",
+			len(tsText), tsText,
+		)
+	}
+	var err error
+	resolved.timestamp, err = parseResolvedTimestamp(tsText[:23], tsText[23:])
+	return resolved, err
+}
+
+// This is the timestamp format: YYYYMMDDHHMMSSNNNNNNNNNLLLLLLLLLL
+// Formatting const stolen from https://github.com/cockroachdb/cockroach/blob/master/pkg/ccl/changefeedccl/sink_cloudstorage.go#L48
+const timestampDateTimeFormat = "20060102150405"
+
+func parseResolvedTimestamp(timestamp string, logical string) (hlc.Time, error) {
+	if len(timestamp) != 23 {
+		return hlc.Time{}, fmt.Errorf("can't parse timestamp %s", timestamp)
+	}
+	if len(logical) != 10 {
+		return hlc.Time{}, fmt.Errorf("can't parse logical timestamp %s", logical)
+	}
+
+	// Parse the date and time.
+	timestampParsed, err := time.Parse(timestampDateTimeFormat, timestamp[0:14])
+	if err != nil {
+		return hlc.Time{}, err
+	}
+
+	// Parse out the nanos
+	nanos, err := time.ParseDuration(timestamp[14:23] + "ns")
+	if err != nil {
+		return hlc.Time{}, err
+	}
+	timestampParsed = timestampParsed.Add(nanos)
+
+	// Parse out the logical timestamp
+	logicalParsed, err := strconv.Atoi(logical)
+	if err != nil {
+		return hlc.Time{}, err
+	}
+
+	return hlc.New(timestampParsed.UnixNano(), logicalParsed), nil
+}
diff --git a/internal/frontend/cdc/url_test.go b/internal/frontend/cdc/url_test.go
new file mode 100644
index 00000000..1c00adff
--- /dev/null
+++ b/internal/frontend/cdc/url_test.go
@@ -0,0 +1,42 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package cdc
+
+// This file contains code repackaged from url_test.go.
+
+import (
+	"testing"
+
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNdjsonURL(t *testing.T) {
+	a := assert.New(t)
+	const u = "/target/2020-04-02/202004022058072107140000000000000-56087568dba1e6b8-1-72-00000000-test_table-1f.ndjson"
+
+	p, err := parseNdjsonURL(u)
+	if a.NoError(err) {
+		a.Equal("target", p.targetDB)
+		a.Equal("test_table", p.targetTable)
+	}
+}
+
+func TestResolvedURL(t *testing.T) {
+	a := assert.New(t)
+	const u = "/target/2020-04-04/202004042351304139680000000000456.RESOLVED"
+
+	r, err := parseResolvedURL(u)
+	if a.NoError(err) {
+		a.Equal("target", r.targetDB)
+		a.Equal(hlc.New(1586044290413968000, 456), r.timestamp)
+	}
+}
diff --git a/internal/frontend/server/integration_test.go b/internal/frontend/server/integration_test.go
new file mode 100644
index 00000000..c619b360
--- /dev/null
+++ b/internal/frontend/server/integration_test.go
@@ -0,0 +1,107 @@
+package server
+
+import (
+	"net/url"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/backend/sinktest"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestIntegration(t *testing.T) {
+	if testing.Short() {
+		t.Skip("short tests requested")
+	}
+
+	a := assert.New(t)
+	ctx, dbInfo, cancel := sinktest.Context()
+	defer cancel()
+
+	sourceDB, cancel, err := sinktest.CreateDB(ctx)
+	if !a.NoError(err) {
+		return
+	}
+	defer cancel()
+
+	targetDB, cancel, err := sinktest.CreateDB(ctx)
+	if !a.NoError(err) {
+		return
+	}
+	defer cancel()
+
+	srv, err := newServer(ctx, "127.0.0.1:0", dbInfo.Pool().Config().ConnString(), false)
+	if !a.NoError(err) {
+		return
+	}
+	// Run the server loop in the background.
+	go srv.serve()
+
+	// Set up source and target tables.
+	source, err := sinktest.CreateTable(ctx, sourceDB, "CREATE TABLE %s (pk INT PRIMARY KEY, val STRING)")
+	if !a.NoError(err) {
+		return
+	}
+
+	target := sinktest.NewTableInfo(dbInfo, ident.NewTable(targetDB, ident.Public, source.Name().Table()))
+	if !a.NoError(target.Exec(ctx, "CREATE TABLE %s (pk INT PRIMARY KEY, val STRING)")) {
+		return
+	}
+
+	// Add base data to the source table.
+	a.NoError(source.Exec(ctx, "INSERT INTO %s (pk, val) VALUES (1, 'one')"))
+	ct, err := source.RowCount(ctx)
+	a.NoError(err)
+	a.Equal(1, ct)
+
+	// Set up the changefeed.
+	feedURL := url.URL{
+		Scheme: "http",
+		Host:   srv.listener.Addr().String(),
+		Path:   target.Name().Database().Raw(),
+	}
+	if strings.Contains(dbInfo.Version(), "v20.2.") || strings.Contains(dbInfo.Version(), "v21.1.") {
+		feedURL.Scheme = "experimental-http"
+	}
+
+	if !a.NoError(source.Exec(ctx,
+		"CREATE CHANGEFEED FOR TABLE %s "+
+			"INTO '"+feedURL.String()+"' "+
+			"WITH updated,resolved='1s'")) {
+		return
+	}
+
+	// Wait for the backfilled value.
+	for {
+		ct, err := target.RowCount(ctx)
+		if !a.NoError(err) {
+			return
+		}
+		if ct >= 1 {
+			break
+		}
+		t.Log("waiting for backfill")
+		time.Sleep(time.Second)
+	}
+
+	// Insert an additional value
+	a.NoError(source.Exec(ctx, "INSERT INTO %s (pk, val) VALUES (2, 'two')"))
+	ct, err = source.RowCount(ctx)
+	a.NoError(err)
+	a.Equal(2, ct)
+
+	// Wait for the streamed value.
+	for {
+		ct, err := target.RowCount(ctx)
+		if !a.NoError(err) {
+			return
+		}
+		if ct >= 2 {
+			break
+		}
+		t.Log("waiting for stream")
+		time.Sleep(time.Second)
+	}
+}
diff --git a/internal/frontend/server/server.go b/internal/frontend/server/server.go
new file mode 100644
index 00000000..70994d0f
--- /dev/null
+++ b/internal/frontend/server/server.go
@@ -0,0 +1,153 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package server contains a generic HTTP server that installs
+// the CDC listener.
+package server
+
+// This file contains code repackaged from main.go
+
+import (
+	"context"
+	"flag"
+	"log"
+	"net"
+	"net/http"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/backend/apply"
+	"github.com/cockroachdb/cdc-sink/internal/backend/mutation"
+	"github.com/cockroachdb/cdc-sink/internal/backend/schemawatch"
+	"github.com/cockroachdb/cdc-sink/internal/backend/timestamp"
+	"github.com/cockroachdb/cdc-sink/internal/frontend/cdc"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/jackc/pgx/v4"
+	"github.com/jackc/pgx/v4/pgxpool"
+	"github.com/pkg/errors"
+	"golang.org/x/net/http2"
+	"golang.org/x/net/http2/h2c"
+)
+
+// Various flags.
+var (
+	BindAddr = flag.String(
+		"bindAddr", ":26258", "the network address to bind to")
+
+	ConnectionString = flag.String(
+		"conn",
+		"postgresql://root@localhost:26257/defaultdb?sslmode=disable",
+		"cockroach connection string",
+	)
+
+	IgnoreResolved = flag.Bool("ignoreResolved", false,
+		"write data to the target database immediately, without "+
+			"waiting for resolved timestamps")
+)
+
+// Main is the entry point to the server.
+func Main(ctx context.Context) error {
+	if !flag.Parsed() {
+		flag.Parse()
+	}
+	s, err := newServer(ctx, *BindAddr, *ConnectionString, *IgnoreResolved)
+	if err != nil {
+		return err
+	}
+	return s.serve()
+}
+
+type server struct {
+	listener net.Listener
+	srv      *http.Server
+}
+
+// newServer performs all of the setup work that's likely to fail before
+// actually serving network requests.
+func newServer(ctx context.Context,
+	bindAddr, connectionString string,
+	ignoreResolved bool,
+) (*server, error) {
+	cfg, err := pgxpool.ParseConfig(connectionString)
+	if err != nil {
+		return nil, errors.Wrapf(err, "could not parse %q", connectionString)
+	}
+	// Identify traffic.
+	cfg.AfterConnect = func(ctx context.Context, conn *pgx.Conn) error {
+		_, err := conn.Exec(ctx, "SET application_name=$1", "cdc-sink")
+		return err
+	}
+	// Ensure connection diversity through long-lived loadbalancers.
+	cfg.MaxConnLifetime = 10 * time.Minute
+	// Keep one spare connection.
+	cfg.MinConns = 1
+	pool, err := pgxpool.ConnectConfig(ctx, cfg)
+	if err != nil {
+		return nil, errors.Wrap(err, "could not connect to CockroachDB")
+	}
+
+	swapper, err := timestamp.New(ctx, pool, ident.Resolved)
+	if err != nil {
+		return nil, err
+	}
+
+	watchers, cancelWatchers := schemawatch.NewWatchers(pool)
+	appliers, cancelAppliers := apply.New(watchers)
+
+	mux := &http.ServeMux{}
+	mux.HandleFunc("/_/healthz", func(w http.ResponseWriter, r *http.Request) {
+		if err := pool.Ping(r.Context()); err != nil {
+			log.Printf("health check failed: %v", err)
+			http.Error(w, "health check failed", http.StatusInternalServerError)
+			return
+		}
+		http.Error(w, "OK", http.StatusOK)
+	})
+	mux.Handle("/", &cdc.Handler{
+		Appliers:  appliers,
+		Immediate: ignoreResolved,
+		Pool:      pool,
+		Stores:    mutation.New(pool, ident.StagingDB),
+		Swapper:   swapper,
+		Watchers:  watchers,
+	})
+
+	l, err := net.Listen("tcp", bindAddr)
+	if err != nil {
+		return nil, errors.Wrapf(err, "could not bind to %q", bindAddr)
+	}
+
+	log.Printf("listening on %s", l.Addr())
+	srv := &http.Server{
+		Handler: h2c.NewHandler(logWrapper(mux), &http2.Server{}),
+	}
+	go func() {
+		<-ctx.Done()
+		log.Println("server shutting down")
+		ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+		defer cancel()
+		if err := srv.Shutdown(ctx); err != nil {
+			log.Printf("error during server shutdown: %v", err)
+		}
+		l.Close()
+		cancelAppliers()
+		cancelWatchers()
+		pool.Close()
+	}()
+
+	return &server{l, srv}, nil
+}
+
+func (s *server) serve() error {
+	err := s.srv.Serve(s.listener)
+	if errors.Is(err, http.ErrServerClosed) {
+		return nil
+	}
+	return err
+}
diff --git a/internal/frontend/server/wrapper.go b/internal/frontend/server/wrapper.go
new file mode 100644
index 00000000..256a2083
--- /dev/null
+++ b/internal/frontend/server/wrapper.go
@@ -0,0 +1,41 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package server
+
+import (
+	"log"
+	"net/http"
+	"time"
+)
+
+type responseSpy struct {
+	http.ResponseWriter
+	statusCode int
+}
+
+func (s *responseSpy) WriteHeader(statusCode int) {
+	s.statusCode = statusCode
+	s.ResponseWriter.WriteHeader(statusCode)
+}
+
+func logWrapper(h http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		spy := &responseSpy{w, 0}
+		start := time.Now()
+		h.ServeHTTP(spy, r)
+		elapsed := time.Since(start)
+		log.Printf("http status %d %s: %s in %s",
+			spy.statusCode,
+			http.StatusText(spy.statusCode),
+			r.URL.Path,
+			elapsed)
+	})
+}
diff --git a/internal/sinktypes/sinktypes.go b/internal/sinktypes/sinktypes.go
new file mode 100644
index 00000000..50855ad5
--- /dev/null
+++ b/internal/sinktypes/sinktypes.go
@@ -0,0 +1,115 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package sinktypes contains data types and interfaces that define the
+// major functional blocks of code within cdc-sink. The goal of placing
+// the types into this package is to make it easy to compose
+// functionality as the cdc-sink project evolves.
+package sinktypes
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/jackc/pgtype/pgxtype"
+	"github.com/jackc/pgx/v4"
+)
+
+// An Applier accepts some number of Mutations and applies them to
+// a target table.
+type Applier interface {
+	Apply(context.Context, Batcher, []Mutation) error
+}
+
+// Appliers is a factory for Applier instances.
+type Appliers interface {
+	Get(ctx context.Context, target ident.Table) (Applier, error)
+}
+
+// A Batcher allows for a batch of statements to be executed in a single
+// round-trip to the database. This is implemented by several pgx types,
+// such as pgxpool.Pool and pgx.Tx.
+type Batcher interface {
+	pgxtype.Querier
+	SendBatch(ctx context.Context, batch *pgx.Batch) pgx.BatchResults
+}
+
+// A Mutation describes a row to upsert into the target database. That
+// is, it is a collection of column values to apply to a row in some
+// table.
+type Mutation struct {
+	Data json.RawMessage // An encoded JSON object: { "key" : "hello" }
+	Key  json.RawMessage // An encoded JSON array: [ "hello" ]
+	Time hlc.Time        // The effective time of the mutation
+}
+
+var nullBytes = []byte("null")
+
+// Delete returns true if the Mutation represents a deletion.
+func (m Mutation) Delete() bool {
+	return len(m.Data) == 0 || bytes.Equal(m.Data, nullBytes)
+}
+
+// MutationStore describes a service which can durably persist some
+// number of Mutations.
+type MutationStore interface {
+	// Drain will delete queued mutations. It is not idempotent.
+	Drain(ctx context.Context, tx pgxtype.Querier, prev, next hlc.Time) ([]Mutation, error)
+
+	// Store implementations should be idempotent.
+	Store(ctx context.Context, db Batcher, muts []Mutation) error
+}
+
+// MutationStores is a factory for MutationStore instances.
+type MutationStores interface {
+	Get(ctx context.Context, target ident.Table) (MutationStore, error)
+}
+
+// A TimeSwapper maintains a durable map of string keys to timestamps.
+type TimeSwapper interface {
+	// Swap stores a new timestamp for the given key, returning the
+	// previous value. If no previous value was present, hlc.Zero() will
+	// be returned.
+	Swap(context.Context, pgxtype.Querier, string, hlc.Time) (hlc.Time, error)
+}
+
+// ColData holds SQL column metadata.
+type ColData struct {
+	Ignored bool
+	Name    ident.Ident
+	Primary bool
+	Type    string
+}
+
+// Watcher allows table metadata to be observed.
+//
+// The methods in this type return column data such that primary key
+// columns are returned first, in their declaration order, followed
+// by all other non-pk columns.
+type Watcher interface {
+	// Refresh will force the Watcher to immediately query the database
+	// for updated schema information. This is intended for testing and
+	// does not need to be called in the general case.
+	Refresh(context.Context, pgxtype.Querier) error
+	// Snapshot returns the latest known schema for all tables.
+	Snapshot() map[ident.Table][]ColData
+	// Watch returns a channel that emits updated column data for
+	// the given table. The channel will be closed if the
+	// watch is canceled or the table is dropped.
+	Watch(table ident.Table) (_ <-chan []ColData, cancel func(), _ error)
+}
+
+// Watchers is a factory for Watcher instances.
+type Watchers interface {
+	Get(ctx context.Context, db ident.Ident) (Watcher, error)
+}
diff --git a/internal/util/batches/batches.go b/internal/util/batches/batches.go
new file mode 100644
index 00000000..eafd6001
--- /dev/null
+++ b/internal/util/batches/batches.go
@@ -0,0 +1,104 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package batches contains support code for working with and testing
+// batches of data.
+package batches
+
+import (
+	"flag"
+	"sync"
+
+	"github.com/cockroachdb/cdc-sink/internal/sinktypes"
+)
+
+const defaultSize = 1000
+
+var batchSize = flag.Int("batchSize", defaultSize, "default size for batched operations")
+
+// Batch is a helper to perform some operation over a large number
+// of values in a batch-oriented fashion. The indexes provided to
+// the callback function are a half-open range [begin, end).
+func Batch(count int, fn func(begin, end int) error) error {
+	consume := Size()
+	idx := 0
+	for {
+		if consume > count {
+			consume = count
+		}
+		if err := fn(idx, idx+consume); err != nil {
+			return err
+		}
+		if consume == count {
+			return nil
+		}
+		idx += consume
+		count -= consume
+	}
+}
+
+// Size returns the default size for batch operations. Testing code
+// should generally use a multiple of this value to ensure that
+// batching has been correctly implemented.
+func Size() int {
+	x := batchSize
+	if x == nil {
+		return defaultSize
+	}
+	return *x
+}
+
+// The Release function must be called to return the underlying array
+// back to the pool.
+type Release func()
+
+var intPool = &sync.Pool{New: func() interface{} {
+	x := make([]int, 0, Size())
+	return &x
+}}
+
+// Int returns a slice of Size() capacity.
+func Int() ([]int, Release) {
+	ret := intPool.Get().(*[]int)
+	return *ret, func() { intPool.Put(ret) }
+}
+
+var int64Pool = &sync.Pool{New: func() interface{} {
+	x := make([]int64, 0, Size())
+	return &x
+}}
+
+// Int64 returns a slice of Size() capacity.
+func Int64() ([]int64, Release) {
+	ret := int64Pool.Get().(*[]int64)
+	return *ret, func() { int64Pool.Put(ret) }
+}
+
+var mutationPool = &sync.Pool{New: func() interface{} {
+	x := make([]sinktypes.Mutation, 0, Size())
+	return &x
+}}
+
+// Mutation returns a slice of Size() capacity.
+func Mutation() ([]sinktypes.Mutation, Release) {
+	ret := mutationPool.Get().(*[]sinktypes.Mutation)
+	return *ret, func() { mutationPool.Put(ret) }
+}
+
+var stringPool = &sync.Pool{New: func() interface{} {
+	x := make([]string, 0, Size())
+	return &x
+}}
+
+// String returns a slice of Size() capacity.
+func String() ([]string, Release) {
+	ret := stringPool.Get().(*[]string)
+	return *ret, func() { stringPool.Put(ret) }
+}
diff --git a/internal/util/hlc/hlc.go b/internal/util/hlc/hlc.go
new file mode 100644
index 00000000..07fb65f9
--- /dev/null
+++ b/internal/util/hlc/hlc.go
@@ -0,0 +1,87 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package hlc contains a trivial representation of CockroachDB's hybrid
+// logical clock timestamp.
+package hlc
+
+// The code in this file is reworked from sink_table.go.
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/pkg/errors"
+)
+
+// Time is a representation of the hybrid logical clock timestamp used
+// by CockroachDB. This is an immutable value type, suitable for use as
+// a map key.
+type Time struct {
+	nanos   int64
+	logical int
+}
+
+// Compare two timestamps.
+func Compare(a, b Time) int {
+	if c := a.nanos - b.nanos; c != 0 {
+		return int(c)
+	}
+	return a.logical - b.logical
+}
+
+// From constructs an HLC time from a wall time.
+func From(t time.Time) Time {
+	return Time{t.UnixNano(), 0}
+}
+
+// New constructs a new Time with wall and logical parts.
+func New(nanos int64, logical int) Time {
+	return Time{nanos, logical}
+}
+
+// Parse splits a timestamp of the format NNNN.LLL into an int64
+// for the nanos and an int for the logical component.
+func Parse(timestamp string) (Time, error) {
+	splits := strings.Split(timestamp, ".")
+	if len(splits) != 2 {
+		return Time{}, errors.Errorf("can't parse timestamp %s", timestamp)
+	}
+	nanos, err := strconv.ParseInt(splits[0], 0, 0)
+	if err != nil {
+		return Time{}, err
+	}
+	if nanos <= 0 {
+		return Time{}, errors.Errorf("nanos must be greater than 0: %d", nanos)
+	}
+	logical, err := strconv.Atoi(splits[1])
+	if len(splits[1]) != 10 && logical != 0 {
+		return Time{}, errors.Errorf("logical part %q must be 10 digits or zero-valued", splits[1])
+	}
+	return Time{nanos, logical}, err
+}
+
+// Zero returns a zero-valued Time.
+func Zero() Time {
+	return Time{}
+}
+
+// Logical returns the logical counter.
+func (t Time) Logical() int { return t.logical }
+
+// Nanos returns the nanosecond wall time.
+func (t Time) Nanos() int64 { return t.nanos }
+
+// String returns the Time as a string in NNNN.LLLLLLLLLL format.
+func (t Time) String() string {
+	return fmt.Sprintf("%d.%010d", t.nanos, t.logical)
+}
diff --git a/internal/util/hlc/hlc_test.go b/internal/util/hlc/hlc_test.go
new file mode 100644
index 00000000..469ddc66
--- /dev/null
+++ b/internal/util/hlc/hlc_test.go
@@ -0,0 +1,66 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+ +package hlc + +import ( + "fmt" + "math" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCompare(t *testing.T) { + a := assert.New(t) + + a.True(Compare(Time{1, 1}, Time{1, 1}) == 0) + + a.True(Compare(Time{2, 1}, Time{1, 1}) > 0) + a.True(Compare(Time{1, 1}, Time{2, 1}) < 0) + + a.True(Compare(Time{1, 2}, Time{1, 1}) > 0) + a.True(Compare(Time{1, 1}, Time{1, 2}) < 0) +} + +func TestParse(t *testing.T) { + // Implementation copied from sink_table_test.go + + tests := []struct { + testcase string + expectedPass bool + expectedNanos int64 + expectedLogical int + }{ + {"", false, 0, 0}, + {".", false, 0, 0}, + {"1233", false, 0, 0}, + {".1233", false, 0, 0}, + {"123.123", false, 123, 123}, + {"0.0", false, 0, 0}, + {"1586019746136571000.0000000000", true, 1586019746136571000, 0}, + {"1586019746136571000.0000000001", true, 1586019746136571000, 1}, + {"9223372036854775807.2147483647", true, math.MaxInt64, math.MaxInt32}, + } + + for i, test := range tests { + t.Run(fmt.Sprintf("%d - %s", i, test.testcase), func(t *testing.T) { + a := assert.New(t) + actual, actualErr := Parse(test.testcase) + if test.expectedPass && a.NoError(actualErr) { + a.Equal(test.expectedNanos, actual.Nanos(), "nanos") + a.Equal(test.expectedLogical, actual.Logical(), "logical") + a.Equal(test.testcase, actual.String()) + } else if !test.expectedPass { + a.Error(actualErr) + } + }) + } +} diff --git a/internal/util/ident/ident.go b/internal/util/ident/ident.go new file mode 100644 index 00000000..c8361639 --- /dev/null +++ b/internal/util/ident/ident.go @@ -0,0 +1,110 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package ident contains types for safely representing SQL identifiers. +package ident + +import ( + "fmt" + "strings" + + "github.com/pkg/errors" +) + +// Well-known identifiers. +var ( + StagingDB = New("_cdc_sink") // "_cdc_sink" + Public = New("public") // "public" + Resolved = NewTable(StagingDB, Public, New("resolved")) +) + +// An Ident is a quoted SQL identifier, generally a table, column, or +// database. This type is an immutable value type, suitable for use as a +// map key. +type Ident struct { + q string +} + +// New returns a quoted SQL identifier. An empty input is allowed and +// yields an Ident for which Empty reports true. +func New(raw string) Ident { + return Ident{`"` + strings.ReplaceAll(raw, `"`, `""`) + `"`} +} + +// Newf returns a quoted SQL identifier. +func Newf(format string, args ...interface{}) Ident { + return New(fmt.Sprintf(format, args...)) +} + +// Relative parses a table name and returns a fully-qualified Table +// name whose database value is always db. +// +// If the input table name is a simple string or has exactly two parts, +// the resulting Table will have the form "db.public.table".
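+// +// For example (an illustrative sketch of the cases in TestRelative): +// +// Relative(New("db"), "tbl") => "db"."public"."tbl" +// Relative(New("db"), "other.tbl") => "db"."public"."tbl" +// Relative(New("db"), "other.sch.tbl") => "db"."sch"."tbl"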
+// +// If the input table has three parts, it is interpreted as a +// fully-qualified name whose database component is replaced by db. +func Relative(db Ident, table string) (Table, error) { + if table == "" { + return Table{}, errors.New("empty table") + } + + parts := strings.Split(table, ".") + switch len(parts) { + case 1: + return Table{db, Public, New(parts[0])}, nil + case 2: + return Table{db, Public, New(parts[1])}, nil + case 3: + return Table{db, New(parts[1]), New(parts[2])}, nil + default: + return Table{}, errors.Errorf("too many parts in %q", table) + } +} + +// Empty returns true if the identifier is empty. +func (n Ident) Empty() bool { + return n.q == `""` +} + +// Raw returns the original, raw value. +func (n Ident) Raw() string { + return strings.ReplaceAll(n.q[1:len(n.q)-1], `""`, `"`) +} + +// String returns the ident in a manner suitable for constructing a query. +func (n Ident) String() string { return n.q } + +// A Table identifier is a three-part ident, consisting of an SQL +// database, schema, and table ident. This type is an immutable value +// type, suitable for use as a map key. +type Table struct { + db, schema, table Ident +} + +// NewTable constructs a Table identifier. +func NewTable(db, schema, table Ident) Table { + return Table{db, schema, table} +} + +// Database returns the table's enclosing database. +func (t Table) Database() Ident { return t.db } + +// Schema returns the table's enclosing schema. +func (t Table) Schema() Ident { return t.schema } + +// Table returns the table's identifier. +func (t Table) Table() Ident { return t.table } + +// String returns the identifier in a manner suitable for constructing a +// query. +func (t Table) String() string { + return fmt.Sprintf("%s.%s.%s", t.Database(), t.Schema(), t.Table()) +} diff --git a/internal/util/ident/ident_test.go b/internal/util/ident/ident_test.go new file mode 100644 index 00000000..71cb6a6f --- /dev/null +++ b/internal/util/ident/ident_test.go @@ -0,0 +1,82 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt.
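+ +// A short sketch of the quoting rules exercised by these tests: New +// doubles any embedded double-quote, and Raw reverses that mapping: +// +// New(`say "hi"`).String() == `"say ""hi"""` +// New(`say "hi"`).Raw() == `say "hi"`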
+ +package ident + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIdent(t *testing.T) { + a := assert.New(t) + + a.True(New("").Empty()) + + id := New("table") + a.Equal("table", id.Raw()) + a.Equal(`"table"`, id.String()) + a.False(id.Empty()) + + a.Equal(id, New("table")) + + a.Equal(`"foo!bar"`, New("foo!bar").String()) +} + +func TestQualified(t *testing.T) { + a := assert.New(t) + + id := NewTable(New("database"), New("schema"), New("table")) + a.Equal(`"database"."schema"."table"`, id.String()) +} + +func TestRelative(t *testing.T) { + foo := New("foo") + + tcs := []struct { + table string + expected Table + expectError bool + }{ + { + table: "", + expectError: true, + }, + { + table: "foo", + expected: NewTable(StagingDB, Public, foo), + }, + { + table: "other.foo", + expected: NewTable(StagingDB, Public, foo), + }, + { + table: "other.schema.foo", + expected: NewTable(StagingDB, New("schema"), foo), + }, + { + table: "other.wat.schema.foo", + expectError: true, + }, + } + + for _, tc := range tcs { + t.Run(tc.table, func(t *testing.T) { + a := assert.New(t) + parsed, err := Relative(StagingDB, tc.table) + if tc.expectError { + a.Error(err) + return + } + a.Equal(tc.expected, parsed) + }) + } +} diff --git a/util.go b/internal/util/retry/retry.go similarity index 54% rename from util.go rename to internal/util/retry/retry.go index b96b650d..eb1fcd68 100644 --- a/util.go +++ b/internal/util/retry/retry.go @@ -1,4 +1,4 @@ -// Copyright 2020 The Cockroach Authors. +// Copyright 2021 The Cockroach Authors. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. @@ -8,7 +8,8 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -package main +// Package retry contains utility code for retrying database transactions. +package retry // This code is taken from the Cacheroach project. @@ -16,6 +17,7 @@ import ( "context" "github.com/jackc/pgconn" + "github.com/jackc/pgtype/pgxtype" "github.com/pkg/errors" ) @@ -23,13 +25,18 @@ import ( type Marker bool // Mark sets the flag. -func (m *Marker) Mark() { - *m = true -} +func (m *Marker) Mark() { *m = true } // Marked returns the flag status. -func (m *Marker) Marked() bool { - return bool(*m) +func (m *Marker) Marked() bool { return bool(*m) } + +// Execute is a wrapper around Retry that can be used for sql +// queries that don't have any return values. +func Execute(ctx context.Context, db pgxtype.Querier, query string, args ...interface{}) error { + return Retry(ctx, func(ctx context.Context) error { + _, err := db.Exec(ctx, query, args...) + return err + }) } // Retry is a convenience wrapper to automatically retry idempotent @@ -37,21 +44,35 @@ func (m *Marker) Marked() bool { // failure. The provided callback must be entirely idempotent, with // no observable side-effects during its execution. func Retry(ctx context.Context, idempotent func(context.Context) error) error { - return RetryLoop(ctx, func(ctx context.Context, _ *Marker) error { + return Loop(ctx, func(ctx context.Context, _ *Marker) error { return idempotent(ctx) }) } -// RetryLoop is a convenience wrapper to automatically retry idempotent -// database operations that experience a transaction or or connection +// inLoop is a key used by Loop to detect reentrant behavior. +var inLoop struct{} + +// Loop is a convenience wrapper to automatically retry idempotent +// database operations that experience a transaction or a connection // failure. 
The provided callback may indicate that it has started // generating observable effects (e.g. sending result data) by calling // its second parameter to disable the retry behavior. -func RetryLoop(ctx context.Context, fn func(ctx context.Context, sideEffect *Marker) error) error { +// +// If Loop is called in a reentrant fashion, the retry behavior will be +// suppressed within an inner loop, allowing the retryable error to +// percolate into the outer loop. +func Loop( + ctx context.Context, + fn func(ctx context.Context, sideEffect *Marker) error, +) error { + top := ctx.Value(inLoop) == nil + if top { + ctx = context.WithValue(ctx, inLoop, inLoop) + } var sideEffect Marker for { err := fn(ctx, &sideEffect) - if err == nil || sideEffect.Marked() { + if err == nil || sideEffect.Marked() || !top { return err } diff --git a/main.go b/main.go index 5672d6bf..224d476e 100644 --- a/main.go +++ b/main.go @@ -1,4 +1,4 @@ -// Copyright 2020 The Cockroach Authors. +// Copyright 2021 The Cockroach Authors. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. @@ -12,77 +12,16 @@ package main import ( "context" - "encoding/json" "flag" "fmt" "log" - "net" - "net/http" + "os" "os/signal" "runtime" "runtime/debug" "syscall" - "time" - "github.com/jackc/pgx/v4/pgxpool" - "golang.org/x/net/http2" - "golang.org/x/net/http2/h2c" -) - -var connectionString = flag.String( - "conn", - "postgresql://root@localhost:26257/defaultdb?sslmode=disable", - "cockroach connection string", -) -var port = flag.Int("port", 26258, "http server listening port") - -var sinkDB = flag.String("sink_db", "_CDC_SINK", "db for storing temp sink tables") -var dropDB = flag.Bool("drop", false, "Drop the sink db before starting?") -var sinkDBZone = flag.Bool( - "sink_db_zone_override", - true, - "allow sink_db zone config to be overridden with the cdc-sink default values", -) - -var configuration = flag.String( - "config", - "", - `This flag must be set. It requires a single line for each table passed in. -The format is the following: -[ - {"endpoint":"", "source_table":"", "destination_database":"", "destination_table":""}, - {"endpoint":"", "source_table":"", "destination_database":"", "destination_table":""}, -] - -Each table being updated requires a single line. Note that source database is -not required. -Each changefeed requires the same endpoint and you can have more than one table -in a single changefeed. - -Here are two examples: - -1) Single table changefeed. Source table and destination table are both called -users: - -[{endpoint:"cdc.sql", source_table:"users", destination_database:"defaultdb", destination_table:"users"}] - -The changefeed is initialized on the source database: -CREATE CHANGEFEED FOR TABLE users INTO 'experimental-[cdc-sink-url:port]/cdc.sql' WITH updated,resolved - -2) Two table changefeed. Two tables this time, users and customers: - -[ - {"endpoint":"cdc.sql", "source_table":"users", "destination_database":"defaultdb", "destination_table":"users"}, - {"endpoint":"cdc.sql", "source_table":"customers", "destination_database":"defaultdb", "destination_table":"customers"}, -] - -The changefeed is initialized on the source database: -CREATE CHANGEFEED FOR TABLE users,customers INTO 'experimental-[cdc-sink-url:port]/cdc.sql' WITH updated,resolved - -As of right now, only a single endpoint is supported. 
- -Don't forget to escape the json quotes: -./cdc-sink --config="[{\"endpoint\":\"test.sql\", \"source_table\":\"in_test1\", \"destination_database\":\"defaultdb\", \"destination_table\":\"out_test1\"},{\"endpoint\":\"test.sql\", \"source_table\":\"in_test2\", \"destination_database\":\"defaultdb\", \"destination_table\":\"out_test2\"}]"`, + "github.com/cockroachdb/cdc-sink/internal/frontend/server" ) var ( @@ -91,88 +30,9 @@ var ( printVersion = flag.Bool("version", false, "print version and exit") ) -func createHandler(db *pgxpool.Pool, sinks *Sinks) func(http.ResponseWriter, *http.Request) { - return func(w http.ResponseWriter, r *http.Request) { - // Is it an ndjson url? - ndjson, ndjsonErr := parseNdjsonURL(r.RequestURI) - if ndjsonErr == nil { - sink := sinks.FindSink(ndjson.endpoint, ndjson.topic) - if sink != nil { - sink.HandleRequest(db, w, r) - return - } - - // No sink found, throw an error. - http.Error( - w, - fmt.Sprintf("could not find a sync for %s", ndjson.topic), - http.StatusInternalServerError, - ) - return - } - - // Is it a resolved url? - resolved, resolvedErr := parseResolvedURL(r.RequestURI) - if resolvedErr == nil { - sinks.HandleResolvedRequest(r.Context(), db, resolved, w, r) - return - } - - // Could not recognize url. - http.Error( - w, - fmt.Sprintf("URL pattern does not match either an ndjson (%s) or a resolved (%s)", - ndjsonErr, resolvedErr, - ), - http.StatusInternalServerError, - ) - } -} - -// Config parses the passed in config. -type Config []ConfigEntry - -// ConfigEntry is a single table configuration entry in a config. -type ConfigEntry struct { - Endpoint string `json:"endpoint"` - SourceTable string `json:"source_table"` - DestinationDatabase string `json:"destination_database"` - DestinationTable string `json:"destination_table"` -} - -func parseConfig(rawConfig string) (Config, error) { - var config Config - if err := json.Unmarshal([]byte(rawConfig), &config); err != nil { - return Config{}, fmt.Errorf("could not parse config: %s", err.Error()) - } - - if len(config) == 0 { - return Config{}, fmt.Errorf("no config lines provided") - } - - for _, entry := range config { - if len(entry.Endpoint) == 0 { - return Config{}, fmt.Errorf("each config entry requires and endpoint") - } - - if len(entry.SourceTable) == 0 { - return Config{}, fmt.Errorf("each config entry requires a source_table") - } - - if len(entry.DestinationDatabase) == 0 { - return Config{}, fmt.Errorf("each config entry requires a destination_database") - } - - if len(entry.DestinationTable) == 0 { - return Config{}, fmt.Errorf("each config entry requires a destination_table") - } - } - - return config, nil -} - func main() { ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) + defer cancel() // First, parse the config. 
flag.Parse() @@ -193,49 +53,9 @@ func main() { return } - config, err := parseConfig(*configuration) - if err != nil { - log.Print(*configuration) - log.Fatal(err) - } - - db, err := pgxpool.Connect(ctx, *connectionString) - if err != nil { - log.Fatalf("could not parse config string: %v", err) - } - defer db.Close() - - if *dropDB { - if err := DropSinkDB(ctx, db); err != nil { - log.Fatalf("Could not drop the sinkDB:%s - %v", *sinkDB, err) - } - } - - if err := CreateSinkDB(ctx, db); err != nil { - log.Fatalf("Could not create the sinkDB:%s - %v", *sinkDB, err) - } - - sinks, err := CreateSinks(ctx, db, config) - if err != nil { - log.Fatal(err) + if err := server.Main(ctx); err != nil { + log.Printf("server exited: %v", err) + os.Exit(1) } - - l, err := net.Listen("tcp", fmt.Sprintf(":%d", *port)) - if err != nil { - log.Fatalf("could not open listener: %v", err) - } - log.Printf("listening on %s", l.Addr()) - - handler := http.Handler(http.HandlerFunc(createHandler(db, sinks))) - handler = h2c.NewHandler(handler, &http2.Server{}) - - // TODO(bob): Consider configuring timeouts - svr := &http.Server{Handler: handler} - go svr.Serve(l) - <-ctx.Done() - log.Printf("waiting for connections to drain") - cancel() - ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second) - _ = svr.Shutdown(ctx) - cancel() + os.Exit(0) } diff --git a/main_test.go b/main_test.go deleted file mode 100644 index e43c9dd5..00000000 --- a/main_test.go +++ /dev/null @@ -1,1712 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "context" - "fmt" - "io" - "io/ioutil" - "math/rand" - "net/http" - "net/http/httptest" - "reflect" - "strings" - "testing" - "time" - - "log" - "os" - - "github.com/jackc/pgx/v4" - "github.com/jackc/pgx/v4/pgxpool" - "github.com/pkg/errors" - "github.com/stretchr/testify/assert" -) - -// These test require an insecure cockroach server is running on the default -// port with the default root user with no password. -var ( - r *rand.Rand - rawDb *pgxpool.Pool - dbVersion string -) - -// TestMain will open a database connection and set the cluster license -// if the COCKROACH_DEV_LICENSE environment variable is set. 
-func TestMain(m *testing.M) { - r = rand.New(rand.NewSource(time.Now().UnixNano())) - - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - var err error - rawDb, err = pgxpool.Connect(ctx, *connectionString) - if err != nil { - log.Fatalf("could not open database connection: %v", err) - } - - if lic, ok := os.LookupEnv("COCKROACH_DEV_LICENSE"); ok { - if _, err := rawDb.Exec(ctx, - "SET CLUSTER SETTING cluster.organization = $1", - "Cockroach Labs - Production Testing", - ); err != nil { - log.Fatalf("could not set cluster.organization: %v", err) - } - if _, err := rawDb.Exec(ctx, - "SET CLUSTER SETTING enterprise.license = $1", lic, - ); err != nil { - log.Fatalf("could not set enterprise.license: %v", err) - } - } - - if err := Execute(ctx, rawDb, "SET CLUSTER SETTING kv.rangefeed.enabled = true"); err != nil { - log.Fatalf("could not enable rangefeeds: %v", err) - return - } - - if err := Retry(ctx, func(ctx context.Context) error { - row := rawDb.QueryRow(ctx, "SELECT version()") - if err := row.Scan(&dbVersion); err != nil { - return err - } - - return nil - }); err != nil { - log.Fatalf("could not determine cluster version: %v", err) - } - - os.Exit(m.Run()) -} - -const endpointTest = "test.sql" - -// getDB creates a new testing DB, return the name of that db and a closer that -// will drop the table and close the db connection. -func getDB(ctx context.Context) (db *pgxpool.Pool, dbName string, closer func(), err error) { - db = rawDb - // Create the testing database - dbNum := r.Intn(10000) - dbName = fmt.Sprintf("_test_db_%d", dbNum) - - if err = Execute(ctx, db, fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", dbName)); err != nil { - return - } - - if err = Execute(ctx, db, fmt.Sprintf(sinkDBZoneConfig, dbName)); err != nil { - return - } - - closer = func() { - _ = Execute(ctx, db, fmt.Sprintf("DROP DATABASE %s CASCADE", dbName)) - } - - return -} - -func getRowCount(ctx context.Context, db *pgxpool.Pool, fullTableName string) (int, error) { - var count int - if err := Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", fullTableName)).Scan(&count) - }); err != nil { - return 0, err - } - return count, nil -} - -type tableInfo struct { - db *pgxpool.Pool - dbName string - name string -} - -func (ti tableInfo) String() string { - return fmt.Sprintf("%s.%s", ti.dbName, ti.name) -} - -func (ti tableInfo) getFullName() string { - return fmt.Sprintf("%s.%s", ti.dbName, ti.name) -} - -func (ti *tableInfo) deleteAll(ctx context.Context) error { - return Execute(ctx, ti.db, fmt.Sprintf("DELETE FROM %s WHERE true", ti.getFullName())) -} - -func (ti tableInfo) getTableRowCount(ctx context.Context) (int, error) { - return getRowCount(ctx, ti.db, ti.getFullName()) -} - -func (ti tableInfo) dropTable(ctx context.Context) error { - return Execute(ctx, ti.db, fmt.Sprintf("DROP TABLE IF EXISTS %s", ti.getFullName())) -} - -// This function creates a test table and returns a unique name. -// The schemaSpec parameter must have exactly two %s substitution -// parameters for the database name and table name. -func createTestTable(ctx context.Context, db *pgxpool.Pool, dbName, schemaSpec string) (tableInfo, error) { - var tableName string - -outer: - for { - // Create the testing database - tableNum := r.Intn(10000) - tableName = fmt.Sprintf("_test_table_%d", tableNum) - - // Find the DB. 
- var actualTableName string - err := Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, - fmt.Sprintf("SELECT table_name FROM [SHOW TABLES FROM %s] WHERE table_name = $1", dbName), - tableName, - ).Scan(&actualTableName) - }) - switch err { - case pgx.ErrNoRows: - break outer - case nil: - continue - default: - return tableInfo{}, err - } - } - - if err := Execute(ctx, db, fmt.Sprintf(schemaSpec, dbName, tableName)); err != nil { - return tableInfo{}, err - } - - return tableInfo{ - db: db, - dbName: dbName, - name: tableName, - }, nil -} - -type tableInfoSimple struct { - tableInfo - rowCount int -} - -const tableSimpleSchema = ` -CREATE TABLE %s.%s ( - a INT PRIMARY KEY, - b INT -) -` - -func createTestSimpleTable(ctx context.Context, db *pgxpool.Pool, dbName string) (tableInfoSimple, error) { - info, err := createTestTable(ctx, db, dbName, tableSimpleSchema) - return tableInfoSimple{tableInfo: info}, err -} - -func (tis *tableInfoSimple) populateTable(ctx context.Context, count int) error { - for i := 0; i < count; i++ { - if err := Execute( - ctx, - tis.db, - fmt.Sprintf("INSERT INTO %s VALUES ($1, $1)", tis.getFullName()), - tis.rowCount+1, - ); err != nil { - return err - } - tis.rowCount++ - } - return nil -} - -func (tis *tableInfoSimple) updateNoneKeyColumns(ctx context.Context) error { - return Execute( - ctx, - tis.db, - fmt.Sprintf("UPDATE %s SET b=b*100 WHERE true", tis.getFullName()), - ) -} - -func (tis *tableInfoSimple) updateAll(ctx context.Context) error { - return Execute( - ctx, - tis.db, - fmt.Sprintf("UPDATE %s SET a=a*100000, b=b*100000 WHERE true", tis.getFullName()), - ) -} - -func (tis *tableInfoSimple) maxB(ctx context.Context) (int, error) { - var max int - err := Retry(ctx, func(ctx context.Context) error { - return tis.db.QueryRow( - ctx, - fmt.Sprintf("SELECT max(b) FROM %s", tis.getFullName()), - ).Scan(&max) - }) - return max, err -} - -// tableInfoComposite is a table with a composite primary key. -type tableInfoComposite struct { - tableInfo - rowCount int -} - -const tableCompositeSchema = ` -CREATE TABLE %s.%s ( - a INT, - b INT, - c INT, - PRIMARY KEY (a, b) -) -` - -func createTestCompositeTable(ctx context.Context, db *pgxpool.Pool, dbName string) (tableInfoComposite, error) { - info, err := createTestTable(ctx, db, dbName, tableCompositeSchema) - return tableInfoComposite{tableInfo: info}, err -} - -func (tis *tableInfoComposite) populateTable(ctx context.Context, count int) error { - for i := 0; i < count; i++ { - if err := Execute( - ctx, - tis.db, - fmt.Sprintf("INSERT INTO %s VALUES ($1, $1, $1)", tis.getFullName()), - tis.rowCount+1, - ); err != nil { - return err - } - tis.rowCount++ - } - return nil -} - -type tableInfoClob struct { - tableInfo - clobSize int // The number of bytes to generate per row. - rowCount int // A running total for code generation. 
-} - -const tableClobSchema = ` -CREATE TABLE %s.%s ( - a INT NOT NULL PRIMARY KEY, - data TEXT -) -` - -func createTestClobTable(ctx context.Context, db *pgxpool.Pool, dbName string, clobSize int) (tableInfoClob, error) { - if clobSize <= 0 { - clobSize = 8 * 1024 - } - info, err := createTestTable(ctx, db, dbName, tableClobSchema) - return tableInfoClob{tableInfo: info, clobSize: clobSize}, err -} - -func (tic *tableInfoClob) populateTable(ctx context.Context, count int) error { - for i := 0; i < count; i++ { - c := tic.rowCount + 1 - data, err := ioutil.ReadAll(clobData(tic.clobSize, c)) - if err != nil { - return err - } - if err := Execute( - ctx, - tic.db, - fmt.Sprintf("INSERT INTO %s VALUES ($1, $2)", tic.getFullName()), - c, - string(data), - ); err != nil { - return err - } - tic.rowCount++ - } - return nil -} - -// tableInfoComputed is used for tables that have magic columns -type tableInfoComputed struct { - tableInfo - rowCount int -} - -const tableComputedSchema = ` -SET experimental_enable_hash_sharded_indexes = on; -CREATE TABLE %s.%s ( - a INT PRIMARY KEY, - b INT, - c INT AS (a + b) STORED, - d INT AS (a + b) VIRTUAL, - INDEX (b ASC) USING HASH WITH BUCKET_COUNT = 8 -) -` - -func createTestComputedTable(ctx context.Context, db *pgxpool.Pool, dbName string) (tableInfoComputed, error) { - info, err := createTestTable(ctx, db, dbName, tableComputedSchema) - return tableInfoComputed{tableInfo: info}, err -} - -func (ti *tableInfoComputed) populateTable(ctx context.Context, count int) error { - for i := 0; i < count; i++ { - if err := Execute( - ctx, - ti.db, - fmt.Sprintf("INSERT INTO %s VALUES ($1, $1)", ti.getFullName()), - ti.rowCount+1, - ); err != nil { - return err - } - ti.rowCount++ - } - return nil -} - -type jobInfo struct { - db *pgxpool.Pool - id int -} - -func (ji *jobInfo) cancelJob(ctx context.Context) error { - if ji.id == 0 { - return nil - } - if err := Execute(ctx, ji.db, fmt.Sprintf("CANCEL JOB %d", ji.id)); err != nil { - return err - } - ji.id = 0 - return nil -} - -func createChangeFeed( - ctx context.Context, db *pgxpool.Pool, url string, endpoint string, tis ...tableInfo, -) (jobInfo, error) { - var query strings.Builder - fmt.Fprint(&query, "CREATE CHANGEFEED FOR TABLE ") - for i := 0; i < len(tis); i++ { - if i != 0 { - fmt.Fprint(&query, ", ") - } - fmt.Fprint(&query, tis[i].getFullName()) - } - fmt.Fprintf(&query, " INTO 'experimental-%s/%s' WITH updated,resolved", url, endpoint) - var jobID int - err := Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, query.String()).Scan(&jobID) - }) - return jobInfo{ - db: db, - id: jobID, - }, err -} - -// dropSinkDB is just a wrapper around DropSinkDB for testing. -func dropSinkDB(ctx context.Context, db *pgxpool.Pool) error { - return DropSinkDB(ctx, db) -} - -// createSinkDB will first drop then create a new sink db. -func createSinkDB(ctx context.Context, db *pgxpool.Pool) error { - if err := dropSinkDB(ctx, db); err != nil { - return err - } - return CreateSinkDB(ctx, db) -} - -// TestDB is just a quick test to create and drop a database to ensure the -// Cockroach Cluster is working correctly and we have the correct permissions. -func TestDB(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Find the DB. 
- var actualDBName string - if err := Retry(ctx, func(ctx context.Context) error { - return db.QueryRow( - ctx, - `SELECT database_name FROM [SHOW DATABASES] WHERE database_name = $1`, dbName, - ).Scan(&actualDBName) - }); !a.NoError(err) { - return - } - - if !a.Equal(actualDBName, dbName, "db names do not match") { - return - } - - // Create a test table and insert some rows - table, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - if !a.NoError(table.populateTable(ctx, 10)) { - return - } - count, err := table.getTableRowCount(ctx) - a.Equal(10, count, "row count") - a.NoError(err) -} - -func createConfig(source tableInfo, destination tableInfo, endpoint string) Config { - return Config{ - ConfigEntry{ - Endpoint: endpoint, - SourceTable: source.name, - DestinationDatabase: destination.dbName, - DestinationTable: destination.name, - }, - } -} - -func TestFeedInsert(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - count, err := tableFrom.getTableRowCount(ctx) - a.Equal(10, count, "rows") - if !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - // Wait for sync to occur. - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - for { - if !a.NoError(ctx.Err()) { - return - } - toCount, err := tableTo.getTableRowCount(ctx) - if !a.NoError(err) { - return - } - fromCount, err := tableFrom.getTableRowCount(ctx) - if !a.NoError(err) { - return - } - if toCount == fromCount { - break - } - } - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - // Wait for sync to occur again. - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "sink table not empty") - a.NoError(err) -} - -func TestFeedDelete(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - if !a.NoError(tableFrom.deleteAll(ctx)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func TestFeedDeleteCompositeKey(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestCompositeTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestCompositeTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer(http.HandlerFunc(handler)) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - if !a.NoError(tableFrom.deleteAll(ctx)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func TestFeedUpdateNonPrimary(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count) || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - if !a.NoError(tableFrom.updateNoneKeyColumns(ctx)) { - return - } - - if !a.NoError(loopUntilMaxB(ctx, &tableFrom, &tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func TestFeedUpdatePrimary(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - if !a.NoError(tableFrom.updateAll(ctx)) { - return - } - - if !a.NoError(loopUntilMaxB(ctx, &tableFrom, &tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func TestTypes(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the sinks - sinks, err := CreateSinks(ctx, db, []ConfigEntry{}) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - testcases := []struct { - name string - columnType string - columnValue string - indexable bool - }{ - {`string_array`, `STRING[]`, `{"sky","road","car"}`, false}, - {`string_array_null`, `STRING[]`, ``, false}, - {`int_array`, `INT[]`, `{1,2,3}`, false}, - {`int_array_null`, `INT[]`, ``, false}, - {`serial_array`, `SERIAL[]`, `{148591304110702593,148591304110702594,148591304110702595}`, false}, - {`serial_array_null`, `SERIAL[]`, ``, false}, - {`bit`, `VARBIT`, `10010101`, true}, - {`bit_null`, `VARBIT`, ``, false}, - {`bool`, `BOOL`, `true`, true}, - {`bool_null`, `BOOL`, ``, false}, - {`bytes`, `BYTES`, `b'\141\061\142\062\143\063'`, true}, - {`collate`, `STRING COLLATE de`, `'a1b2c3' COLLATE de`, true}, - {`collate_null`, `STRING COLLATE de`, ``, false}, - {`date`, `DATE`, `2016-01-25`, true}, - {`date_null`, `DATE`, ``, false}, - {`decimal`, `DECIMAL`, `1.2345`, true}, - {`decimal_null`, `DECIMAL`, ``, false}, - {`float`, `FLOAT`, `1.2345`, true}, - {`float_null`, `FLOAT`, ``, false}, - // {`geography`, `GEOGRAPHY`, `0101000020E6100000000000000000F03F0000000000000040`, false}, - // {`geometry`, `GEOMETRY`, `010100000075029A081B9A5DC0F085C954C1F84040`, false}, - {`inet`, `INET`, `192.168.0.1`, true}, - {`inet_null`, `INET`, ``, false}, - {`int`, `INT`, `12345`, true}, - {`int_null`, `INT`, ``, false}, - {`interval`, `INTERVAL`, `2h30m30s`, true}, - {`interval_null`, `INTERVAL`, ``, false}, - { - `jsonb`, - `JSONB`, - ` - { - "string": "Lola", - "bool": true, - "number": 547, - "float": 123.456, - "array": [ - "lola", - true, - 547, - 123.456, - [ - "lola", - true, - 547, - 123.456 - ], - { - "string": "Lola", - "bool": true, - "number": 547, - "float": 123.456, - "array": [ - "lola", - true, - 547, - 123.456, - [ - "lola", - true, - 547, - 123.456 - ] - ] - } - ], - "map": { - "string": "Lola", - "bool": true, - "number": 547, - "float": 123.456, - "array": [ - "lola", - true, - 547, - 123.456, - [ - "lola", - true, - 547, - 123.456 - ], - { - "string": "Lola", - "bool": true, - "number": 547, - "float": 123.456, - "array": [ - "lola", - true, - 547, - 123.456, - [ - "lola", - true, - 547, - 123.456 - ] - ] - } - ] - } - } - `, - false, - }, - {`jsonb_null`, `JSONB`, ``, false}, - {`serial`, `SERIAL`, `148591304110702593`, true}, - // serial cannot be null - {`string`, `STRING`, `a1b2c3`, true}, - {`string_null`, `STRING`, ``, false}, - {`string_escape`, `STRING`, `a1\b/2?c"3`, true}, - {`time`, `TIME`, `01:23:45.123456`, true}, - {`time_null`, `TIME`, ``, false}, - {`timestamp`, `TIMESTAMP`, `2016-01-25 10:10:10`, true}, - {`timestamp_null`, `TIMESTAMP`, ``, false}, - {`timestamptz`, `TIMESTAMPTZ`, `2016-01-25 10:10:10-05:00`, true}, - 
{`timestamptz_null`, `TIMESTAMPTZ`, ``, false}, - {`uuid`, `UUID`, `7f9c24e8-3b12-4fef-91e0-56a2d5a246ec`, true}, - {`uuid_null`, `UUID`, ``, false}, - } - - tableIndexableSchema := `CREATE TABLE %s (a %s PRIMARY KEY, b %s)` - tableNonIndexableSchema := `CREATE TABLE %s (a INT PRIMARY KEY, b %s)` - - for _, test := range testcases { - t.Run(test.name, func(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) - defer cancel() - - tableIn := tableInfo{ - db: db, - dbName: dbName, - name: fmt.Sprintf("in_%s", test.name), - } - tableOut := tableInfo{ - db: db, - dbName: dbName, - name: fmt.Sprintf("out_%s", test.name), - } - - // Drop both tables if they already exist. - if !a.NoError(tableIn.dropTable(ctx)) { - return - } - if !a.NoError(tableOut.dropTable(ctx)) { - return - } - - // Create both tables. - if test.indexable { - if !a.NoError(Execute(ctx, db, fmt.Sprintf( - tableIndexableSchema, tableIn.getFullName(), test.columnType, test.columnType, - ))) { - return - } - if !a.NoError(Execute(ctx, db, fmt.Sprintf( - tableIndexableSchema, tableOut.getFullName(), test.columnType, test.columnType, - ))) { - return - } - } else { - if !a.NoError(Execute(ctx, db, fmt.Sprintf( - tableNonIndexableSchema, tableIn.getFullName(), test.columnType, - ))) { - return - } - if !a.NoError(Execute(ctx, db, fmt.Sprintf( - tableNonIndexableSchema, tableOut.getFullName(), test.columnType, - ))) { - return - } - } - - // Defer a table drop for both tables to clean them up. - defer tableIn.dropTable(ctx) - defer tableOut.dropTable(ctx) - - // Create the sink - // There is no way to remove a sink at this time, and that should be ok - // for these tests. - if !a.NoError(sinks.AddSink(ctx, db, ConfigEntry{ - Endpoint: endpointTest, - DestinationDatabase: dbName, - DestinationTable: tableOut.name, - SourceTable: tableIn.name, - })) { - return - } - - // Create the CDC feed. - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableIn) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - // Insert a row into the in table. - if test.indexable { - if !a.NoError(Execute(ctx, db, - fmt.Sprintf("INSERT INTO %s (a,b) VALUES ($1,$2)", tableIn.getFullName()), - test.columnValue, test.columnValue, - )) { - return - } - } else { - value := interface{}(test.columnValue) - if len(test.columnValue) == 0 { - value = nil - } - if !a.NoError(Execute(ctx, db, - fmt.Sprintf("INSERT INTO %s (a, b) VALUES (1, $1)", tableIn.getFullName()), - value, - )) { - return - } - } - - // Wait until the out table has a row. - for { - count, err := tableOut.getTableRowCount(ctx) - if !a.NoError(err) { - return - } - if count > 0 { - break - } - } - - // Now fetch that rows and compare them. 
- var inA, inB interface{} - if !a.NoError(Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, - fmt.Sprintf("SELECT a, b FROM %s LIMIT 1", tableIn.getFullName()), - ).Scan(&inA, &inB) - })) { - return - } - var outA, outB interface{} - if !a.NoError(Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, - fmt.Sprintf("SELECT a, b FROM %s LIMIT 1", tableOut.getFullName()), - ).Scan(&outA, &outB) - })) { - return - } - a.Equal(fmt.Sprintf("%v", inA), fmt.Sprintf("%v", outA), "A") - a.Equal(fmt.Sprintf("%v", inB), fmt.Sprintf("%v", outB), "B") - }) - } -} - -func TestConfig(t *testing.T) { - testCases := []struct { - name string - testJSON string - expectedPass bool - expectedConfig Config - }{ - { - name: "empty", - testJSON: "", - expectedPass: false, - }, - { - name: "empty2", - testJSON: "[]", - expectedPass: false, - }, - { - name: "empty3", - testJSON: "[{}]", - expectedPass: false, - }, - { - name: "missing endpoint", - testJSON: `[{"source_table":"s_tbl", "destination_database":"d_db", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "missing source table", - testJSON: `[{"endpoint":"test.sql", "destination_database":"d_db", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "missing destination database", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "missing destination table", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_database":"d_db"}]`, - expectedPass: false, - }, - { - name: "empty endpoint", - testJSON: `[{"endpoint":"", "source_table":"s_tbl", "destination_database":"d_db", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "empty source table", - testJSON: `[{"endpoint":"test.sql", "source_table":"", "destination_database":"d_db", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "empty destination database", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_database":"", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "empty destination table", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_database":"d_db", "destination_table":""}]`, - expectedPass: false, - }, - { - name: "single", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_database":"d_db", "destination_table":"d_tbl"}]`, - expectedPass: true, - expectedConfig: Config{ - ConfigEntry{Endpoint: "test.sql", SourceTable: "s_tbl", DestinationDatabase: "d_db", DestinationTable: "d_tbl"}, - }, - }, - { - name: "double", - testJSON: `[ - {"endpoint":"test.sql", "source_table":"s_tbl1", "destination_database":"d_db", "destination_table":"d_tbl1"}, - {"endpoint":"test.sql", "source_table":"s_tbl2", "destination_database":"d_db", "destination_table":"d_tbl2"} -]`, - expectedPass: true, - expectedConfig: Config{ - ConfigEntry{Endpoint: "test.sql", SourceTable: "s_tbl1", DestinationDatabase: "d_db", DestinationTable: "d_tbl1"}, - ConfigEntry{Endpoint: "test.sql", SourceTable: "s_tbl2", DestinationDatabase: "d_db", DestinationTable: "d_tbl2"}, - }, - }, - { - name: "triple", - testJSON: `[ - {"endpoint":"test1.sql", "source_table":"s_tbl1", "destination_database":"d_db1", "destination_table":"d_tbl1"}, - {"endpoint":"test1.sql", "source_table":"s_tbl2", "destination_database":"d_db1", "destination_table":"d_tbl2"}, - {"endpoint":"test2.sql", "source_table":"s_tbl3", 
"destination_database":"d_db2", "destination_table":"d_tbl3"} -]`, - expectedPass: true, - expectedConfig: Config{ - ConfigEntry{Endpoint: "test1.sql", SourceTable: "s_tbl1", DestinationDatabase: "d_db1", DestinationTable: "d_tbl1"}, - ConfigEntry{Endpoint: "test1.sql", SourceTable: "s_tbl2", DestinationDatabase: "d_db1", DestinationTable: "d_tbl2"}, - ConfigEntry{Endpoint: "test2.sql", SourceTable: "s_tbl3", DestinationDatabase: "d_db2", DestinationTable: "d_tbl3"}, - }, - }, - } - - for _, test := range testCases { - t.Run(test.name, func(t *testing.T) { - a := assert.New(t) - - actual, err := parseConfig(test.testJSON) - if test.expectedPass { - a.NoError(err) - a.True(reflect.DeepEqual(test.expectedConfig, actual)) - } else { - a.Error(err) - } - }) - } -} - -func TestMultipleFeeds(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - testcases := []struct { - feedCount int - tablesPerFeed int - populateCount int - }{ - {1, 1, 1000}, - {1, 2, 10}, - {1, 3, 10}, - {2, 1, 10}, - {2, 2, 10}, - {2, 3, 10}, - {3, 1, 10}, - {3, 2, 10}, - {3, 3, 10}, - } - - nameEndpoint := func(feedID int) string { - return fmt.Sprintf("test_%d_%s", feedID, endpointTest) - } - - for _, testcase := range testcases { - t.Run(fmt.Sprintf("Feeds_%d_Tables_%d_Size_%d", - testcase.feedCount, testcase.tablesPerFeed, testcase.populateCount, - ), func(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create all the tables - var sourceTablesByFeed [][]*tableInfoSimple - var destinationTablesByFeed [][]*tableInfoSimple - for i := 0; i < testcase.feedCount; i++ { - var sourceTables []*tableInfoSimple - var destinationTables []*tableInfoSimple - for j := 0; j < testcase.tablesPerFeed; j++ { - sourceTable, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoErrorf(err, "create source i=%d, j=%d", i, j) { - return - } - sourceTables = append(sourceTables, &sourceTable) - destinationTable, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoErrorf(err, "create dest i=%d, j=%d", i, j) { - return - } - destinationTables = append(destinationTables, &destinationTable) - } - sourceTablesByFeed = append(sourceTablesByFeed, sourceTables) - destinationTablesByFeed = append(destinationTablesByFeed, destinationTables) - } - - // Populate all the source tables - for _, feedTables := range sourceTablesByFeed { - for _, table := range feedTables { - if !a.NoError(table.populateTable(ctx, testcase.populateCount), table.name) { - return - } - } - } - - // Create the sinks - sinks, err := CreateSinks(ctx, db, []ConfigEntry{}) - if !a.NoError(err) { - return - } - - // Create all the sinks - for i := 0; i < testcase.feedCount; i++ { - for j := 0; j < testcase.tablesPerFeed; j++ { - if !a.NoErrorf(sinks.AddSink(ctx, db, ConfigEntry{ - Endpoint: nameEndpoint(i), - DestinationDatabase: destinationTablesByFeed[i][j].dbName, - DestinationTable: destinationTablesByFeed[i][j].name, - SourceTable: sourceTablesByFeed[i][j].name, - }), "AddSink i=%d j=%d", i, j) { - return - } - } - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer(http.HandlerFunc(handler)) - defer server.Close() - - // Create the changefeeds - for i := 0; i < 
testcase.feedCount; i++ { - var tableInfos []tableInfo - for _, table := range sourceTablesByFeed[i] { - tableInfos = append(tableInfos, table.tableInfo) - } - job, err := createChangeFeed(ctx, db, server.URL, nameEndpoint(i), tableInfos...) - if !a.NoErrorf(err, "changefeed %d", i) { - return - } - defer job.cancelJob(ctx) - } - - // Add some more lines to each table. - // Populate all the source tables - for _, feedTables := range sourceTablesByFeed { - for _, table := range feedTables { - if !a.NoError(table.populateTable(ctx, testcase.populateCount), table.name) { - return - } - } - } - - // Make sure each table has 20 rows - for _, feedTables := range destinationTablesByFeed { - for _, table := range feedTables { - // Wait until table is populated - for { - count, err := table.getTableRowCount(ctx) - if !a.NoError(err, table) { - return - } - if count == testcase.populateCount*2 { - break - } - } - } - } - - // Update all rows in the source table. - for _, feedTables := range sourceTablesByFeed { - for _, table := range feedTables { - a.NoErrorf(table.updateAll(ctx), "updateAll %s", table) - } - } - - // Make sure each table has 20 rows - for i, feedTables := range destinationTablesByFeed { - for j, table := range feedTables { - tableB, err := table.maxB(ctx) - if !a.NoError(err, table.String()) { - return - } - sourceB, err := sourceTablesByFeed[i][j].maxB(ctx) - if !a.NoError(err, sourceTablesByFeed[i][j].String()) { - return - } - if tableB == sourceB { - break - } - } - } - - // Delete all rows in the table. - for _, feedTables := range sourceTablesByFeed { - for _, table := range feedTables { - a.NoErrorf(table.deleteAll(ctx), "deleting %s", table) - } - } - - // Make sure each table is drained. - for _, feedTables := range destinationTablesByFeed { - for _, table := range feedTables { - for { - count, err := table.getTableRowCount(ctx) - if !a.NoError(err) { - return - } - if count == 0 { - break - } - } - } - } - }) - } -} - -func TestLargeClobs(t *testing.T) { - const clobSize = 5 * 1024 - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestClobTable(ctx, db, dbName, clobSize) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestClobTable(ctx, db, dbName, clobSize) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - t.Log("Waiting for sync") - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // 
Make sure sink table is empty here. - sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -// TestComputedColumns ensures that tables which contain computed (or -// otherwise magic) columns can be syndicated. -func TestComputedColumns(t *testing.T) { - if strings.Contains(dbVersion, "v20.2.") { - t.Skip("VIRTUAL columns not supported on v20.2") - } - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestComputedTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestComputedTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - t.Log("Waiting for sync") - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // Make sure sink table is empty here. - sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func loopUntilMaxB( - ctx context.Context, - tableTo, tableFrom interface { - maxB(context.Context) (int, error) - }, -) error { - for { - if err := ctx.Err(); err != nil { - return err - } - toCount, err := tableTo.maxB(ctx) - if err != nil { - return errors.Wrap(err, "querying to") - } - fromCount, err := tableFrom.maxB(ctx) - if err != nil { - return errors.Wrap(err, "querying from") - } - if toCount == fromCount { - break - } - } - return nil -} - -func loopUntilSync( - ctx context.Context, - tableTo, tableFrom interface { - getTableRowCount(context.Context) (int, error) - }, -) error { - for { - if err := ctx.Err(); err != nil { - return err - } - toCount, err := tableTo.getTableRowCount(ctx) - if err != nil { - return errors.Wrap(err, "querying to") - } - fromCount, err := tableFrom.getTableRowCount(ctx) - if err != nil { - return errors.Wrap(err, "querying from") - } - if toCount == fromCount { - break - } - } - return nil -} - -// clobData returns a reader that will generate some number of bytes. -// The nonce value is used to perturb the sequence. 
-func clobData(length, nonce int) io.Reader {
-	ret := &io.LimitedReader{R: &clobSourceReader{}, N: int64(nonce + length)}
-	nonce = nonce % len(clobSourceTest)
-	_, _ = io.CopyN(io.Discard, ret, int64(nonce))
-	return ret
-}
-
-const clobSourceTest = "_abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ!"
-
-// clobSourceReader returns an infinitely long sequence of data. Use
-// the clobData function instead.
-type clobSourceReader struct{}
-
-// Read will fill the buffer with data.
-func (c *clobSourceReader) Read(p []byte) (n int, err error) {
-	ret := len(p)
-	for len(p) >= len(clobSourceTest) {
-		copy(p, clobSourceTest)
-		p = p[len(clobSourceTest):]
-	}
-	if rem := len(p); rem > 0 {
-		copy(p, clobSourceTest[:rem])
-	}
-	return ret, nil
-}
diff --git a/resolved_table.go b/resolved_table.go
deleted file mode 100644
index 9a4e0374..00000000
--- a/resolved_table.go
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright 2020 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-
-	"github.com/jackc/pgtype/pgxtype"
-	"github.com/jackc/pgx/v4"
-	"github.com/jackc/pgx/v4/pgxpool"
-)
-
-const resolvedTableSchema = `
-CREATE TABLE IF NOT EXISTS %s (
-	endpoint STRING PRIMARY KEY,
-	nanos INT NOT NULL,
-	logical INT NOT NULL
-)
-`
-
-// Make this an option?
-const resolvedTableName = `_release`
-
-const resolvedTableQuery = `SELECT endpoint, nanos, logical FROM %s WHERE endpoint = $1`
-
-const resolvedTableWrite = `UPSERT INTO %s (endpoint, nanos, logical) VALUES ($1, $2, $3)`
-
-func resolvedFullTableName() string {
-	return fmt.Sprintf("%s.%s", *sinkDB, resolvedTableName)
-}
-
-// CreateResolvedTable creates a release table if none exists.
-func CreateResolvedTable(ctx context.Context, db *pgxpool.Pool) error {
-	return Execute(ctx, db, fmt.Sprintf(resolvedTableSchema, resolvedFullTableName()))
-}
-
-// ResolvedLine is used to parse a json line in the request body of a resolved
-// message.
-type ResolvedLine struct {
-	// These are used for parsing the resolved line.
-	Resolved string `json:"resolved"`
-
-	// These are used for storing back into the resolved table.
-	nanos    int64
-	logical  int
-	endpoint string
-}
-
-func parseResolvedLine(rawBytes []byte, endpoint string) (ResolvedLine, error) {
-	resolvedLine := ResolvedLine{
-		endpoint: endpoint,
-	}
-	json.Unmarshal(rawBytes, &resolvedLine)
-
-	// Parse the timestamp into nanos and logical.
-	var err error
-	resolvedLine.nanos, resolvedLine.logical, err = parseSplitTimestamp(resolvedLine.Resolved)
-	if err != nil {
-		return ResolvedLine{}, err
-	}
-	if resolvedLine.nanos == 0 {
-		return ResolvedLine{}, fmt.Errorf("no nano component to the 'updated' timestamp field")
-	}
-
-	return resolvedLine, nil
-}
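
(Reviewer note: as a concrete reference for the wire format this file handled,
the fragment below shows a resolved line and the fields parseResolvedLine
extracts from it. The endpoint value is arbitrary, and the fragment assumes
the surrounding package context.)

line, err := parseResolvedLine(
	[]byte(`{"resolved": "1586020760120222000.0000000000"}`), "endpoint.sql")
// On success: line.endpoint == "endpoint.sql",
// line.nanos == 1586020760120222000, line.logical == 0.

-
-// getPreviousResolved returns the last recorded resolved timestamp for a
-// specific endpoint.
-func getPreviousResolved(ctx context.Context, tx pgxtype.Querier, endpoint string) (ResolvedLine, error) {
-	// Needs retry.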
- var resolvedLine ResolvedLine - err := tx.QueryRow(ctx, - fmt.Sprintf(resolvedTableQuery, resolvedFullTableName()), endpoint, - ).Scan(&(resolvedLine.endpoint), &(resolvedLine.nanos), &(resolvedLine.logical)) - switch err { - case pgx.ErrNoRows: - // No line exists yet, go back to the start of time. - return ResolvedLine{endpoint: endpoint}, nil - case nil: - // Found the line. - return resolvedLine, nil - default: - return ResolvedLine{}, err - } -} - -// Writes the updated timestamp to the resolved table. -func (rl ResolvedLine) writeUpdated(ctx context.Context, tx pgxtype.Querier) error { - // Needs retry. - _, err := tx.Exec(ctx, fmt.Sprintf(resolvedTableWrite, resolvedFullTableName()), - rl.endpoint, rl.nanos, rl.logical, - ) - return err -} diff --git a/resolved_table_test.go b/resolved_table_test.go deleted file mode 100644 index a8b52aee..00000000 --- a/resolved_table_test.go +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "context" - "fmt" - "testing" - - "github.com/jackc/pgx/v4/pgxpool" - "github.com/stretchr/testify/assert" -) - -// These test require an insecure cockroach server is running on the default -// port with the default root user with no password. - -func (rl ResolvedLine) writeUpdatedDB(ctx context.Context, db *pgxpool.Pool) error { - return Retry(ctx, func(ctx context.Context) error { - return rl.writeUpdated(ctx, db) - }) -} - -func getPreviousResolvedDB(ctx context.Context, db *pgxpool.Pool, endpoint string) (ResolvedLine, error) { - var resolvedLine ResolvedLine - if err := Retry(ctx, func(ctx context.Context) error { - var err error - resolvedLine, err = getPreviousResolved(ctx, db, endpoint) - return err - }); err != nil { - return ResolvedLine{}, err - } - return resolvedLine, nil -} - -func TestParseResolvedLine(t *testing.T) { - tests := []struct { - testcase string - expectedPass bool - expectedNanos int64 - expectedLogical int - expectedEndpoint string - }{ - { - `{"resolved": "1586020760120222000.0000000000"}`, - true, 1586020760120222000, 0, "endpoint.sql", - }, - { - `{}`, - false, 0, 0, "", - }, - { - `"resolved": "1586020760120222000"}`, - false, 0, 0, "", - }, - { - `{"resolved": "0.0000000000"}`, - false, 0, 0, "", - }, - } - - for i, test := range tests { - t.Run(fmt.Sprintf("%d - %s", i, test.testcase), func(t *testing.T) { - a := assert.New(t) - actual, actualErr := parseResolvedLine([]byte(test.testcase), "endpoint.sql") - if test.expectedPass && !a.NoError(actualErr) { - return - } - if !test.expectedPass { - return - } - a.Equal(test.expectedNanos, actual.nanos, "nanos") - a.Equal(test.expectedLogical, actual.logical, "logical") - a.Equal(test.expectedEndpoint, actual.endpoint, "endpoint") - }) - } -} - -func TestResolvedTable(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, _, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - if !a.NoError(CreateResolvedTable(ctx, db)) { - return - } - - checkResolved := 
func(y ResolvedLine, z ResolvedLine) bool { - return a.Equal(y.endpoint, z.endpoint, "endpoint") && - a.Equal(y.nanos, z.nanos, "nanos") && - a.Equal(y.logical, z.logical, "logical") - } - - // Make sure there are no rows in the table yet. - if rowCount, err := getRowCount(ctx, db, resolvedFullTableName()); !a.NoError(err) || - !a.Equal(0, rowCount) { - return - } - - // Find no previous value for endpoint "one". - if one, err := getPreviousResolvedDB(ctx, db, "one"); !a.NoError(err) || - !checkResolved(ResolvedLine{endpoint: "one"}, one) { - return - } - - // Push 10 updates rows to the resolved table and check each one. - for i := 0; i < 10; i++ { - newOne := ResolvedLine{ - endpoint: "one", - nanos: int64(i), - logical: i, - } - if err := newOne.writeUpdatedDB(ctx, db); !a.NoError(err) { - return - } - if previousOne, err := getPreviousResolvedDB(ctx, db, "one"); !a.NoError(err) || - !checkResolved(newOne, previousOne) { - return - } - } - - // Now do the same for a second endpoint. - if two, err := getPreviousResolvedDB(ctx, db, "two"); !a.NoError(err) || - !checkResolved(ResolvedLine{endpoint: "two"}, two) { - return - } - - // Push 10 updates rows to the resolved table and check each one. - for i := 0; i < 10; i++ { - newOne := ResolvedLine{ - endpoint: "two", - nanos: int64(i), - logical: i, - } - if err := newOne.writeUpdatedDB(ctx, db); !a.NoError(err) { - return - } - if previousOne, err := getPreviousResolvedDB(ctx, db, "two"); !a.NoError(err) || - !checkResolved(newOne, previousOne) { - return - } - } - - // Now intersperse the updates. - for i := 100; i < 120; i++ { - newResolved := ResolvedLine{ - nanos: int64(i), - logical: i, - } - if i%2 == 0 { - newResolved.endpoint = "one" - } else { - newResolved.endpoint = "two" - } - - if err := newResolved.writeUpdatedDB(ctx, db); !a.NoError(err) { - return - } - previousResolved, err := getPreviousResolvedDB(ctx, db, newResolved.endpoint) - if !a.NoError(err) || !checkResolved(newResolved, previousResolved) { - return - } - } - - // Finally, check to make sure that there are only 2 lines in the resolved - // table. - rowCount, err := getRowCount(ctx, db, resolvedFullTableName()) - a.Equal(2, rowCount, "rowCount") - a.NoError(err) -} diff --git a/sink.go b/sink.go deleted file mode 100644 index 9b175d4e..00000000 --- a/sink.go +++ /dev/null @@ -1,329 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "bufio" - "bytes" - "context" - "encoding/json" - "fmt" - "log" - "net/http" - "sort" - "strings" - - "github.com/jackc/pgtype/pgxtype" - "github.com/jackc/pgx/v4/pgxpool" -) - -// Sink holds all the info needed for a specific table. -type Sink struct { - originalTableName string - resultTableFullName string - sinkTableFullName string - primaryKeyColumns []string - endpoint string - ignoredColumns map[string]struct{} -} - -// CreateSink creates all the required tables and returns a new Sink. -func CreateSink( - ctx context.Context, db *pgxpool.Pool, - originalTable string, resultDB string, resultTable string, endpoint string, -) (*Sink, error) { - // Check to make sure the table exists. 
- resultTableFullName := fmt.Sprintf("%s.%s", resultDB, resultTable) - exists, err := TableExists(ctx, db, resultDB, resultTable) - if err != nil { - return nil, err - } - if !exists { - return nil, fmt.Errorf("table %s could not be found", resultTableFullName) - } - - sinkTableFullName := SinkTableFullName(resultDB, resultTable) - if err := CreateSinkTable(ctx, db, sinkTableFullName); err != nil { - return nil, err - } - - columns, err := GetPrimaryKeyColumns(ctx, db, resultTableFullName) - if err != nil { - return nil, err - } - - toIgnore, err := GetIgnoredColumns(ctx, db, resultTableFullName) - if err != nil { - return nil, err - } - ignoreMap := make(map[string]struct{}, len(toIgnore)) - for _, col := range toIgnore { - ignoreMap[col] = struct{}{} - } - - sink := &Sink{ - originalTableName: originalTable, - resultTableFullName: resultTableFullName, - sinkTableFullName: sinkTableFullName, - primaryKeyColumns: columns, - endpoint: endpoint, - ignoredColumns: ignoreMap, - } - - return sink, nil -} - -const chunkSize = 1000 - -// HandleRequest is a handler used for this specific sink. -func (s *Sink) HandleRequest(db *pgxpool.Pool, w http.ResponseWriter, r *http.Request) { - scanner := bufio.NewScanner(r.Body) - defer r.Body.Close() - var lines []Line - for scanner.Scan() { - line, err := parseLine(scanner.Bytes()) - if err != nil { - log.Print(err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - lines = append(lines, line) - if len(lines) >= chunkSize { - if err := WriteToSinkTable(r.Context(), db, s.sinkTableFullName, lines); err != nil { - log.Print(err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - log.Printf("%s: added %d operations", s.endpoint, chunkSize) - lines = []Line{} - } - } - if err := scanner.Err(); err != nil { - log.Print(err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - if err := WriteToSinkTable(r.Context(), db, s.sinkTableFullName, lines); err != nil { - log.Print(err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - log.Printf("%s: added %d operations", s.endpoint, len(lines)) -} - -// deleteRows preforms all the deletes specified in lines. -func (s *Sink) deleteRows(ctx context.Context, tx pgxtype.Querier, lines []Line) error { - if len(lines) == 0 { - return nil - } - - var chunks [][]Line - for i := 0; i < len(lines); i += chunkSize { - end := i + chunkSize - if end > len(lines) { - end = len(lines) - } - chunks = append(chunks, lines[i:end]) - } - - for _, chunk := range chunks { - // Build the statement. - var statement strings.Builder - fmt.Fprintf(&statement, "DELETE FROM %s WHERE (", s.resultTableFullName) - for i, column := range s.primaryKeyColumns { - if i > 0 { - fmt.Fprint(&statement, ",") - } - // Placeholder index always starts at 1. - fmt.Fprintf(&statement, "%s", column) - } - fmt.Fprintf(&statement, ") IN (") - var keys []interface{} - for i, line := range chunk { - // Parse out the primary key values. 
-			key := make([]interface{}, 0, len(s.primaryKeyColumns))
-			dec := json.NewDecoder(bytes.NewReader(line.key))
-			dec.UseNumber()
-			if err := dec.Decode(&key); err != nil {
-				return err
-			}
-
-			if i > 0 {
-				fmt.Fprintf(&statement, ",")
-			}
-			fmt.Fprintf(&statement, "(")
-			for i, key := range key {
-				if i > 0 {
-					fmt.Fprintf(&statement, ",")
-				}
-				keys = append(keys, key)
-				fmt.Fprintf(&statement, "$%d", len(keys))
-			}
-			fmt.Fprintf(&statement, ")")
-		}
-		fmt.Fprintf(&statement, ")")
-
-		// Execute the delete.
-		if _, err := tx.Exec(ctx, statement.String(), keys...); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// upsertRows performs all upserts specified in lines.
-func (s *Sink) upsertRows(ctx context.Context, tx pgxtype.Querier, lines []Line) error {
-	const starterColumns = 16
-	if len(lines) == 0 {
-		return nil
-	}
-
-	// Get all the column names and order them alphabetically.
-	allNames, err := lines[0].extractColumns(make([]string, 0, starterColumns))
-	if err != nil {
-		return err
-	}
-
-	// https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
-	columnNames := allNames[:0]
-	for _, name := range allNames {
-		if _, ignored := s.ignoredColumns[name]; !ignored {
-			columnNames = append(columnNames, name)
-		}
-	}
-	sort.Strings(columnNames)
-
-	var chunks [][]Line
-	for i := 0; i < len(lines); i += chunkSize {
-		end := i + chunkSize
-		if end > len(lines) {
-			end = len(lines)
-		}
-		chunks = append(chunks, lines[i:end])
-	}
-
-	for _, chunk := range chunks {
-		// Build the statement.
-		var statement strings.Builder
-		// TODO: This first part can be memoized as long as there are no schema
-		// changes.
-		fmt.Fprintf(&statement, "UPSERT INTO %s (", s.resultTableFullName)
-
-		for i, name := range columnNames {
-			if i > 0 {
-				fmt.Fprintf(&statement, ",")
-			}
-			fmt.Fprint(&statement, name)
-		}
-		fmt.Fprint(&statement, ") VALUES ")
-
-		var values []interface{}
-		for i, line := range chunk {
-			data := make(map[string]interface{}, starterColumns)
-			if err := line.parseAfter(data); err != nil {
-				return err
-			}
-			if i == 0 {
-				fmt.Fprintf(&statement, "(")
-			} else {
-				fmt.Fprintf(&statement, ",(")
-			}
-			for j, name := range columnNames {
-				values = append(values, data[name])
-				if j == 0 {
-					fmt.Fprintf(&statement, "$%d", len(values))
-				} else {
-					fmt.Fprintf(&statement, ",$%d", len(values))
-				}
-			}
-			fmt.Fprintf(&statement, ")")
-		}
-
-		// Upsert the line
-		if _, err := tx.Exec(ctx, statement.String(), values...); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// UpdateRows updates all changed rows.
-func (s *Sink) UpdateRows(ctx context.Context, tx pgxtype.Querier, prev ResolvedLine, next ResolvedLine) error {
-	// First, gather all the rows to update.
-	lines, err := DrainAllRowsToUpdate(ctx, tx, s.sinkTableFullName, prev, next)
-	if err != nil {
-		return err
-	}
-
-	if len(lines) == 0 {
-		return nil
-	}
-
-	log.Printf("%s: %s executed %d operations", s.endpoint, s.sinkTableFullName, len(lines))
-
-	// TODO: Batch these by 100 rows? Not sure what the max should be.
-
-	var upserts []Line
-	var deletes []Line
-
-	// This must happen in reverse order and all keys must be kept track of.
-	// This way, we can ensure that more recent changes overwrite earlier ones
-	// without having to perform multiple upserts/deletes to the db.
-	usedKeys := make(map[string]struct{})
-	for i := len(lines) - 1; i >= 0; i-- {
-		line := lines[i]
-
-		// Did we update this line already? If so, don't perform this update.
- if _, exist := usedKeys[string(line.key)]; exist { - continue - } - usedKeys[string(line.key)] = struct{}{} - - // Parse the key into columns - // Large numbers are not turned into strings, so the UseNumber option for - // the decoder is required. - key := make([]interface{}, 0, len(s.primaryKeyColumns)) - dec := json.NewDecoder(bytes.NewReader(line.key)) - dec.UseNumber() - if err := dec.Decode(&key); err != nil { - return err - } - - // Is this needed? What if we have 2 primary key columns but the 2nd one - // nullable or has a default? Does CDC send it? - if len(key) != len(s.primaryKeyColumns) { - return fmt.Errorf( - "table %s has %d primary key columns %v, but only got %d keys %v", - s.resultTableFullName, - len(s.primaryKeyColumns), - s.primaryKeyColumns, - len(key), - key, - ) - } - - // Is this a delete? - if string(line.after) == "null" { - deletes = append(deletes, line) - } else { - // This must be an upsert statement. - upserts = append(upserts, line) - } - } - - // Delete all rows - if err := s.deleteRows(ctx, tx, deletes); err != nil { - return err - } - - // Upsert all rows - return s.upsertRows(ctx, tx, upserts) -} diff --git a/sink_table.go b/sink_table.go deleted file mode 100644 index e6f7bec3..00000000 --- a/sink_table.go +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "bytes" - "context" - "encoding/json" - "fmt" - "strconv" - "strings" - - "github.com/jackc/pgtype/pgxtype" - "github.com/jackc/pgx/v4/pgxpool" -) - -const sinkTableSchema = ` -CREATE TABLE IF NOT EXISTS %s ( - nanos INT NOT NULL, - logical INT NOT NULL, - key STRING NOT NULL, - after STRING, - PRIMARY KEY (nanos, logical, key) -) -` - -const sinkTableWrite = `UPSERT INTO %s (nanos, logical, key, after) VALUES ` - -// Timestamps are less than and up to the resolved ones. -// For this $1 and $2 are previous resolved, $3 and $4 are the current -// resolved. -const sinkTableDrainRows = ` -DELETE -FROM %s -WHERE ((nanos = $1 AND logical > $2) OR (nanos > $1)) AND - ((nanos = $3 AND logical <= $4) OR (nanos < $3)) -RETURNING nanos, logical, key, after -` - -// SinkTableFullName creates the conjoined db/table name to be used by the sink -// table. -func SinkTableFullName(resultDB string, resultTable string) string { - return fmt.Sprintf("%s.%s_%s", *sinkDB, resultDB, resultTable) -} - -// Line stores pending mutations. -type Line struct { - after json.RawMessage // The mutations to apply: {"a": 1, "b": 1} - key json.RawMessage // Primary key values: [1, 2] - nanos int64 // HLC time base - logical int // HLC logical counter -} - -// extractColumns parses the keys from the "after" payload block and -// appends them to the given slice. -func (line *Line) extractColumns(into []string) ([]string, error) { - m := make(map[string]json.RawMessage) - dec := json.NewDecoder(bytes.NewReader(line.after)) - if err := dec.Decode(&m); err != nil { - return nil, err - } - for k := range m { - into = append(into, k) - } - return into, nil -} - -// parseAfter reifies the mutations to be applied. 
-func (line *Line) parseAfter(into map[string]interface{}) error {
-	// Parse the after columns.
-	// Large numbers are not turned into strings, so the UseNumber option for
-	// the decoder is required.
-	dec := json.NewDecoder(bytes.NewReader(line.after))
-	dec.UseNumber()
-	return dec.Decode(&into)
-}
-
-// getSinkTableValues returns the values ordered as expected by the sink
-// table insert statement.
-func (line Line) getSinkTableValues() []interface{} {
-	return []interface{}{line.nanos, line.logical, string(line.key), string(line.after)}
-}
-
-// parseSplitTimestamp splits a timestamp of the format NNNN.LLL into an int64
-// for the nanos and an int for the logical component.
-func parseSplitTimestamp(timestamp string) (int64, int, error) {
-	splits := strings.Split(timestamp, ".")
-	if len(splits) != 2 {
-		return 0, 0, fmt.Errorf("can't parse timestamp %s", timestamp)
-	}
-	nanos, err := strconv.ParseInt(splits[0], 0, 0)
-	if err != nil {
-		return 0, 0, err
-	}
-	if nanos <= 0 {
-		return 0, 0, fmt.Errorf("nanos must be greater than 0: %d", nanos)
-	}
-	logical, err := strconv.Atoi(splits[1])
-	if err != nil {
-		return 0, 0, err
-	}
-	return nanos, logical, nil
-}
-
-// parseLine takes a single line from an ndjson payload and extracts enough
-// information to be able to persist it to the staging table.
-func parseLine(rawBytes []byte) (Line, error) {
-	var payload struct {
-		After   json.RawMessage `json:"after"`
-		Key     json.RawMessage `json:"key"`
-		Updated string          `json:"updated"`
-	}
-
-	// Large numbers are not turned into strings, so the UseNumber option for
-	// the decoder is required.
-	dec := json.NewDecoder(bytes.NewReader(rawBytes))
-	dec.UseNumber()
-	if err := dec.Decode(&payload); err != nil {
-		return Line{}, err
-	}
-
-	// Parse the timestamp into nanos and logical.
-	nanos, logical, err := parseSplitTimestamp(payload.Updated)
-	if err != nil {
-		return Line{}, err
-	}
-	if nanos == 0 {
-		return Line{}, fmt.Errorf("no nano component to the 'updated' timestamp field")
-	}
-
-	return Line{
-		after:   payload.After,
-		key:     payload.Key,
-		logical: logical,
-		nanos:   nanos,
-	}, nil
-}
-
-// CreateSinkTable creates the table used for sinking, if it does not already
-// exist.
-func CreateSinkTable(ctx context.Context, db *pgxpool.Pool, sinkTableFullName string) error {
-	return Execute(ctx, db, fmt.Sprintf(sinkTableSchema, sinkTableFullName))
-}
-
-// WriteToSinkTable upserts all lines to the sink table. Never submit more than
-// 10,000 lines to this function at a time.
-func WriteToSinkTable(ctx context.Context, db *pgxpool.Pool, sinkTableFullName string, lines []Line) error {
-	if len(lines) == 0 {
-		return nil
-	}
-	var statement strings.Builder
-	if _, err := fmt.Fprintf(&statement, sinkTableWrite, sinkTableFullName); err != nil {
-		return err
-	}
-	var values []interface{}
-	for i, line := range lines {
-		values = append(values, line.getSinkTableValues()...)
-		if i == 0 {
-			if _, err := fmt.Fprint(&statement, "($1,$2,$3,$4)"); err != nil {
-				return err
-			}
-		} else {
-			j := i * 4
-			if _, err := fmt.Fprintf(&statement, ",($%d,$%d,$%d,$%d)", j+1, j+2, j+3, j+4); err != nil {
-				return err
-			}
-		}
-	}
-
-	return Execute(ctx, db, statement.String(), values...)
-}
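
(Reviewer note: the sinkTableDrainRows query above encodes a half-open HLC
window that is easy to misread. The self-contained sketch below restates its
WHERE clause in Go; the hlcTime type and function names are hypothetical.)

package main

import "fmt"

// hlcTime is a stand-in for the (nanos, logical) pair stored per row.
type hlcTime struct {
	nanos   int64
	logical int
}

// less reports whether a sorts strictly before b.
func less(a, b hlcTime) bool {
	return a.nanos < b.nanos || (a.nanos == b.nanos && a.logical < b.logical)
}

// shouldDrain mirrors the WHERE clause: a staged row drains when
// prev < ts AND ts <= next.
func shouldDrain(ts, prev, next hlcTime) bool {
	return less(prev, ts) && !less(next, ts)
}

func main() {
	prev, next := hlcTime{100, 0}, hlcTime{200, 5}
	fmt.Println(shouldDrain(hlcTime{100, 0}, prev, next)) // false: equal to prev was already drained
	fmt.Println(shouldDrain(hlcTime{100, 1}, prev, next)) // true: strictly after prev
	fmt.Println(shouldDrain(hlcTime{200, 5}, prev, next)) // true: equal to next drains
	fmt.Println(shouldDrain(hlcTime{200, 6}, prev, next)) // false: not yet resolved
}

-
-// DrainAllRowsToUpdate deletes and returns the rows that need to be
-// updated from the sink table.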
-func DrainAllRowsToUpdate( - ctx context.Context, tx pgxtype.Querier, sinkTableFullName string, prev ResolvedLine, next ResolvedLine, -) ([]Line, error) { - rows, err := tx.Query(ctx, fmt.Sprintf(sinkTableDrainRows, sinkTableFullName), - prev.nanos, prev.logical, next.nanos, next.logical, - ) - if err != nil { - return nil, err - } - defer rows.Close() - var lines []Line - var line Line - for rows.Next() { - rows.Scan(&(line.nanos), &(line.logical), &(line.key), &(line.after)) - lines = append(lines, line) - } - return lines, nil -} diff --git a/sink_table_test.go b/sink_table_test.go deleted file mode 100644 index 9fd216c6..00000000 --- a/sink_table_test.go +++ /dev/null @@ -1,307 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "context" - "encoding/json" - "fmt" - "math" - "testing" - - "github.com/jackc/pgx/v4/pgxpool" - "github.com/stretchr/testify/assert" -) - -// These test require an insecure cockroach server is running on the default -// port with the default root user with no password. - -// findAllRowsToUpdateDB is a wrapper around FindAllRowsToUpdate that handles -// the transaction for testing. -func findAllRowsToUpdateDB( - ctx context.Context, db *pgxpool.Pool, sinkTableFullName string, prev ResolvedLine, next ResolvedLine, -) ([]Line, error) { - var lines []Line - - if err := Retry(ctx, func(ctx context.Context) error { - var err error - tx, err := db.Begin(ctx) - if err != nil { - return err - } - defer tx.Rollback(ctx) - lines, err = DrainAllRowsToUpdate(ctx, tx, sinkTableFullName, prev, next) - return err - }); err != nil { - return nil, err - } - return lines, nil -} - -func TestParseSplitTimestamp(t *testing.T) { - tests := []struct { - testcase string - expectedPass bool - expectedNanos int64 - expectedLogical int - }{ - {"", false, 0, 0}, - {".", false, 0, 0}, - {"1233", false, 0, 0}, - {".1233", false, 0, 0}, - {"123.123", true, 123, 123}, - {"0.0", false, 0, 0}, - {"1586019746136571000.0000000000", true, 1586019746136571000, 0}, - {"1586019746136571000.0000000001", true, 1586019746136571000, 1}, - {"9223372036854775807.2147483647", true, math.MaxInt64, math.MaxInt32}, - } - - for i, test := range tests { - t.Run(fmt.Sprintf("%d - %s", i, test.testcase), func(t *testing.T) { - actualNanos, actualLogical, actualErr := parseSplitTimestamp(test.testcase) - if test.expectedPass == (actualErr != nil) { - t.Errorf("Expected %v, got %s", test.expectedPass, actualErr) - } - if test.expectedNanos != actualNanos { - t.Errorf("Expected %d nanos, got %d nanos", test.expectedNanos, actualNanos) - } - if test.expectedLogical != actualLogical { - t.Errorf("Expected %d nanos, got %d nanos", test.expectedLogical, actualLogical) - } - }) - } -} - -func TestParseLine(t *testing.T) { - tests := []struct { - testcase string - expectedPass bool - expectedAfter string - expectedKey string - expectedNanos int64 - expectedLogical int - }{ - { - `{"after": {"a":9,"b":9}, "key": [9], "updated": "1586020760120222000.0000000000"}`, - true, `{"a":9,"b":9}`, `[9]`, 1586020760120222000, 0, - }, - { - `{"after": {"a": 9, "b": 9}, "key": [9]`, - false, "", "", 0, 0, - }, - { - `{"after": {"a": 9, "b": 9}, "key": [9], "updated": 
"1586020760120222000"}`, - false, "", "", 0, 0, - }, - { - `{"after": {"a": 9, "b": 9}, "key":, "updated": "1586020760120222000.0000000000"}`, - false, "", "", 0, 0, - }, - { - `{"after": {"a": 9, "b": 9}, "key": [9], "updated": "0.0000000000"}`, - false, "", "", 0, 0, - }, - { - `{"after": {"a": 9, "b": 9}, "updated": "1586020760120222000.0000000000"}`, - false, "", "", 0, 0, - }, - } - - for i, test := range tests { - t.Run(fmt.Sprintf("%d - %s", i, test.testcase), func(t *testing.T) { - a := assert.New(t) - actual, actualErr := parseLine([]byte(test.testcase)) - if test.expectedPass && !a.NoError(actualErr) { - return - } - if !test.expectedPass { - return - } - a.Equal(test.expectedNanos, actual.nanos) - a.Equal(test.expectedLogical, actual.logical) - a.Equal(json.RawMessage(test.expectedKey), actual.key) - a.Equal(json.RawMessage(test.expectedAfter), actual.after) - }) - } -} - -func TestWriteToSinkTable(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); a.NoError(err) { - a.Equal(10, count) - } else { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - sink := sinks.FindSink(endpointTest, tableFrom.name) - if !a.NotNil(sink) { - return - } - - // Make sure there are no rows in the table yet. - if rowCount, err := getRowCount(ctx, db, sink.sinkTableFullName); a.NoError(err) { - a.Equal(0, rowCount) - } else { - return - } - - // Write 100 rows to the table. - var lines []Line - for i := 0; i < 100; i++ { - lines = append(lines, Line{ - nanos: int64(i), - logical: i, - key: json.RawMessage(fmt.Sprintf("[%d]", i)), - after: json.RawMessage(fmt.Sprintf(`{"a": %d`, i)), - }) - } - - if err := WriteToSinkTable(ctx, db, sink.sinkTableFullName, lines); !a.NoError(err) { - return - } - - // Re-deliver a message to check at-least-once behavior. - if err := WriteToSinkTable(ctx, db, sink.sinkTableFullName, lines[:1]); !a.NoError(err) { - return - } - - // Check to see if there are indeed 100 rows in the table. 
- if rowCount, err := getRowCount(ctx, db, sink.sinkTableFullName); a.NoError(err) { - a.Equal(100, rowCount) - } -} - -func TestFindAllRowsToUpdate(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Insert 100 rows into the table. - sink := sinks.FindSink(endpointTest, tableFrom.name) - var lines []Line - for i := 0; i < 10; i++ { - for j := 0; j < 10; j++ { - lines = append(lines, Line{ - nanos: int64(i), - logical: j, - after: json.RawMessage(fmt.Sprintf("{a=%d,b=%d}", i, j)), - key: json.RawMessage(fmt.Sprintf("[%d]", i)), - }) - } - } - if err := WriteToSinkTable(ctx, db, sink.sinkTableFullName, lines); !a.NoError(err) { - return - } - - // Now find those rows from the start. - for i := 0; i < 10; i++ { - prev := ResolvedLine{ - endpoint: "test", - nanos: 0, - logical: 0, - } - next := ResolvedLine{ - endpoint: "test", - nanos: int64(i), - logical: i, - } - lines, err := findAllRowsToUpdateDB(ctx, db, sink.sinkTableFullName, prev, next) - if a.NoError(err) { - a.Len(lines, i*11) - } - } - - // And again but from the previous. - for i := 1; i < 10; i++ { - prev := ResolvedLine{ - endpoint: "test", - nanos: int64(i - 1), - logical: i - 1, - } - next := ResolvedLine{ - endpoint: "test", - nanos: int64(i), - logical: i, - } - lines, err := findAllRowsToUpdateDB(ctx, db, sink.sinkTableFullName, prev, next) - if a.NoError(err) { - a.Len(lines, 11) - } - } -} diff --git a/sinks.go b/sinks.go deleted file mode 100644 index 4a6ce900..00000000 --- a/sinks.go +++ /dev/null @@ -1,161 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "bufio" - "context" - "fmt" - "log" - "net/http" - "strings" - "sync" - - "github.com/jackc/pgx/v4/pgxpool" -) - -// Sinks holds a map of all known sinks. -type Sinks struct { - // Is this mutex overkill? Is it needed? There should never be any writes - // to the map after initialization. But guidance here is fuzzy, so I'll keep - // it in. - sync.RWMutex - - // endpoints can have multiple tables - sinksByTableByEndpoint map[string]map[string]*Sink -} - -// CreateSinks creates a new table sink and populates it based on the pass in -// config. 
-func CreateSinks(ctx context.Context, db *pgxpool.Pool, config Config) (*Sinks, error) { - sinks := &Sinks{ - sinksByTableByEndpoint: make(map[string]map[string]*Sink), - } - - for _, entry := range config { - if err := sinks.AddSink(ctx, db, entry); err != nil { - return nil, err - } - } - - return sinks, nil -} - -// AddSink creates and adds a new sink to the sinks map. -func (s *Sinks) AddSink(ctx context.Context, db *pgxpool.Pool, entry ConfigEntry) error { - s.Lock() - defer s.Unlock() - - sourceTable := strings.ToLower(strings.TrimSpace(entry.SourceTable)) - destinationDB := strings.ToLower(strings.TrimSpace(entry.DestinationDatabase)) - destinationTable := strings.ToLower(strings.TrimSpace(entry.DestinationTable)) - endpoint := strings.ToLower(strings.TrimSpace(entry.Endpoint)) - - // First check to make sure the endpoint exists, if it doesn't create one. - var sinksByTable map[string]*Sink - var exist bool - if sinksByTable, exist = s.sinksByTableByEndpoint[endpoint]; !exist { - sinksByTable = make(map[string]*Sink) - s.sinksByTableByEndpoint[endpoint] = sinksByTable - } - - // Check for a double table - if _, exist := sinksByTable[sourceTable]; exist { - return fmt.Errorf("duplicate table configuration entry found: %s", sourceTable) - } - - sink, err := CreateSink(ctx, db, sourceTable, destinationDB, destinationTable, endpoint) - if err != nil { - return err - } - sinksByTable[sourceTable] = sink - s.sinksByTableByEndpoint[endpoint] = sinksByTable - return nil -} - -// FindSink returns a sink for a given table name and endpoint. -func (s *Sinks) FindSink(endpoint string, table string) *Sink { - s.RLock() - defer s.RUnlock() - sinksByTable, exist := s.sinksByTableByEndpoint[endpoint] - if !exist { - return nil - } - return sinksByTable[table] -} - -// GetAllSinksByEndpoint gets a list of all known sinks. -func (s *Sinks) GetAllSinksByEndpoint(endpoint string) []*Sink { - s.RLock() - defer s.RUnlock() - var allSinks []*Sink - if sinksByTable, exists := s.sinksByTableByEndpoint[endpoint]; exists { - for _, sink := range sinksByTable { - allSinks = append(allSinks, sink) - } - } - return allSinks -} - -// HandleResolvedRequest parses and applies all the resolved upserts. -func (s *Sinks) HandleResolvedRequest( - ctx context.Context, db *pgxpool.Pool, rURL resolvedURL, w http.ResponseWriter, r *http.Request, -) { - scanner := bufio.NewScanner(r.Body) - defer r.Body.Close() - for scanner.Scan() { - next, err := parseResolvedLine(scanner.Bytes(), rURL.endpoint) - if err != nil { - log.Print(err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - // Start the transation - if err := Retry(ctx, func(ctx context.Context) error { - tx, err := db.Begin(ctx) - if err != nil { - return err - } - defer tx.Rollback(ctx) - - // Get the previous resolved - prev, err := getPreviousResolved(ctx, tx, rURL.endpoint) - if err != nil { - return err - } - log.Printf("%s: resolved - timestamp %d.%d", next.endpoint, next.nanos, next.logical) - - // Find all rows to update and upsert them. - allSinks := s.GetAllSinksByEndpoint(rURL.endpoint) - for _, sink := range allSinks { - if err := sink.UpdateRows(ctx, tx, prev, next); err != nil { - return err - } - } - - // Write the updated resolved. 
- if err := next.writeUpdated(ctx, tx); err != nil { - return err - } - return tx.Commit(ctx) - }); err != nil { - log.Print(err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - } - if err := scanner.Err(); err != nil { - log.Print(err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } -} diff --git a/sql.go b/sql.go deleted file mode 100644 index 4f19f65f..00000000 --- a/sql.go +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "context" - "fmt" - - "github.com/jackc/pgx/v4" - "github.com/jackc/pgx/v4/pgxpool" -) - -const sinkDBZoneConfig = `ALTER DATABASE %s CONFIGURE ZONE USING gc.ttlseconds = 600;` - -// CreateSinkDB creates a new sink db if one does not exist yet and also adds -// the resolved table. -func CreateSinkDB(ctx context.Context, db *pgxpool.Pool) error { - if err := Execute(ctx, db, fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", *sinkDB)); err != nil { - return err - } - if *sinkDBZone { - if err := Execute(ctx, db, fmt.Sprintf(sinkDBZoneConfig, *sinkDB)); err != nil { - return err - } - } - return CreateResolvedTable(ctx, db) -} - -// DropSinkDB drops the sinkDB and all data in it. -func DropSinkDB(ctx context.Context, db *pgxpool.Pool) error { - return Execute(ctx, db, fmt.Sprintf(`DROP DATABASE IF EXISTS %s CASCADE`, *sinkDB)) -} - -const sqlTableExistsQuery = `SELECT table_name FROM [SHOW TABLES FROM %s] WHERE table_name = '%s'` - -// TableExists checks for the existence of a table. -func TableExists(ctx context.Context, db *pgxpool.Pool, dbName string, tableName string) (bool, error) { - findTableSQL := fmt.Sprintf(sqlTableExistsQuery, dbName, tableName) - var tableFound string - err := Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, findTableSQL).Scan(&tableFound) - }) - switch err { - case pgx.ErrNoRows: - return false, nil - case nil: - return true, nil - default: - return false, err - } -} - -const sqlGetIgnoredColumns = ` -SELECT column_name FROM [SHOW COLUMNS FROM %s] WHERE generation_expression != '' -` - -// GetIgnoredColumns returns the names of columns defined in the table -// which should not be updated. This is used to filter out columns -// related to certain database features, such as hash-sharded indexes. 
-func GetIgnoredColumns(ctx context.Context, db *pgxpool.Pool, tableFullName string) ([]string, error) { - findKeyColumns := fmt.Sprintf(sqlGetIgnoredColumns, tableFullName) - var columns []string - if err := Retry(ctx, func(ctx context.Context) error { - var columnsInternal []string - rows, err := db.Query(ctx, findKeyColumns) - if err != nil { - return err - } - defer rows.Close() - - for rows.Next() { - var column string - if err := rows.Scan(&column); err != nil { - return err - } - columnsInternal = append(columnsInternal, column) - } - columns = columnsInternal - return nil - }); err != nil { - return nil, err - } - return columns, nil -} - -const sqlGetPrimaryKeyColumnsQuery = ` -SELECT column_name FROM [SHOW INDEX FROM %s] -WHERE index_name = 'primary' - AND NOT storing -ORDER BY seq_in_index -` - -// GetPrimaryKeyColumns returns the column names for the primary key index for -// a table, in order. -func GetPrimaryKeyColumns(ctx context.Context, db *pgxpool.Pool, tableFullName string) ([]string, error) { - // Needs retry. - findKeyColumns := fmt.Sprintf(sqlGetPrimaryKeyColumnsQuery, tableFullName) - var columns []string - if err := Retry(ctx, func(ctx context.Context) error { - var columnsInternal []string - rows, err := db.Query(ctx, findKeyColumns) - if err != nil { - return err - } - defer rows.Close() - - for rows.Next() { - var column string - if err := rows.Scan(&column); err != nil { - return err - } - columnsInternal = append(columnsInternal, column) - } - columns = columnsInternal - return nil - }); err != nil { - return nil, err - } - return columns, nil -} - -// Execute is just a wrapper around Retry that can be used for sql -// queries that don't have any return values. -func Execute(ctx context.Context, db *pgxpool.Pool, query string, args ...interface{}) error { - return Retry(ctx, func(ctx context.Context) error { - _, err := db.Exec(ctx, query, args...) - return err - }) -} diff --git a/sql_test.go b/sql_test.go deleted file mode 100644 index 6b064655..00000000 --- a/sql_test.go +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "context" - "fmt" - "testing" - - "github.com/stretchr/testify/assert" -) - -// These test require an insecure cockroach server is running on the default -// port with the default root user with no password. 
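
(Reviewer note: a minimal usage sketch for GetPrimaryKeyColumns, which the
test below exercises more thoroughly. The table name is hypothetical, and db
is assumed to be a *pgxpool.Pool obtained from getDB.)

// Given CREATE TABLE defaultdb.widgets (a INT, b INT, PRIMARY KEY (b, a)),
// SHOW INDEX reports the key columns in index order, so cols is [b a].
cols, err := GetPrimaryKeyColumns(ctx, db, "defaultdb.widgets")
if err != nil {
	return err
}
fmt.Println(cols)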
- -func TestGetPrimaryKeyColumns(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - testcases := []struct { - tableSchema string - primaryKeys []string - }{ - { - "a INT", - []string{"rowid"}, - }, - { - "a INT PRIMARY KEY", - []string{"a"}, - }, - { - "a INT, b INT, PRIMARY KEY (a,b)", - []string{"a", "b"}, - }, - { - "a INT, b INT, PRIMARY KEY (b,a)", - []string{"b", "a"}, - }, - { - "a INT, b INT, c INT, PRIMARY KEY (b,a,c)", - []string{"b", "a", "c"}, - }, - } - - for i, test := range testcases { - t.Run(fmt.Sprintf("%d:%s", i, test.tableSchema), func(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - tableFullName := fmt.Sprintf("%s.test_%d", dbName, i) - if !a.NoError(Execute(ctx, db, - fmt.Sprintf(`CREATE TABLE %s ( %s )`, tableFullName, test.tableSchema))) { - return - } - columns, err := GetPrimaryKeyColumns(ctx, db, tableFullName) - if !a.NoError(err) { - return - } - a.Equal(test.primaryKeys, columns) - }) - } -} diff --git a/url.go b/url.go deleted file mode 100644 index 1a6cfb76..00000000 --- a/url.go +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "fmt" - "regexp" - "strconv" - "strings" - "time" -) - -// This is the timestamp format: YYYYMMDDHHMMSSNNNNNNNNNLLLLLLLLLL -// Formatting const stolen from https://github.com/cockroachdb/cockroach/blob/master/pkg/ccl/changefeedccl/sink_cloudstorage.go#L48 -const timestampDateTimeFormat = "20060102150405" - -func parseTimestamp(timestamp string, logical string) (time.Time, int, error) { - if len(timestamp) != 23 { - return time.Time{}, 0, fmt.Errorf("can't parse timestamp %s", timestamp) - } - if len(logical) != 10 { - return time.Time{}, 0, fmt.Errorf("can't parse logical timestamp %s", logical) - } - - // Parse the date and time. - timestampParsed, err := time.Parse(timestampDateTimeFormat, timestamp[0:14]) - if err != nil { - return time.Time{}, 0, err - } - - // Parse out the nanos - nanos, err := time.ParseDuration(timestamp[14:23] + "ns") - if err != nil { - return time.Time{}, 0, err - } - timestampParsed.Add(nanos) - - // Parse out the logical timestamp - logicalParsed, err := strconv.Atoi(logical) - if err != nil { - return time.Time{}, 0, err - } - - return timestampParsed, logicalParsed, nil -} - -// See https://www.cockroachlabs.com/docs/stable/create-changefeed.html#general-file-format -// Example: /test.sql//2020-04-02/202004022058072107140000000000000-56087568dba1e6b8-1-72-00000000-test_table-1.ndjson -// Format is: /[endpoint]/[date]/[timestamp]-[uniquer]-[topic]-[schema-id] -var ( - ndjsonRegex = regexp.MustCompile(`/(?P[^/]*)/(?P\d{4}-\d{2}-\d{2})/(?P.+)-(?P[^-]+)-(?P[^-]+).ndjson$`) - ndjsonEndpointIdx = ndjsonRegex.SubexpIndex("endpoint") - ndjsonTopicIdx = ndjsonRegex.SubexpIndex("topic") -) - -// ndjsonURL contains all the parsed info from an ndjson url. 
-type ndjsonURL struct { - endpoint string - topic string -} - -func parseNdjsonURL(url string) (ndjsonURL, error) { - match := ndjsonRegex.FindStringSubmatch(url) - if match == nil { - return ndjsonURL{}, fmt.Errorf("can't parse url %s", url) - } - - return ndjsonURL{ - endpoint: match[ndjsonEndpointIdx], - topic: match[ndjsonTopicIdx], - }, nil -} - -// Example: /test.sql/2020-04-04/202004042351304139680000000000000.RESOLVED -// Format is: /[endpoint]/[date]/[timestamp].RESOLVED -var resolvedRegex = regexp.MustCompile(`^/(?P.*)/(?P\d{4}-\d{2}-\d{2})/(?P\d{33}).RESOLVED$`) - -// resolvedURL contains all the parsed info from an ndjson url. -type resolvedURL struct { - endpoint string - date string - timestamp time.Time - timestampLogical int -} - -func parseResolvedURL(url string) (resolvedURL, error) { - match := resolvedRegex.FindStringSubmatch(url) - if len(match) != resolvedRegex.NumSubexp()+1 { - return resolvedURL{}, fmt.Errorf("can't parse url %s", url) - } - - var resolved resolvedURL - for i, name := range resolvedRegex.SubexpNames() { - switch name { - case "date": - resolved.date = strings.ToLower(match[i]) - case "timestamp": - if len(match[i]) != 33 { - return resolvedURL{}, fmt.Errorf( - "expected timestamp to be 33 characters long, got %d: %s", - len(match[i]), match[i], - ) - } - var err error - resolved.timestamp, resolved.timestampLogical, err = parseTimestamp( - match[i][0:23], match[i][23:33], - ) - if err != nil { - return resolvedURL{}, err - } - case "endpoint": - resolved.endpoint = strings.ToLower(match[i]) - default: - // Skip all the rest. - } - } - - return resolved, nil -} diff --git a/url_test.go b/url_test.go deleted file mode 100644 index 407a8747..00000000 --- a/url_test.go +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2021 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestNdjsonURL(t *testing.T) { - a := assert.New(t) - const u = "/endpoint/2020-04-02/202004022058072107140000000000000-56087568dba1e6b8-1-72-00000000-test_table-1f.ndjson" - - p, err := parseNdjsonURL(u) - if a.NoError(err) { - a.Equal("endpoint", p.endpoint) - a.Equal("test_table", p.topic) - } -}
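
(Reviewer note: parseResolvedURL had no companion test in the removed
url_test.go; the sketch below shows the kind of case that would have covered
it. The sample path is synthetic but follows the documented
/[endpoint]/[date]/[timestamp].RESOLVED layout.)

func TestResolvedURL(t *testing.T) {
	a := assert.New(t)
	const u = "/endpoint/2020-04-04/202004042351304139680000000000000.RESOLVED"

	p, err := parseResolvedURL(u)
	if a.NoError(err) {
		a.Equal("endpoint", p.endpoint)
		a.Equal("2020-04-04", p.date)
		a.Equal(0, p.timestampLogical)
	}
}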