diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 9cbb04db..8d3e02f2 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -35,6 +35,8 @@ jobs: matrix: cockroachdb: [ v20.2, v21.1, v21.2 ] runs-on: ubuntu-latest + env: + COVER_OUT: coverage-${{ matrix.cockroachdb }}.out steps: - uses: actions/checkout@v2 @@ -50,8 +52,15 @@ jobs: - name: Go Tests env: COCKROACH_DEV_LICENSE: ${{ secrets.COCKROACH_DEV_LICENSE }} - run: go test -v ./... + run: go test -v -race -coverpkg=./internal/... -covermode=atomic -coverprofile=${{ env.COVER_OUT }} ./... - name: Stop CockroachDB + if: ${{ always() }} working-directory: .github run: docker-compose down + + - name: Upload coverage + uses: codecov/codecov-action@v2 + with: + token: ${{ secrets.CODECOV_TOKEN }} + files: ${{ env.COVER_OUT }} diff --git a/go.mod b/go.mod index 1c669727..b2b5479b 100644 --- a/go.mod +++ b/go.mod @@ -3,28 +3,31 @@ module github.com/cockroachdb/cdc-sink go 1.17 require ( - github.com/jackc/pgconn v1.10.0 - github.com/jackc/pgtype v1.8.1 - github.com/jackc/pgx/v4 v4.13.0 + github.com/jackc/pgconn v1.10.1 + github.com/jackc/pgtype v1.9.1 + github.com/jackc/pgx/v4 v4.14.1 github.com/pkg/errors v0.9.1 github.com/stretchr/testify v1.7.0 - golang.org/x/lint v0.0.0-20190930215403-16217165b5de - golang.org/x/net v0.0.0-20211005215030-d2e5035098b3 - honnef.co/go/tools v0.0.1-2019.2.3 + golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 + golang.org/x/net v0.0.0-20211209124913-491a49abca63 + honnef.co/go/tools v0.2.2 ) require ( - github.com/BurntSushi/toml v0.3.1 // indirect + github.com/BurntSushi/toml v0.4.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/jackc/chunkreader/v2 v2.0.1 // indirect github.com/jackc/pgio v1.0.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect - github.com/jackc/pgproto3/v2 v2.1.1 // indirect + github.com/jackc/pgproto3/v2 v2.2.0 // indirect github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b // indirect - github.com/jackc/puddle v1.1.4 // indirect + github.com/jackc/puddle v1.2.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 // indirect + golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b // indirect + golang.org/x/mod v0.5.1 // indirect + golang.org/x/sys v0.0.0-20211214150614-024a26f5d6e2 // indirect golang.org/x/text v0.3.7 // indirect - golang.org/x/tools v0.0.0-20200103221440-774c71fcf114 // indirect + golang.org/x/tools v0.1.8 // indirect + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect ) diff --git a/go.sum b/go.sum index 3b2a7aa1..3b85ca9e 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/toml v0.4.1 h1:GaI7EiDXDRfa8VshkTj7Fym7ha+y8/XxIgD2okUIjLw= +github.com/BurntSushi/toml v0.4.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ= github.com/Masterminds/semver/v3 v3.1.1/go.mod h1:VPu/7SZ7ePZ3QOrcuXROw5FAcLl4a0cBrbBpGY/8hQs= github.com/cockroachdb/apd v1.1.0 h1:3LFP3629v+1aKXU5Q37mxmRxX/pIu1nijXydLShEq5I= github.com/cockroachdb/apd v1.1.0/go.mod h1:8Sl8LxpKi29FqWXR16WEFZRNSz3SoPzUzeMeY4+DwBQ= @@ -28,6 +30,8 @@ github.com/jackc/pgconn v1.9.0/go.mod h1:YctiPyvzfU11JFxoXokUOOKQXQmDMoJL9vJzHH8 github.com/jackc/pgconn
v1.9.1-0.20210724152538-d89c8390a530/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= github.com/jackc/pgconn v1.10.0 h1:4EYhlDVEMsJ30nNj0mmgwIUXoq7e9sMJrVC2ED6QlCU= github.com/jackc/pgconn v1.10.0/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= +github.com/jackc/pgconn v1.10.1 h1:DzdIHIjG1AxGwoEEqS+mGsURyjt4enSmqzACXvVzOT8= +github.com/jackc/pgconn v1.10.1/go.mod h1:4z2w8XhRbP1hYxkpTuBjTS3ne3J48K83+u0zoyvg2pI= github.com/jackc/pgio v1.0.0 h1:g12B9UwVnzGhueNavwioyEEpAmqMe1E/BN9ES+8ovkE= github.com/jackc/pgio v1.0.0/go.mod h1:oP+2QK2wFfUWgr+gxjoBH9KGBb31Eio69xUb0w5bYf8= github.com/jackc/pgmock v0.0.0-20190831213851-13a1b77aafa2/go.mod h1:fGZlG77KXmcq05nJLRkk0+p82V8B8Dw8KN2/V9c/OAE= @@ -45,6 +49,8 @@ github.com/jackc/pgproto3/v2 v2.0.0-rc3.0.20190831210041-4c03ce451f29/go.mod h1: github.com/jackc/pgproto3/v2 v2.0.6/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= github.com/jackc/pgproto3/v2 v2.1.1 h1:7PQ/4gLoqnl87ZxL7xjO0DR5gYuviDCZxQJsUlFW1eI= github.com/jackc/pgproto3/v2 v2.1.1/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= +github.com/jackc/pgproto3/v2 v2.2.0 h1:r7JypeP2D3onoQTCxWdTpCtJ4D+qpKr0TxvoyMhZ5ns= +github.com/jackc/pgproto3/v2 v2.2.0/go.mod h1:WfJCnwN3HIg9Ish/j3sgWXnAfK8A9Y0bwXYU5xKaEdA= github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b h1:C8S2+VttkHFdOOCXJe+YGfa4vHYwlt4Zx+IVXQ97jYg= github.com/jackc/pgservicefile v0.0.0-20200714003250-2b9c44734f2b/go.mod h1:vsD4gTJCa9TptPL8sPkXrLZ+hDuNrZCnj29CQpr4X1E= github.com/jackc/pgtype v0.0.0-20190421001408-4ed0de4755e0/go.mod h1:hdSHsc1V01CGwFsrv11mJRHWJ6aifDLfdV3aVjFF0zg= @@ -53,17 +59,24 @@ github.com/jackc/pgtype v0.0.0-20190828014616-a8802b16cc59/go.mod h1:MWlu30kVJrU github.com/jackc/pgtype v1.8.1-0.20210724151600-32e20a603178/go.mod h1:C516IlIV9NKqfsMCXTdChteoXmwgUceqaLfjg2e3NlM= github.com/jackc/pgtype v1.8.1 h1:9k0IXtdJXHJbyAWQgbWr1lU+MEhPXZz6RIXxfR5oxXs= github.com/jackc/pgtype v1.8.1/go.mod h1:LUMuVrfsFfdKGLw+AFFVv6KtHOFMwRgDDzBt76IqCA4= +github.com/jackc/pgtype v1.9.1 h1:MJc2s0MFS8C3ok1wQTdQxWuXQcB6+HwAm5x1CzW7mf0= +github.com/jackc/pgtype v1.9.1/go.mod h1:LUMuVrfsFfdKGLw+AFFVv6KtHOFMwRgDDzBt76IqCA4= github.com/jackc/pgx/v4 v4.0.0-20190420224344-cc3461e65d96/go.mod h1:mdxmSJJuR08CZQyj1PVQBHy9XOp5p8/SHH6a0psbY9Y= github.com/jackc/pgx/v4 v4.0.0-20190421002000-1b8f0016e912/go.mod h1:no/Y67Jkk/9WuGR0JG/JseM9irFbnEPbuWV2EELPNuM= github.com/jackc/pgx/v4 v4.0.0-pre1.0.20190824185557-6972a5742186/go.mod h1:X+GQnOEnf1dqHGpw7JmHqHc1NxDoalibchSk9/RWuDc= github.com/jackc/pgx/v4 v4.12.1-0.20210724153913-640aa07df17c/go.mod h1:1QD0+tgSXP7iUjYm9C1NxKhny7lq6ee99u/z+IHFcgs= github.com/jackc/pgx/v4 v4.13.0 h1:JCjhT5vmhMAf/YwBHLvrBn4OGdIQBiFG6ym8Zmdx570= github.com/jackc/pgx/v4 v4.13.0/go.mod h1:9P4X524sErlaxj0XSGZk7s+LD0eOyu1ZDUrrpznYDF0= +github.com/jackc/pgx/v4 v4.14.1 h1:71oo1KAGI6mXhLiTMn6iDFcp3e7+zon/capWjl2OEFU= +github.com/jackc/pgx/v4 v4.14.1/go.mod h1:RgDuE4Z34o7XE92RpLsvFiOEfrAUT0Xt2KxvX73W06M= github.com/jackc/puddle v0.0.0-20190413234325-e4ced69a3a2b/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle v1.1.4 h1:5Ey/o5IfV7dYX6Znivq+N9MdK1S18OJI5OJq6EAAADw= github.com/jackc/puddle v1.1.4/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= +github.com/jackc/puddle v1.2.0/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= +github.com/jackc/puddle v1.2.1 
h1:gI8os0wpRXFd4FiAY2dWiqRK037tjj3t7rKFeO4X5iw= +github.com/jackc/puddle v1.2.1/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= @@ -107,6 +120,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= @@ -130,18 +145,34 @@ golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5y golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 h1:7I4JAnoQBe7ZtJcBaYHi5UtiO8tQHbUSXxL+pnGRANg= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= +golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b h1:QAqMVf3pSa6eeTsuklijukjXBlj7Es2QQplab+/RbQ4= +golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b h1:Wh+f8QHJXR411sJR8/vRBTZ7YapZaRvUcLFFJhusH0k= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 h1:VLliZ0d+/avPrXXH+OakdXhpJuEoBZuwh1m2j7U6Iug= +golang.org/x/lint v0.0.0-20210508222113-6edffad5e616/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0 h1:KU7oHjnv3XNWfa5COkzUifxZmxp1TyI7ImMXqFxLwvQ= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.5.1 h1:OJxoQ/rynoF0dcCdI7cLPktw/hR2cueqYfjm43oqK38= +golang.org/x/mod v0.5.1/go.mod h1:5OXOZSfqPIIbmVBIIKWRFfZjPR0E5r58TLhUjH0a2Ro= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 
+golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20211005215030-d2e5035098b3 h1:G64nFNerDErBd2KdvHvIn3Ee6ccUQBTfhDZEO0DccfU= golang.org/x/net v0.0.0-20211005215030-d2e5035098b3/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211209124913-491a49abca63 h1:iocB37TsdFuN6IBRZ+ry36wrkoV51/tl5vOWqkcPGvY= +golang.org/x/net v0.0.0-20211209124913-491a49abca63/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -152,9 +183,13 @@ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211214150614-024a26f5d6e2 h1:oJg+vmWs1UY4oSg6n1drFSkU2Nc48mxtz5qhA0HaG0I= +golang.org/x/sys v0.0.0-20211214150614-024a26f5d6e2/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -171,12 +206,20 @@ golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190823170909-c4a336ef6a2f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200103221440-774c71fcf114 h1:DnSr2mCsxyCE6ZgIkmcWUQY2R5cH/6wL7eIxEmQOMSE= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200103221440-774c71fcf114/go.mod 
h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200410194907-79a7a3126eef h1:RHORRhs540cYZYrzgU2CPUyykkwZM78hGdzocOo9P8A= +golang.org/x/tools v0.0.0-20200410194907-79a7a3126eef/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= +golang.org/x/tools v0.1.8 h1:P1HhGGuLW4aAclzjtmJdf0mJOjVUZUzOTqkAkWL+l6w= +golang.org/x/tools v0.1.8/go.mod h1:nABZi5QlRsZVlzPpHl034qft6wpY4eDcsTt5AaioBiU= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190513163551-3ee3066db522/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= @@ -187,5 +230,8 @@ gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +honnef.co/go/tools v0.0.1-2020.1.6 h1:W18jzjh8mfPez+AwGLxmOImucz/IFjpNlrKVnaj2YVc= +honnef.co/go/tools v0.0.1-2020.1.6/go.mod h1:pyyisuGw24ruLjrr1ddx39WE0y9OooInRzEYLhQB2YY= +honnef.co/go/tools v0.2.2 h1:MNh1AVMyVX23VUHE2O27jm6lNj3vjO5DexS4A1xvnzk= +honnef.co/go/tools v0.2.2/go.mod h1:lPVVZ2BS5TfnjLyizF7o7hv7j9/L+8cZY2hLyjP9cGY= diff --git a/internal/backend/apply/apply.go b/internal/backend/apply/apply.go new file mode 100644 index 00000000..ff135b70 --- /dev/null +++ b/internal/backend/apply/apply.go @@ -0,0 +1,300 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package apply contains code for applying mutations to tables. +package apply + +// This file contains code repackaged from sink.go. 
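+// +// A minimal usage sketch, assuming hypothetical ctx, pool, watchers, +// table, and muts variables (the factory wiring lives in factory.go): +// +// appliers, cancel := apply.New(watchers) +// defer cancel() +// app, err := appliers.Get(ctx, table) +// err = app.Apply(ctx, pool, muts)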
+ +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "log" + "sort" + "strings" + "sync" + "time" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/batches" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/jackc/pgx/v4" + "github.com/pkg/errors" +) + +// apply applies mutations (upserts and deletes) to a target table. +type apply struct { + cancel context.CancelFunc + target ident.Table + + mu struct { + sync.RWMutex + columns []sinktypes.ColData + pks []sinktypes.ColData + + sql struct { + // DELETE FROM t WHERE ("pk0", "pk1") IN (SELECT $1::INT8, $2::STRING) + delete string + // UPSERT INTO t ("pk0", "pk1") SELECT $1::INT8, $2::STRING + upsert string + } + } +} + +var _ sinktypes.Applier = (*apply)(nil) + +// newApply constructs an apply by inspecting the target table. +func newApply(w sinktypes.Watcher, target ident.Table, +) (_ *apply, cancel func(), _ error) { + ch, cancel, err := w.Watch(target) + if err != nil { + return nil, cancel, err + } + + a := &apply{cancel: cancel, target: target} + // Wait for the initial column data to be loaded. + select { + case colData := <-ch: + a.refreshUnlocked(colData) + case <-time.After(10 * time.Second): + return nil, cancel, errors.Errorf("column data timeout for %s", target) + } + + // Background routine to keep the column data refreshed. + go func() { + for { + colData, open := <-ch + if !open { + return + } + a.refreshUnlocked(colData) + log.Printf("refreshed schema for table %s", a.target) + } + }() + + return a, cancel, nil +} + +// Apply applies the mutations to the target table. +func (a *apply) Apply( + ctx context.Context, tx sinktypes.Batcher, muts []sinktypes.Mutation, +) error { + deletes, r := batches.Mutation() + defer r() + upserts, r := batches.Mutation() + defer r() + + a.mu.RLock() + defer a.mu.RUnlock() + + if len(a.mu.columns) == 0 { + return errors.Errorf("no ColumnData available for %s", a.target) + } + + for i := range muts { + if muts[i].Delete() { + deletes = append(deletes, muts[i]) + if len(deletes) == cap(deletes) { + if err := a.deleteLocked(ctx, tx, deletes); err != nil { + return err + } + deletes = deletes[:0] + } + } else { + upserts = append(upserts, muts[i]) + if len(upserts) == cap(upserts) { + if err := a.upsertLocked(ctx, tx, upserts); err != nil { + return err + } + upserts = upserts[:0] + } + } + } + + if err := a.deleteLocked(ctx, tx, deletes); err != nil { + return err + } + return a.upsertLocked(ctx, tx, upserts) +} + +func (a *apply) deleteLocked( + ctx context.Context, db sinktypes.Batcher, muts []sinktypes.Mutation, +) error { + if len(muts) == 0 { + return nil + } + + batch := &pgx.Batch{} + + for i := range muts { + dec := json.NewDecoder(bytes.NewReader(muts[i].Key)) + dec.UseNumber() + + args := make([]interface{}, 0, len(a.mu.pks)) + if err := dec.Decode(&args); err != nil { + return errors.WithStack(err) + } + + if len(args) != len(a.mu.pks) { + return errors.Errorf( + "schema drift detected: "+ + "inconsistent number of key columns: "+ + "received %d expect %d: "+ + "key %s@%s", + len(args), len(a.mu.pks), string(muts[i].Key), muts[i].Time) + } + + batch.Queue(a.mu.sql.delete, args...)
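+ // One DELETE statement is queued per mutation; SendBatch below + // flushes the entire batch to the database in a single round trip.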
+ } + + res := db.SendBatch(ctx, batch) + defer res.Close() + + for i, j := 0, batch.Len(); i < j; i++ { + _, err := res.Exec() + if err != nil { + return errors.Wrap(err, a.mu.sql.delete) + } + } + + return nil +} + +func (a *apply) upsertLocked( + ctx context.Context, db sinktypes.Batcher, muts []sinktypes.Mutation, +) error { + if len(muts) == 0 { + return nil + } + + batch := &pgx.Batch{} + + for i := range muts { + dec := json.NewDecoder(bytes.NewReader(muts[i].Data)) + dec.UseNumber() + + temp := make(map[string]interface{}) + if err := dec.Decode(&temp); err != nil { + return errors.WithStack(err) + } + + args := make([]interface{}, 0, len(a.mu.columns)) + for _, col := range a.mu.columns { + rawColName := col.Name.Raw() + decoded, ok := temp[rawColName] + delete(temp, rawColName) + if col.Ignored { + continue + } + // We're not going to worry about missing columns in the + // mutation to be applied unless it's a PK. If other new + // columns have been added to the target table, the source + // table might not have them yet. + if !ok && col.Primary { + return errors.Errorf( + "schema drift detected in %s: "+ + "missing PK column %s: "+ + "key %s@%s", + a.target, rawColName, + string(muts[i].Key), muts[i].Time) + } + args = append(args, decoded) + } + batch.Queue(a.mu.sql.upsert, args...) + + // If new columns have been added in the source table, but not + // in the destination, we want to error out. + if len(temp) != 0 { + var unexpected []string + for k := range temp { + unexpected = append(unexpected, k) + } + sort.Strings(unexpected) + return errors.Errorf( + "schema drift detected in %s: "+ + "unexpected columns %v: "+ + "key %s@%s", + a.target, unexpected, string(muts[i].Key), muts[i].Time) + } + } + + res := db.SendBatch(ctx, batch) + defer res.Close() + + for i, j := 0, batch.Len(); i < j; i++ { + if _, err := res.Exec(); err != nil { + return errors.Wrap(err, a.mu.sql.upsert) + } + } + return nil +} + +// refreshUnlocked updates the apply with new column information. +func (a *apply) refreshUnlocked(colData []sinktypes.ColData) { + a.mu.Lock() + defer a.mu.Unlock() + + var delete, upsert strings.Builder + lastPkColumn := 0 + + _, _ = fmt.Fprintf(&delete, "DELETE FROM %s WHERE (", a.target) + _, _ = fmt.Fprintf(&upsert, "UPSERT INTO %s (", a.target) + for i := range colData { + if colData[i].Ignored { + continue + } + if colData[i].Primary { + if i > 0 { + lastPkColumn = i + delete.WriteString(", ") + } + delete.WriteString(colData[i].Name.String()) + } + if i > 0 { + upsert.WriteString(", ") + } + upsert.WriteString(colData[i].Name.String()) + } + delete.WriteString(") IN (SELECT ") + upsert.WriteString(") SELECT ") + for i := range colData { + if colData[i].Ignored { + continue + } + if colData[i].Primary { + if i > 0 { + delete.WriteString(", ") + } + _, _ = fmt.Fprintf(&delete, "$%d::%s", i+1, colData[i].Type) + } + if i > 0 { + upsert.WriteString(", ") + } + + // The GEO types need some additional help to convert them from + // the JSON-style representations that we get. 
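+ // For example, a hypothetical GEOGRAPHY column bound to + // placeholder $2 renders as st_geogfromgeojson($2::jsonb) + // rather than as a plain $2::GEOGRAPHY cast.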
+ switch colData[i].Type { + case "GEOGRAPHY": + _, _ = fmt.Fprintf(&upsert, "st_geogfromgeojson($%d::jsonb)", i+1) + case "GEOMETRY": + _, _ = fmt.Fprintf(&upsert, "st_geomfromgeojson($%d::jsonb)", i+1) + default: + _, _ = fmt.Fprintf(&upsert, "$%d::%s", i+1, colData[i].Type) + } + } + delete.WriteString(")") + + a.mu.columns = colData + a.mu.pks = colData[:lastPkColumn+1] + a.mu.sql.delete = delete.String() + a.mu.sql.upsert = upsert.String() +} diff --git a/internal/backend/apply/apply_test.go b/internal/backend/apply/apply_test.go new file mode 100644 index 00000000..ddb13090 --- /dev/null +++ b/internal/backend/apply/apply_test.go @@ -0,0 +1,416 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package apply + +import ( + "encoding/json" + "fmt" + "testing" + + "github.com/cockroachdb/cdc-sink/internal/backend/schemawatch" + "github.com/cockroachdb/cdc-sink/internal/backend/sinktest" + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/batches" + "github.com/stretchr/testify/assert" +) + +// This test inserts and deletes rows from a trivial table. +func TestApply(t *testing.T) { + a := assert.New(t) + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + watchers, cancel := schemawatch.NewWatchers(dbInfo.Pool()) + defer cancel() + + type Payload struct { + Pk0 int `json:"pk0"` + Pk1 string `json:"pk1"` + } + tbl, err := sinktest.CreateTable(ctx, dbName, + "CREATE TABLE %s (pk0 INT, pk1 STRING, PRIMARY KEY (pk0,pk1))") + if !a.NoError(err) { + return + } + + watcher, err := watchers.Get(ctx, dbName) + if !a.NoError(err) { + return + } + + app, cancel, err := newApply(watcher, tbl.Name()) + if !a.NoError(err) { + return + } + defer cancel() + + t.Log(app.mu.sql.delete) + t.Log(app.mu.sql.upsert) + + t.Run("smoke", func(t *testing.T) { + a := assert.New(t) + count := 3 * batches.Size() + adds := make([]sinktypes.Mutation, count) + dels := make([]sinktypes.Mutation, count) + for i := range adds { + p := Payload{Pk0: i, Pk1: fmt.Sprintf("X%dX", i)} + bytes, err := json.Marshal(p) + a.NoError(err) + adds[i] = sinktypes.Mutation{Data: bytes} + + bytes, err = json.Marshal([]interface{}{p.Pk0, p.Pk1}) + a.NoError(err) + dels[i] = sinktypes.Mutation{Key: bytes} + } + + // Verify insertion + a.NoError(app.Apply(ctx, dbInfo.Pool(), adds)) + ct, err := tbl.RowCount(ctx) + a.Equal(count, ct) + a.NoError(err) + + // Verify that they can be deleted. 
+ a.NoError(app.Apply(ctx, dbInfo.Pool(), dels)) + ct, err = tbl.RowCount(ctx) + a.Equal(0, ct) + a.NoError(err) + }) + + // Verify unexpected incoming column + t.Run("unexpected", func(t *testing.T) { + a := assert.New(t) + if err := app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{ + { + Data: []byte(`{"pk0":1, "pk1":0, "no_good":true}`), + }, + }); a.Error(err) { + t.Log(err.Error()) + a.Contains(err.Error(), "unexpected columns [no_good]") + } + }) + + t.Run("missing_key_upsert", func(t *testing.T) { + a := assert.New(t) + if err := app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{ + { + Data: []byte(`{"pk0":1}`), + }, + }); a.Error(err) { + t.Log(err.Error()) + a.Contains(err.Error(), "missing PK column pk1") + } + }) + + t.Run("missing_key_delete_too_few", func(t *testing.T) { + a := assert.New(t) + if err := app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{ + { + Key: []byte(`[1]`), + }, + }); a.Error(err) { + t.Log(err.Error()) + a.Contains(err.Error(), "received 1 expect 2") + } + }) + + t.Run("missing_key_delete_too_many", func(t *testing.T) { + a := assert.New(t) + if err := app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{ + { + Key: []byte(`[1, 2, 3]`), + }, + }); a.Error(err) { + t.Log(err.Error()) + a.Contains(err.Error(), "received 3 expect 2") + } + }) +} + +// This is a smoke test, copied from main_test.go to ensure that +// all supported data types can be applied. It works by creating +// a test table for each type and using CRDB's built-in to_jsonb() +// function to create a payload. +func TestAllDataTypes(t *testing.T) { + testcases := []struct { + name string + columnType string + columnValue string + indexable bool + }{ + {`string_array`, `STRING[]`, `{"sky","road","car"}`, false}, + {`string_array_null`, `STRING[]`, ``, false}, + {`int_array`, `INT[]`, `{1,2,3}`, false}, + {`int_array_null`, `INT[]`, ``, false}, + {`serial_array`, `SERIAL[]`, `{148591304110702593,148591304110702594,148591304110702595}`, false}, + {`serial_array_null`, `SERIAL[]`, ``, false}, + {`bit`, `VARBIT`, `10010101`, true}, + {`bit_null`, `VARBIT`, ``, false}, + {`bool`, `BOOL`, `true`, true}, + {`bool_array`, `BOOL[]`, `{true, false, true}`, false}, + {`bool_null`, `BOOL`, ``, false}, + {`bytes`, `BYTES`, `b'\141\061\142\062\143\063'`, true}, + {`collate`, `STRING COLLATE de`, `'a1b2c3' COLLATE de`, true}, + {`collate_null`, `STRING COLLATE de`, ``, false}, + {`date`, `DATE`, `2016-01-25`, true}, + {`date_null`, `DATE`, ``, false}, + {`decimal`, `DECIMAL`, `1.2345`, true}, + {`decimal_null`, `DECIMAL`, ``, false}, + {`float`, `FLOAT`, `1.2345`, true}, + {`float_null`, `FLOAT`, ``, false}, + {`geography`, `GEOGRAPHY`, `0101000020E6100000000000000000F03F0000000000000040`, false}, + {`geometry`, `GEOMETRY`, `010100000075029A081B9A5DC0F085C954C1F84040`, false}, + {`inet`, `INET`, `192.168.0.1`, true}, + {`inet_null`, `INET`, ``, false}, + {`int`, `INT`, `12345`, true}, + {`int_null`, `INT`, ``, false}, + {`interval`, `INTERVAL`, `2h30m30s`, true}, + {`interval_null`, `INTERVAL`, ``, false}, + { + `jsonb`, + `JSONB`, + ` + { + "string": "Lola", + "bool": true, + "number": 547, + "float": 123.456, + "array": [ + "lola", + true, + 547, + 123.456, + [ + "lola", + true, + 547, + 123.456 + ], + { + "string": "Lola", + "bool": true, + "number": 547, + "float": 123.456, + "array": [ + "lola", + true, + 547, + 123.456, + [ + "lola", + true, + 547, + 123.456 + ] + ] + } + ], + "map": { + "string": "Lola", + "bool": true, + "number": 547, + "float": 123.456, + "array": [ + "lola", + true, + 547, + 
123.456, + [ + "lola", + true, + 547, + 123.456 + ], + { + "string": "Lola", + "bool": true, + "number": 547, + "float": 123.456, + "array": [ + "lola", + true, + 547, + 123.456, + [ + "lola", + true, + 547, + 123.456 + ] + ] + } + ] + } + } + `, + false, + }, + {`jsonb_null`, `JSONB`, ``, false}, + {`serial`, `SERIAL`, `148591304110702593`, true}, + // serial cannot be null + {`string`, `STRING`, `a1b2c3`, true}, + {`string_null`, `STRING`, ``, false}, + {`string_escape`, `STRING`, `a1\b/2?c"3`, true}, + {`time`, `TIME`, `01:23:45.123456`, true}, + {`time_null`, `TIME`, ``, false}, + {`timestamp`, `TIMESTAMP`, `2016-01-25 10:10:10`, true}, + {`timestamp_null`, `TIMESTAMP`, ``, false}, + {`timestamptz`, `TIMESTAMPTZ`, `2016-01-25 10:10:10-05:00`, true}, + {`timestamptz_null`, `TIMESTAMPTZ`, ``, false}, + {`uuid`, `UUID`, `7f9c24e8-3b12-4fef-91e0-56a2d5a246ec`, true}, + {`uuid_null`, `UUID`, ``, false}, + } + + a := assert.New(t) + + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + watchers, cancel := schemawatch.NewWatchers(dbInfo.Pool()) + defer cancel() + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + a := assert.New(t) + + // Place the PK index on the data type under test, if allowable. + var create string + if tc.indexable { + create = fmt.Sprintf("CREATE TABLE %%s (val %s primary key, k int)", tc.columnType) + } else { + create = fmt.Sprintf("CREATE TABLE %%s (k int primary key, val %s)", tc.columnType) + } + + tbl, err := sinktest.CreateTable(ctx, dbName, create) + if !a.NoError(err) { + return + } + + watcher, err := watchers.Get(ctx, dbName) + if !a.NoError(err) { + return + } + + if !a.NoError(watcher.Refresh(ctx, dbInfo.Pool())) { + return + } + + app, cancel, err := newApply(watcher, tbl.Name()) + if !a.NoError(err) { + return + } + defer cancel() + + t.Log(app.mu.sql.delete) + t.Log(app.mu.sql.upsert) + + var jsonValue string + if tc.columnValue == "" { + jsonValue = "null" + } else { + q := fmt.Sprintf("SELECT to_json($1::%s)::string", tc.columnType) + if !a.NoError(dbInfo.Pool().QueryRow(ctx, q, tc.columnValue).Scan(&jsonValue)) { + return + } + } + t.Log(jsonValue) + + mut := sinktypes.Mutation{ + Data: []byte(fmt.Sprintf(`{"k":1,"val":%s}`, jsonValue)), + } + a.NoError(app.Apply(ctx, dbInfo.Pool(), []sinktypes.Mutation{mut})) + + var jsonFound string + a.NoError(dbInfo.Pool().QueryRow(ctx, + fmt.Sprintf("SELECT ifnull(to_json(val)::string, 'null') FROM %s", tbl), + ).Scan(&jsonFound)) + a.Equal(jsonValue, jsonFound) + }) + } +} + +// Ensure that if stored computed columns are present, we don't +// try to write to them and that we correctly ignore those columns +// in incoming payloads. 
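+// The c column below is computed and STORED, so the database always +// recomputes it; an incoming value for c is dropped rather than written.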
+func TestVirtualColumns(t *testing.T) { + a := assert.New(t) + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + watchers, cancel := schemawatch.NewWatchers(dbInfo.Pool()) + defer cancel() + + type Payload struct { + A int `json:"a"` + B int `json:"b"` + C int `json:"c"` + X int `json:"x,omitempty"` + } + tbl, err := sinktest.CreateTable(ctx, dbName, + "CREATE TABLE %s (a INT, b INT, c INT AS (a + b) STORED, PRIMARY KEY (a,b))") + if !a.NoError(err) { + return + } + + watcher, err := watchers.Get(ctx, dbName) + if !a.NoError(err) { + return + } + + app, cancel, err := newApply(watcher, tbl.Name()) + if !a.NoError(err) { + return + } + defer cancel() + + t.Log(app.mu.sql.delete) + t.Log(app.mu.sql.upsert) + + t.Run("computed-is-ignored", func(t *testing.T) { + a := assert.New(t) + p := Payload{A: 1, B: 2, C: 3} + bytes, err := json.Marshal(p) + a.NoError(err) + muts := []sinktypes.Mutation{{Data: bytes}} + + a.NoError(app.Apply(ctx, dbInfo.Pool(), muts)) + }) + + t.Run("unknown-still-breaks", func(t *testing.T) { + a := assert.New(t) + p := Payload{A: 1, B: 2, C: 3, X: -1} + bytes, err := json.Marshal(p) + a.NoError(err) + muts := []sinktypes.Mutation{{Data: bytes}} + + err = app.Apply(ctx, dbInfo.Pool(), muts) + if a.Error(err) { + a.Contains(err.Error(), "unexpected columns") + } + }) +} diff --git a/internal/backend/apply/factory.go b/internal/backend/apply/factory.go new file mode 100644 index 00000000..12c990b9 --- /dev/null +++ b/internal/backend/apply/factory.go @@ -0,0 +1,73 @@ +package apply + +import ( + "context" + "sync" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/ident" +) + +// factory vends singleton instances of apply. +type factory struct { + watchers sinktypes.Watchers + mu struct { + sync.RWMutex + cleanup []func() + instances map[ident.Table]*apply + } +} + +var _ sinktypes.Appliers = (*factory)(nil) + +// New returns an instance of sinktypes.Appliers. +func New(watchers sinktypes.Watchers) (_ sinktypes.Appliers, cancel func()) { + f := &factory{watchers: watchers} + f.mu.instances = make(map[ident.Table]*apply) + return f, func() { + f.mu.Lock() + defer f.mu.Unlock() + for _, fn := range f.mu.cleanup { + fn() + } + f.mu.cleanup = nil + f.mu.instances = nil + } +} + +// Get returns a memoized instance of the Applier for the table.
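+// Get is safe for concurrent use: a read-locked fast path returns an +// existing instance, and the exclusive lock is taken only the first +// time a given table is requested.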
+func (f *factory) Get( + ctx context.Context, table ident.Table, +) (sinktypes.Applier, error) { + if ret := f.getUnlocked(table); ret != nil { + return ret, nil + } + return f.createUnlocked(ctx, table) +} + +func (f *factory) createUnlocked( + ctx context.Context, table ident.Table, +) (*apply, error) { + f.mu.Lock() + defer f.mu.Unlock() + + if ret := f.mu.instances[table]; ret != nil { + return ret, nil + } + watcher, err := f.watchers.Get(ctx, table.Database()) + if err != nil { + return nil, err + } + ret, cancel, err := newApply(watcher, table) + if err == nil { + f.mu.cleanup = append(f.mu.cleanup, cancel) + f.mu.instances[table] = ret + } + return ret, err +} + +func (f *factory) getUnlocked(table ident.Table) *apply { + f.mu.RLock() + defer f.mu.RUnlock() + return f.mu.instances[table] +} diff --git a/internal/backend/mutation/factory.go b/internal/backend/mutation/factory.go new file mode 100644 index 00000000..288c37b9 --- /dev/null +++ b/internal/backend/mutation/factory.go @@ -0,0 +1,62 @@ +package mutation + +import ( + "context" + "sync" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/jackc/pgx/v4/pgxpool" +) + +type factory struct { + db *pgxpool.Pool + stagingDB ident.Ident + + mu struct { + sync.RWMutex + instances map[ident.Table]*store + } +} + +var _ sinktypes.MutationStores = (*factory)(nil) + +// New returns an instance of sinktypes.MutationStores that stores +// temporary data in the given SQL database. +func New(db *pgxpool.Pool, stagingDB ident.Ident) sinktypes.MutationStores { + f := &factory{ + db: db, + stagingDB: stagingDB, + } + f.mu.instances = make(map[ident.Table]*store) + return f +} + +// Get returns a memoized instance of a store for the given table. +func (f *factory) Get(ctx context.Context, target ident.Table) (sinktypes.MutationStore, error) { + if ret := f.getUnlocked(target); ret != nil { + return ret, nil + } + return f.createUnlocked(ctx, target) +} + +func (f *factory) createUnlocked(ctx context.Context, table ident.Table) (*store, error) { + f.mu.Lock() + defer f.mu.Unlock() + + if ret := f.mu.instances[table]; ret != nil { + return ret, nil + } + + ret, err := newStore(ctx, f.db, f.stagingDB, table) + if err == nil { + f.mu.instances[table] = ret + } + return ret, err +} + +func (f *factory) getUnlocked(table ident.Table) *store { + f.mu.RLock() + defer f.mu.RUnlock() + return f.mu.instances[table] +} diff --git a/internal/backend/mutation/store.go b/internal/backend/mutation/store.go new file mode 100644 index 00000000..122396b2 --- /dev/null +++ b/internal/backend/mutation/store.go @@ -0,0 +1,159 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package mutation defines a means of storing and retrieving mutations +// to be applied to a table. +package mutation + +// The code in this file is reworked from sink_table.go.
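+// +// As an illustration (assuming the staging database is named +// "_cdc_sink"), mutations destined for a target table foo.public.bar +// are staged in a table named _cdc_sink.public._foo_public_bar.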
+ +import ( + "context" + "fmt" + "log" + "strings" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/batches" + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/jackc/pgtype/pgxtype" + "github.com/jackc/pgx/v4" + "github.com/pkg/errors" +) + +// store implements a storage and retrieval mechanism for staging +// Mutation instances. +type store struct { + // The staging table that holds the mutations. + stage ident.Table + + // Compute SQL fragments exactly once on startup. + sql struct { + drain string // drain rows from the staging table + store string // store mutations + } +} + +var _ sinktypes.MutationStore = (*store)(nil) + +// newStore constructs a new mutation store that will track pending +// mutations to be applied to the given target table. +func newStore( + ctx context.Context, db pgxtype.Querier, stagingDB ident.Ident, target ident.Table, +) (*store, error) { + mangledName := "_" + strings.Join( + []string{target.Database().Raw(), target.Schema().Raw(), target.Table().Raw()}, "_") + stage := ident.NewTable(stagingDB, ident.Public, ident.New(mangledName)) + + if err := retry.Execute(ctx, db, fmt.Sprintf(` +CREATE TABLE IF NOT EXISTS %s ( + nanos INT NOT NULL, + logical INT NOT NULL, + key STRING NOT NULL, + mut JSONB NOT NULL, + PRIMARY KEY (nanos, logical, key) +)`, stage)); err != nil { + return nil, err + } + + s := &store{stage: stage} + + s.sql.drain = fmt.Sprintf(drainTemplate, stage) + s.sql.store = fmt.Sprintf(putTemplate, stage) + + return s, nil +} + +const drainTemplate = ` +WITH d AS (DELETE FROM %s +WHERE (nanos, logical) BETWEEN ($1, $2) AND ($3, $4) +RETURNING nanos, logical, key, mut) +SELECT DISTINCT ON (key) nanos, logical, key, mut FROM d +ORDER BY key ASC, nanos DESC, logical DESC +` + +// Drain dequeues mutations between the given timestamps. +func (s *store) Drain( + ctx context.Context, tx pgxtype.Querier, prev, next hlc.Time, +) ([]sinktypes.Mutation, error) { + var ret []sinktypes.Mutation + err := retry.Retry(ctx, func(ctx context.Context) error { + rows, err := tx.Query(ctx, s.sql.drain, + prev.Nanos(), prev.Logical(), next.Nanos(), next.Logical(), + ) + if err != nil { + return err + } + defer rows.Close() + + // Clear any previous loop, but save the backing array. + ret = ret[:0] + for rows.Next() { + var mut sinktypes.Mutation + var nanos int64 + var logical int + if err := rows.Scan(&nanos, &logical, &mut.Key, &mut.Data); err != nil { + return err + } + mut.Time = hlc.New(nanos, logical) + ret = append(ret, mut) + } + return nil + }) + return ret, errors.Wrapf(err, "drain %s [%s, %s]", s.stage, prev, next) +} + +// Arrays of JSONB aren't implemented +// https://github.com/cockroachdb/cockroach/issues/23468 +const putTemplate = `UPSERT INTO %s (nanos, logical, key, mut) VALUES ($1, $2, $3, $4)` + +// Store stores some number of Mutations into the database. 
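+// The input is chunked via batches.Batch, so arbitrarily large slices +// are written as a series of bounded batches.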
+func (s *store) Store( + ctx context.Context, db sinktypes.Batcher, mutations []sinktypes.Mutation, +) error { + return batches.Batch(len(mutations), func(begin, end int) error { + return s.putOne(ctx, db, mutations[begin:end]) + }) +} + +func (s *store) putOne( + ctx context.Context, db sinktypes.Batcher, mutations []sinktypes.Mutation, +) error { + batch := &pgx.Batch{} + + for i := range mutations { + var jsonText string + if mutations[i].Delete() { + jsonText = "null" + } else { + jsonText = string(mutations[i].Data) + } + + batch.Queue(s.sql.store, + mutations[i].Time.Nanos(), + mutations[i].Time.Logical(), + string(mutations[i].Key), + jsonText) + } + + res := db.SendBatch(ctx, batch) + defer res.Close() + + for i, j := 0, batch.Len(); i < j; i++ { + if _, err := res.Exec(); err != nil { + return errors.Wrap(err, s.sql.store) + } + } + + log.Printf("staged %d entries for %s", len(mutations), s.stage) + return nil +} diff --git a/internal/backend/mutation/store_test.go b/internal/backend/mutation/store_test.go new file mode 100644 index 00000000..1e26b051 --- /dev/null +++ b/internal/backend/mutation/store_test.go @@ -0,0 +1,88 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package mutation + +import ( + "fmt" + "testing" + + "github.com/cockroachdb/cdc-sink/internal/backend/sinktest" + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/batches" + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/stretchr/testify/assert" +) + +// TestPutAndDrain will insert and dequeue a batch of Mutations. +func TestPutAndDrain(t *testing.T) { + a := assert.New(t) + ctx, dbInfo, cancel := sinktest.Context() + a.NotEmpty(dbInfo.Version()) + defer cancel() + + targetDB, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + factory := New(dbInfo.Pool(), ident.StagingDB) + + dummyTarget := ident.NewTable( + targetDB, ident.Public, ident.New("target")) + + s, err := factory.Get(ctx, dummyTarget) + if !a.NoError(err) { + return + } + a.NotNil(s) + + stagingTable := s.(*store).stage + + // Cook test data. + total := 3 * batches.Size() + muts := make([]sinktypes.Mutation, total) + for i := range muts { + muts[i] = sinktypes.Mutation{ + Data: []byte(fmt.Sprintf(`{"pk": %d}`, i)), + Key: []byte(fmt.Sprintf(`[%d]`, i)), + Time: hlc.New(int64(1000*i), i), + } + } + + // Insert. + a.NoError(s.Store(ctx, dbInfo.Pool(), muts)) + + // Sanity-check table. + count, err := sinktest.GetRowCount(ctx, dbInfo.Pool(), stagingTable) + a.NoError(err) + a.Equal(total, count) + + // Ensure that data insertion is idempotent. + a.NoError(s.Store(ctx, dbInfo.Pool(), muts)) + + // Sanity-check table. + count, err = sinktest.GetRowCount(ctx, dbInfo.Pool(), stagingTable) + a.NoError(err) + a.Equal(total, count) + + // Dequeue. + ret, err := s.Drain(ctx, dbInfo.Pool(), + hlc.Zero(), hlc.New(int64(1000*total+1), 0)) + a.NoError(err) + a.Len(ret, total) + + // Should be empty now. 
+ count, err = sinktest.GetRowCount(ctx, dbInfo.Pool(), stagingTable) + a.NoError(err) + a.Equal(0, count) +} diff --git a/internal/backend/schemawatch/coldata.go b/internal/backend/schemawatch/coldata.go new file mode 100644 index 00000000..372fb364 --- /dev/null +++ b/internal/backend/schemawatch/coldata.go @@ -0,0 +1,86 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package schemawatch + +import ( + "context" + "fmt" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/jackc/pgtype/pgxtype" +) + +func colSliceEqual(a, b []sinktypes.ColData) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +// Retrieve the primary key columns in their index-order, then append +// any remaining non-generated columns. +const sqlColumnsQuery = ` +WITH +pks AS ( + SELECT column_name, seq_in_index FROM [SHOW INDEX FROM %[1]s] + WHERE index_name = 'primary' AND NOT storing), +cols AS ( + SELECT column_name, data_type, generation_expression != '' AS ignored + FROM [SHOW COLUMNS FROM %[1]s]), +ordered AS ( + SELECT column_name, min(ifnull(pks.seq_in_index, 2048)) AS seq_in_index FROM + cols LEFT JOIN pks USING (column_name) + GROUP BY column_name) +SELECT cols.column_name, pks.seq_in_index IS NOT NULL, cols.data_type, cols.ignored +FROM cols +JOIN ordered USING (column_name) +LEFT JOIN pks USING (column_name) +ORDER BY ordered.seq_in_index, cols.column_name +` + +// getColumns returns the column names for the primary key columns in +// their index-order, followed by all other columns that should be +// mutated. +func getColumns( + ctx context.Context, tx pgxtype.Querier, table ident.Table, +) ([]sinktypes.ColData, error) { + stmt := fmt.Sprintf(sqlColumnsQuery, table) + + var columns []sinktypes.ColData + err := retry.Retry(ctx, func(ctx context.Context) error { + rows, err := tx.Query(ctx, stmt) + if err != nil { + return err + } + defer rows.Close() + + // Clear from previous loop. + columns = columns[:0] + for rows.Next() { + var column sinktypes.ColData + var name string + if err := rows.Scan(&name, &column.Primary, &column.Type, &column.Ignored); err != nil { + return err + } + column.Name = ident.New(name) + columns = append(columns, column) + } + return nil + }) + return columns, err +} diff --git a/internal/backend/schemawatch/coldata_test.go b/internal/backend/schemawatch/coldata_test.go new file mode 100644 index 00000000..b7b56e87 --- /dev/null +++ b/internal/backend/schemawatch/coldata_test.go @@ -0,0 +1,133 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package schemawatch + +// This file contains code repackaged from sql_test.go. 
+ +import ( + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/cdc-sink/internal/backend/sinktest" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/stretchr/testify/assert" +) + +func TestGetColumns(t *testing.T) { + a := assert.New(t) + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + // Create the test db + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + type testcase struct { + tableSchema string + primaryKeys []string + dataCols []string + } + testcases := []testcase{ + { + "a INT", + []string{"rowid"}, + []string{"a"}, + }, + { + "a INT PRIMARY KEY", + []string{"a"}, + nil, + }, + { + "a INT, b INT, PRIMARY KEY (a,b)", + []string{"a", "b"}, + nil, + }, + { + "a INT, b INT, PRIMARY KEY (b,a)", + []string{"b", "a"}, + nil, + }, + { + "a INT, b INT, c INT, PRIMARY KEY (b,a,c)", + []string{"b", "a", "c"}, + nil, + }, + { + "a INT, b INT, q INT, c INT, r INT, PRIMARY KEY (b,a,c)", + []string{"b", "a", "c"}, + []string{"q", "r"}, + }, + { + "a INT, b INT, r INT, c INT, q INT, PRIMARY KEY (b,a,c) USING HASH WITH BUCKET_COUNT = 8", + []string{"ignored_crdb_internal_a_b_c_shard_8", "b", "a", "c"}, + []string{"q", "r"}, + }, + // Ensure that computed columns are ignored. + { + tableSchema: "a INT, b INT, " + + "c INT AS (a + b) STORED, " + + "PRIMARY KEY (a,b)", + primaryKeys: []string{"a", "b"}, + dataCols: []string{"ignored_c"}, + }, + } + + // Virtual columns not supported before v21.1 + if !strings.Contains(dbInfo.Version(), "v20.2.") { + testcases = append(testcases, + testcase{ + tableSchema: "a INT, b INT, " + + "c INT AS (a + b) STORED, " + + "d INT AS (a + b) VIRTUAL, " + + "PRIMARY KEY (a,b)", + primaryKeys: []string{"a", "b"}, + dataCols: []string{"ignored_c", "ignored_d"}, + }, + ) + } + + for i, test := range testcases { + t.Run(fmt.Sprintf("%d:%s", i, test.tableSchema), func(t *testing.T) { + a := assert.New(t) + + tableName := ident.NewTable(dbName, ident.Public, ident.Newf("test_%d", i)) + if !a.NoError(retry.Execute(ctx, dbInfo.Pool(), + fmt.Sprintf(`CREATE TABLE %s ( %s )`, tableName, test.tableSchema))) { + return + } + colData, err := getColumns(ctx, dbInfo.Pool(), tableName) + if !a.NoError(err) { + return + } + var primaryKeys, dataCols []string + for i := range colData { + a.NotEmpty(colData[i].Type) + name := colData[i].Name.Raw() + if colData[i].Ignored { + name = "ignored_" + name + } + if colData[i].Primary { + primaryKeys = append(primaryKeys, name) + } else { + dataCols = append(dataCols, name) + } + } + a.Equal(test.primaryKeys, primaryKeys) + a.Equal(test.dataCols, dataCols) + }) + } +} diff --git a/internal/backend/schemawatch/watcher.go b/internal/backend/schemawatch/watcher.go new file mode 100644 index 00000000..b40c9f6a --- /dev/null +++ b/internal/backend/schemawatch/watcher.go @@ -0,0 +1,202 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package schemawatch contains code to allow the schema of a target +// database to be queried and monitored. 
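+// +// A minimal usage sketch, assuming hypothetical ctx, pool, and table +// variables: +// +// watchers, cancel := schemawatch.NewWatchers(pool) +// defer cancel() +// w, err := watchers.Get(ctx, table.Database()) +// ch, cancelWatch, err := w.Watch(table)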
+package schemawatch + +import ( + "context" + "flag" + "fmt" + "log" + "sync" + "time" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/jackc/pgtype/pgxtype" + "github.com/pkg/errors" +) + +// RefreshDelay controls how often a Watcher will refresh its schema. +var RefreshDelay = flag.Duration("schemaRefresh", time.Minute, + "how often to scan for schema changes") + +// dbSchema is a simplified representation of a SQL database's schema. +type dbSchema map[ident.Table][]sinktypes.ColData + +// A Watcher maintains an internal cache of a database's schema, +// allowing callers to receive notifications of schema changes. +type Watcher struct { + // All goroutines used by Watch use this as a parent context. + background context.Context + dbName ident.Ident + delay time.Duration + + mu struct { + sync.RWMutex + cond sync.Cond // Conditional on the RLocker + data dbSchema + } + + sql struct { + tables string + } +} + +var _ sinktypes.Watcher = (*Watcher)(nil) + +// newWatcher constructs a new Watcher to monitor the table schema in the +// named database. The returned Watcher will refresh itself in the +// background until the cancel callback is executed. +func newWatcher( + ctx context.Context, tx pgxtype.Querier, dbName ident.Ident, +) (_ *Watcher, cancel func(), _ error) { + background, cancel := context.WithCancel(context.Background()) + + w := &Watcher{ + background: background, + delay: *RefreshDelay, + dbName: dbName, + } + w.mu.cond.L = w.mu.RLocker() + w.sql.tables = fmt.Sprintf(tableTemplate, dbName) + + // Initial data load to sanity-check and make ready. + data, err := w.getTables(ctx, tx) + if err != nil { + cancel() + return nil, nil, err + } + w.mu.data = data + + go func() { + for { + select { + case <-background.Done(): + return + case <-time.After(w.delay): + } + + if err := w.Refresh(background, tx); err != nil { + log.Printf("could not refresh table data: %v", err) + } + } + }() + + return w, cancel, nil +} + +// Refresh immediately refreshes the Watcher's internal cache. This +// is intended for use by tests. +func (w *Watcher) Refresh(ctx context.Context, tx pgxtype.Querier) error { + data, err := w.getTables(ctx, tx) + if err != nil { + return err + } + + w.mu.Lock() + w.mu.data = data + w.mu.Unlock() + w.mu.cond.Broadcast() + return nil +} + +// Snapshot returns the latest known schema for the target database. +func (w *Watcher) Snapshot() map[ident.Table][]sinktypes.ColData { + w.mu.RLock() + defer w.mu.RUnlock() + + ret := make(map[ident.Table][]sinktypes.ColData, len(w.mu.data)) + for name, cols := range w.mu.data { + ret[name] = append(cols[:0], cols...) + } + return ret +} + +// Watch will send updated column data for the given table until the +// watch is canceled. The requested table must already be known to the +// Watcher. +func (w *Watcher) Watch(table ident.Table) (_ <-chan []sinktypes.ColData, cancel func(), _ error) { + w.mu.RLock() + defer w.mu.RUnlock() + if _, ok := w.mu.data[table]; !ok { + return nil, nil, errors.Errorf("unknown table %s", table) + } + + ctx, cancel := context.WithCancel(w.background) + ch := make(chan []sinktypes.ColData, 1) + + go func() { + defer close(ch) + + // All code below is read-locked, so we can't miss updates.
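+ // (sync.Cond.Wait atomically releases the read lock while + // parked and reacquires it before returning, so a Broadcast + // from Refresh cannot be missed between the comparison below + // and the next Wait.)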
+ w.mu.cond.L.Lock() + defer w.mu.cond.L.Unlock() + + var last []sinktypes.ColData + for { + next, ok := w.mu.data[table] + // Respond to context cancellation or dropping the table. + if !ok || ctx.Err() != nil { + return + } + + // We're read-locked, so this isn't hugely critical. + if !colSliceEqual(last, next) { + select { + case <-ctx.Done(): + return + case ch <- next: + last = next + default: + log.Fatal("ColData watcher excessively behind") + } + } + + w.mu.cond.Wait() + } + }() + return ch, cancel, nil +} + +const tableTemplate = `SELECT schema_name, table_name FROM [SHOW TABLES FROM %s]` + +func (w *Watcher) getTables( + ctx context.Context, tx pgxtype.Querier, +) (dbSchema, error) { + var ret dbSchema + err := retry.Retry(ctx, func(ctx context.Context) error { + rows, err := tx.Query(ctx, w.sql.tables) + if err != nil { + return err + } + defer rows.Close() + + ret = make(dbSchema) + for rows.Next() { + var schema, table string + if err := rows.Scan(&schema, &table); err != nil { + return err + } + tbl := ident.NewTable(w.dbName, ident.New(schema), ident.New(table)) + cols, err := getColumns(ctx, tx, tbl) + if err != nil { + return err + } + ret[tbl] = cols + } + return nil + }) + + return ret, errors.Wrap(err, w.sql.tables) +} diff --git a/internal/backend/schemawatch/watcher_test.go b/internal/backend/schemawatch/watcher_test.go new file mode 100644 index 00000000..4ac03f24 --- /dev/null +++ b/internal/backend/schemawatch/watcher_test.go @@ -0,0 +1,99 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package schemawatch + +import ( + "fmt" + "testing" + "time" + + "github.com/cockroachdb/cdc-sink/internal/backend/sinktest" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/stretchr/testify/assert" +) + +func TestWatch(t *testing.T) { + a := assert.New(t) + + // Override the delay to exercise the background goroutine. + *RefreshDelay = time.Second + defer func() { *RefreshDelay = time.Minute }() + + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + // Bootstrap column. + tblInfo, err := sinktest.CreateTable(ctx, dbName, "CREATE TABLE %s (pk INT PRIMARY KEY)") + if !a.NoError(err) { + return + } + + w, cancel, err := newWatcher(ctx, dbInfo.Pool(), dbName) + if !a.NoError(err) { + return + } + defer cancel() + + ch, cancel, err := w.Watch(tblInfo.Name()) + if !a.NoError(err) { + return + } + defer cancel() + + select { + case <-time.After(10 * time.Second): + a.FailNow("timed out waiting for channel data") + case data := <-ch: + if a.Len(data, 1) { + a.Equal("pk", data[0].Name.Raw()) + } + } + + // Add a column and expect to see it. + if !a.NoError(retry.Execute(ctx, dbInfo.Pool(), + fmt.Sprintf("ALTER TABLE %s ADD COLUMN v STRING", tblInfo.Name()))) { + return + } + + select { + case <-time.After(10 * time.Second): + a.FailNow("timed out waiting for channel data") + case data := <-ch: + if a.Len(data, 2) { + a.Equal("pk", data[0].Name.Raw()) + a.Equal("v", data[1].Name.Raw()) + } + } + + // Expect the channel to close if the table is dropped. 
+	if !a.NoError(tblInfo.DropTable(ctx)) {
+		return
+	}
+	select {
+	case <-time.After(10 * time.Second):
+		a.FailNow("timed out waiting for channel close")
+	case _, open := <-ch:
+		a.False(open)
+	}
+
+	// Check that we error out quickly on unknown tables.
+	ch, cancel, err = w.Watch(ident.NewTable(dbName, ident.Public, ident.New("blah")))
+	a.Nil(ch)
+	a.Nil(cancel)
+	a.Error(err)
+}
diff --git a/internal/backend/schemawatch/watchers.go b/internal/backend/schemawatch/watchers.go
new file mode 100644
index 00000000..3c1b2191
--- /dev/null
+++ b/internal/backend/schemawatch/watchers.go
@@ -0,0 +1,83 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package schemawatch
+
+import (
+	"context"
+	"sync"
+
+	"github.com/cockroachdb/cdc-sink/internal/sinktypes"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/jackc/pgx/v4/pgxpool"
+)
+
+// Watchers is a memoizing factory for Watcher instances.
+type Watchers struct {
+	pool *pgxpool.Pool
+	mu   struct {
+		sync.RWMutex
+		cancels []func()
+		data    map[ident.Ident]*Watcher
+	}
+}
+
+var _ sinktypes.Watchers = (*Watchers)(nil)
+
+// NewWatchers creates a Watchers factory.
+func NewWatchers(pool *pgxpool.Pool) (_ *Watchers, cancel func()) {
+	w := &Watchers{pool: pool}
+	w.mu.data = make(map[ident.Ident]*Watcher)
+	return w, w.close
+}
+
+// Get creates or returns a memoized Watcher for the given database.
+func (w *Watchers) Get(ctx context.Context, db ident.Ident) (sinktypes.Watcher, error) {
+	if ret := w.getUnlocked(db); ret != nil {
+		return ret, nil
+	}
+	return w.createUnlocked(ctx, db)
+}
+
+// close destroys all Watcher instances associated with the factory.
+func (w *Watchers) close() {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	for _, cancel := range w.mu.cancels {
+		cancel()
+	}
+	w.mu.cancels = nil
+	w.mu.data = make(map[ident.Ident]*Watcher)
+}
+
+func (w *Watchers) createUnlocked(ctx context.Context, db ident.Ident) (*Watcher, error) {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+
+	if ret := w.mu.data[db]; ret != nil {
+		return ret, nil
+	}
+
+	ret, cancel, err := newWatcher(ctx, w.pool, db)
+	if err != nil {
+		return nil, err
+	}
+
+	w.mu.cancels = append(w.mu.cancels, cancel)
+	w.mu.data[db] = ret
+	return ret, nil
+}
+
+func (w *Watchers) getUnlocked(db ident.Ident) *Watcher {
+	w.mu.RLock()
+	defer w.mu.RUnlock()
+	return w.mu.data[db]
+}
diff --git a/internal/backend/sinktest/context.go b/internal/backend/sinktest/context.go
new file mode 100644
index 00000000..62767dbf
--- /dev/null
+++ b/internal/backend/sinktest/context.go
@@ -0,0 +1,45 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package sinktest
+
+import (
+	"context"
+	"flag"
+	"time"
+)
+
+var caseTimeout = flag.Duration(
+	"caseTimeout",
+	2*time.Minute,
+	"raise this value when debugging to allow individual tests to run longer",
+)
+
+// key is a typesafe context key used by Context().
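+// Using an unexported struct{} type as the key guarantees that no
+// other package can collide with the value stored by Context().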
+type key struct{}
+
+// Context returns a per-test Context which has a common timeout
+// behavior and global connection pool. This method will panic if
+// the global database connection could not be established.
+func Context() (context.Context, *DBInfo, context.CancelFunc) {
+	ctx, cancel := context.WithTimeout(context.Background(), *caseTimeout)
+	db, err := bootstrap(ctx)
+	if err != nil {
+		panic(err)
+	}
+	ctx = context.WithValue(ctx, key{}, db)
+	return ctx, db, cancel
+}
+
+// DB returns the database associated with the Context.
+func DB(ctx context.Context) *DBInfo {
+	info, _ := ctx.Value(key{}).(*DBInfo)
+	return info
+}
diff --git a/internal/backend/sinktest/info.go b/internal/backend/sinktest/info.go
new file mode 100644
index 00000000..913072d7
--- /dev/null
+++ b/internal/backend/sinktest/info.go
@@ -0,0 +1,69 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package sinktest
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/cockroachdb/cdc-sink/internal/util/retry"
+	"github.com/jackc/pgx/v4/pgxpool"
+)
+
+// DBInfo encapsulates metadata and a connection to a database.
+type DBInfo struct {
+	db      *pgxpool.Pool
+	version string
+}
+
+// Pool returns the underlying database connection.
+func (di DBInfo) Pool() *pgxpool.Pool { return di.db }
+
+// Version returns the database version.
+func (di DBInfo) Version() string { return di.version }
+
+// TableInfo provides a named table and a means to access it.
+type TableInfo struct {
+	*DBInfo
+	name ident.Table
+}
+
+// NewTableInfo constructs a TableInfo using the given name.
+func NewTableInfo(db *DBInfo, name ident.Table) TableInfo {
+	return TableInfo{db, name}
+}
+
+// DeleteAll deletes (not TRUNCATEs) all rows in the table.
+func (ti TableInfo) DeleteAll(ctx context.Context) error {
+	return retry.Execute(ctx, ti.db, fmt.Sprintf("DELETE FROM %s WHERE true", ti.name))
+}
+
+// DropTable drops the table if it exists.
+func (ti TableInfo) DropTable(ctx context.Context) error {
+	return retry.Execute(ctx, ti.db, fmt.Sprintf("DROP TABLE IF EXISTS %s", ti.name))
+}
+
+// Exec executes a single SQL statement. The sql string must include
+// a single string substitution marker to receive the table name.
+func (ti TableInfo) Exec(ctx context.Context, sql string, args ...interface{}) error {
+	return retry.Execute(ctx, ti.Pool(), fmt.Sprintf(sql, ti.Name()), args...)
+}
+
+// Name returns the table name.
+func (ti TableInfo) Name() ident.Table { return ti.name }
+
+// RowCount returns the number of rows in the table.
+func (ti TableInfo) RowCount(ctx context.Context) (int, error) {
+	return GetRowCount(ctx, ti.db, ti.Name())
+}
+
+func (ti TableInfo) String() string { return ti.name.String() }
diff --git a/internal/backend/sinktest/sinktest.go b/internal/backend/sinktest/sinktest.go
new file mode 100644
index 00000000..54af1579
--- /dev/null
+++ b/internal/backend/sinktest/sinktest.go
@@ -0,0 +1,171 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package sinktest contains code to assist in writing tests. +package sinktest + +import ( + "context" + "flag" + "fmt" + "log" + "math/rand" + "os" + "sync" + "time" + + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/jackc/pgtype/pgxtype" + "github.com/jackc/pgx/v4" + "github.com/jackc/pgx/v4/pgxpool" + "github.com/pkg/errors" +) + +var connString = flag.String("testConnect", + "postgresql://root@localhost:26257/defaultdb?sslmode=disable&experimental_enable_hash_sharded_indexes=true", + "the connection string to use for testing") + +var globalDBInfo struct { + sync.Mutex + *DBInfo +} + +func bootstrap(ctx context.Context) (*DBInfo, error) { + globalDBInfo.Lock() + defer globalDBInfo.Unlock() + + if globalDBInfo.DBInfo != nil { + return globalDBInfo.DBInfo, nil + } + + if !flag.Parsed() { + flag.Parse() + } + + // Create the testing database + rand.Seed(time.Now().UnixNano()) + + ctx, cancel := context.WithTimeout(ctx, 10*time.Second) + defer cancel() + + pool, err := pgxpool.Connect(ctx, *connString) + if err != nil { + return nil, errors.Wrap(err, "could not open database connection") + } + globalDBInfo.DBInfo = &DBInfo{db: pool} + + if lic, ok := os.LookupEnv("COCKROACH_DEV_LICENSE"); ok { + if err := retry.Execute(ctx, pool, + "SET CLUSTER SETTING cluster.organization = $1", + "Cockroach Labs - Production Testing", + ); err != nil { + return nil, errors.Wrap(err, "could not set cluster.organization") + } + if err := retry.Execute(ctx, pool, + "SET CLUSTER SETTING enterprise.license = $1", lic, + ); err != nil { + return nil, errors.Wrap(err, "could not set enterprise.license") + } + } + + if err := retry.Execute(ctx, pool, + "SET CLUSTER SETTING kv.rangefeed.enabled = true"); err != nil { + return nil, errors.Wrap(err, "could not enable rangefeeds") + } + + if err := retry.Retry(ctx, func(ctx context.Context) error { + return pool.QueryRow(ctx, "SELECT version()").Scan(&globalDBInfo.version) + }); err != nil { + return nil, errors.Wrap(err, "could not determine cluster version") + } + + return globalDBInfo.DBInfo, nil +} + +// CreateDB creates a new testing SQL DATABASE whose lifetime is bounded +// by that of the associated context, which must be derived from the +// Context() method in this package. 
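+//
+// Typical usage, mirroring the tests in this repository:
+//
+//	ctx, dbInfo, cancel := sinktest.Context()
+//	defer cancel()
+//	dbName, cancel, err := sinktest.CreateDB(ctx)
+//	// check err, then defer cancel()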
+func CreateDB(ctx context.Context) (dbName ident.Ident, cancel func(), _ error) {
+	db := DB(ctx).Pool()
+	dbNum := rand.Intn(10000)
+	name := ident.New(fmt.Sprintf("_test_db_%d", dbNum))
+
+	cancel = func() {
+		err := retry.Execute(ctx, db, fmt.Sprintf("DROP DATABASE IF EXISTS %s CASCADE", name))
+		log.Printf("dropped database %s: %v", name, err)
+	}
+
+	// Ensure that the staging database exists.
+	if err := retry.Execute(ctx, db, fmt.Sprintf(
+		"CREATE DATABASE IF NOT EXISTS %s", ident.StagingDB)); err != nil {
+		return name, cancel, errors.WithStack(err)
+	}
+
+	if err := retry.Execute(ctx, db, fmt.Sprintf(
+		"CREATE DATABASE IF NOT EXISTS %s", name)); err != nil {
+		return name, cancel, errors.WithStack(err)
+	}
+
+	if err := retry.Execute(ctx, db, fmt.Sprintf(
+		`ALTER DATABASE %s CONFIGURE ZONE USING gc.ttlseconds = 600`, name)); err != nil {
+		return name, cancel, errors.WithStack(err)
+	}
+
+	return name, cancel, nil
+}
+
+// CreateTable creates a test table with a unique name. The schemaSpec
+// parameter must have exactly one %s substitution parameter, which
+// receives the fully-qualified table name.
+func CreateTable(ctx context.Context, dbName ident.Ident, schemaSpec string) (TableInfo, error) {
+	var table ident.Table
+	db := DB(ctx)
+	if db == nil {
+		return TableInfo{}, errors.New("no database in context")
+	}
+
+outer:
+	for {
+		// Pick a random name for the testing table.
+		tableNum := rand.Intn(10000)
+		tableName := ident.New(fmt.Sprintf("_test_table_%d", tableNum))
+
+		// Check whether the name is already in use.
+		var actualTableName string
+		err := retry.Retry(ctx, func(ctx context.Context) error {
+			return db.Pool().QueryRow(ctx,
+				fmt.Sprintf("SELECT table_name FROM [SHOW TABLES FROM %s] WHERE table_name = $1", dbName),
+				tableName.Raw(),
+			).Scan(&actualTableName)
+		})
+		switch err {
+		case pgx.ErrNoRows:
+			table = ident.NewTable(dbName, ident.Public, tableName)
+			break outer
+		case nil:
+			continue
+		default:
+			return TableInfo{}, errors.WithStack(err)
+		}
+	}
+
+	err := retry.Execute(ctx, db.Pool(), fmt.Sprintf(schemaSpec, table))
+	return TableInfo{db, table}, errors.WithStack(err)
+}
+
+// GetRowCount returns the number of rows in the table.
+func GetRowCount(ctx context.Context, db pgxtype.Querier, name ident.Table) (int, error) {
+	var count int
+	err := retry.Retry(ctx, func(ctx context.Context) error {
+		return db.QueryRow(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", name)).Scan(&count)
+	})
+	return count, err
+}
diff --git a/internal/backend/timestamp/store.go b/internal/backend/timestamp/store.go
new file mode 100644
index 00000000..885541b3
--- /dev/null
+++ b/internal/backend/timestamp/store.go
@@ -0,0 +1,85 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package timestamp implements a simple key-timestamp store.
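+//
+// A minimal usage sketch, following store_test.go:
+//
+//	s, err := timestamp.New(ctx, pool, timestamp.DefaultTable)
+//	// check err ...
+//	prev, err := s.Swap(ctx, pool, "my-key", next)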
+package timestamp + +// The code in this file is adapted from resolved_table.go + +import ( + "context" + "fmt" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/jackc/pgtype/pgxtype" + "github.com/jackc/pgx/v4" + "github.com/pkg/errors" +) + +// DefaultTable is a default table name to pass to New. +var DefaultTable = ident.NewTable(ident.StagingDB, ident.Public, ident.New("_timestamps")) + +// store implements a simple key/value store for HLC timestamps. +type store struct { + sql struct { + swap string + } +} + +var _ sinktypes.TimeSwapper = (*store)(nil) + +// New constructs a store using the specified table for storage. +func New( + ctx context.Context, tx pgxtype.Querier, target ident.Table, +) (sinktypes.TimeSwapper, error) { + if err := retry.Execute(ctx, tx, fmt.Sprintf(` +CREATE TABLE IF NOT EXISTS %s ( +key STRING NOT NULL PRIMARY KEY, +nanos INT8 NOT NULL, +logical INT8 NOT NULL +) +`, target)); err != nil { + return nil, errors.WithStack(err) + } + + ret := &store{} + ret.sql.swap = fmt.Sprintf(swapTemplate, target) + + return ret, nil +} + +const swapTemplate = ` +WITH u AS (UPSERT INTO %[1]s (nanos, logical, key) VALUES ($1, $2, $3) RETURNING 0) +SELECT nanos, logical FROM %[1]s WHERE key=$3` + +// Swap updates the value associated with the key, returning the +// previous value. +func (s *store) Swap( + ctx context.Context, db pgxtype.Querier, key string, value hlc.Time, +) (hlc.Time, error) { + var nanos int64 + var logical int + err := retry.Retry(ctx, func(ctx context.Context) error { + return db.QueryRow( + ctx, + s.sql.swap, + value.Nanos(), + value.Logical(), + key).Scan(&nanos, &logical) + }) + // No rows means that we haven't seen this key before. + if errors.Is(err, pgx.ErrNoRows) { + return hlc.Zero(), nil + } + return hlc.New(nanos, logical), errors.Wrap(err, s.sql.swap) +} diff --git a/internal/backend/timestamp/store_test.go b/internal/backend/timestamp/store_test.go new file mode 100644 index 00000000..192a272f --- /dev/null +++ b/internal/backend/timestamp/store_test.go @@ -0,0 +1,52 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +package timestamp + +import ( + "testing" + + "github.com/cockroachdb/cdc-sink/internal/backend/sinktest" + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/stretchr/testify/assert" +) + +func TestSwap(t *testing.T) { + a := assert.New(t) + ctx, dbInfo, cancel := sinktest.Context() + a.NotEmpty(dbInfo.Version()) + defer cancel() + + targetDB, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + s, err := New(ctx, dbInfo.Pool(), DefaultTable) + if !a.NoError(err) { + return + } + + const count = 10 + prev := hlc.Zero() + for i := 0; i <= count; i++ { + next := hlc.New(int64(1000*i), i) + found, err := s.Swap(ctx, dbInfo.Pool(), targetDB.Raw(), next) + if !a.NoError(err) { + return + } + a.Equal(prev, found) + prev = next + } + + a.Equal(int64(1000*count), prev.Nanos()) + a.Equal(count, prev.Logical()) +} diff --git a/internal/frontend/cdc/handler.go b/internal/frontend/cdc/handler.go new file mode 100644 index 00000000..0a020e5a --- /dev/null +++ b/internal/frontend/cdc/handler.go @@ -0,0 +1,188 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +// Package cdc contains a http.Handler which can receive +// webhook events from a CockroachDB CDC changefeed. +package cdc + +import ( + "bufio" + "context" + "io" + "log" + "net/http" + "time" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/batches" + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/cockroachdb/cdc-sink/internal/util/retry" + "github.com/jackc/pgx/v4/pgxpool" + "github.com/pkg/errors" +) + +// This file contains code repackaged from main.go + +// Handler is an http.Handler for processing webhook requests +// from a CockroachDB changefeed. +type Handler struct { + Appliers sinktypes.Appliers // Update tables within TargetDb. + Immediate bool // If true, apply mutations immediately. + Pool *pgxpool.Pool // Access to the target cluster. + Stores sinktypes.MutationStores // Record incoming json blobs. + Swapper sinktypes.TimeSwapper // Tracks named timestamps. + Watchers sinktypes.Watchers // Schema data. +} + +func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + ctx, cancel := context.WithTimeout(r.Context(), 30*time.Second) + defer cancel() + + sendErr := func(err error) { + if err == nil { + http.Error(w, "OK", http.StatusOK) + return + } + http.Error(w, err.Error(), http.StatusBadRequest) + log.Printf("ERROR %s:\n%v", r.RequestURI, err) + } + + // Is it an ndjson url? + if ndjson, err := parseNdjsonURL(r.RequestURI); err == nil { + sendErr(h.ndjson(ctx, ndjson, r.Body)) + } else if resolved, err := parseResolvedURL(r.RequestURI); err == nil { + sendErr(h.resolved(ctx, resolved)) + } else { + http.NotFound(w, r) + } +} + +// ndjson parses an incoming block of ndjson files and stores the +// associated Mutations. This assumes that the underlying +// MutationStore will store duplicate values in an idempotent manner, +// should the request fail partway through. 
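+//
+// Each non-empty input line is a JSON document; the tests in this
+// package use lines like:
+//
+//	{ "after" : { "pk" : 42, "v" : 99 }, "key" : [ 42 ], "updated" : "1.0" }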
+func (h *Handler) ndjson(ctx context.Context, u ndjsonURL, r io.Reader) error {
+	muts, release := batches.Mutation()
+	defer release()
+
+	target, err := ident.Relative(ident.New(u.targetDB), u.targetTable)
+	if err != nil {
+		return err
+	}
+
+	// In immediate mode, we want to apply the mutations immediately.
+	// The CDC feed guarantees in-order delivery for individual rows.
+	var flush func() error
+	if h.Immediate {
+		applier, err := h.Appliers.Get(ctx, target)
+		if err != nil {
+			return err
+		}
+		flush = func() error {
+			err := applier.Apply(ctx, h.Pool, muts)
+			muts = muts[:0]
+			return err
+		}
+	} else {
+		store, err := h.Stores.Get(ctx, target)
+		if err != nil {
+			return err
+		}
+		flush = func() error {
+			err := store.Store(ctx, h.Pool, muts)
+			muts = muts[:0]
+			return err
+		}
+	}
+
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		buf := scanner.Bytes()
+		if len(buf) == 0 {
+			continue
+		}
+		mut, err := parseMutation(buf)
+		if err != nil {
+			return err
+		}
+		muts = append(muts, mut)
+		if len(muts) == cap(muts) {
+			// An intermediate flush can fail; don't drop its error.
+			if err := flush(); err != nil {
+				return err
+			}
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return err
+	}
+	return flush()
+}
+
+// resolved acts upon a resolved timestamp message.
+func (h *Handler) resolved(ctx context.Context, r resolvedURL) error {
+	if h.Immediate {
+		return nil
+	}
+	targetDB := ident.New(r.targetDB)
+
+	return retry.Retry(ctx, func(ctx context.Context) error {
+		tx, err := h.Pool.Begin(ctx)
+		if err != nil {
+			return err
+		}
+		defer tx.Rollback(ctx)
+
+		watcher, err := h.Watchers.Get(ctx, targetDB)
+		if err != nil {
+			return err
+		}
+		schema := watcher.Snapshot()
+
+		// Prepare to merge data.
+		stores := make([]sinktypes.MutationStore, 0, len(schema))
+		appliers := make([]sinktypes.Applier, 0, len(schema))
+		for table := range schema {
+			store, err := h.Stores.Get(ctx, table)
+			if err != nil {
+				return err
+			}
+			stores = append(stores, store)
+
+			applier, err := h.Appliers.Get(ctx, table)
+			if err != nil {
+				return err
+			}
+			appliers = append(appliers, applier)
+		}
+
+		prev, err := h.Swapper.Swap(ctx, tx, "_resolved_"+targetDB.Raw(), r.timestamp)
+		if err != nil {
+			return err
+		}
+
+		if hlc.Compare(r.timestamp, prev) < 0 {
+			return errors.Errorf(
+				"resolved timestamp went backwards: received %s, had %s",
+				r.timestamp, prev)
+		}
+
+		for i := range stores {
+			muts, err := stores[i].Drain(ctx, tx, prev, r.timestamp)
+			if err != nil {
+				return err
+			}
+
+			if err := appliers[i].Apply(ctx, tx, muts); err != nil {
+				return err
+			}
+		}
+
+		return tx.Commit(ctx)
+	})
+}
diff --git a/internal/frontend/cdc/handler_test.go b/internal/frontend/cdc/handler_test.go
new file mode 100644
index 00000000..38112434
--- /dev/null
+++ b/internal/frontend/cdc/handler_test.go
@@ -0,0 +1,142 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+ +package cdc + +import ( + "strings" + "testing" + + "github.com/cockroachdb/cdc-sink/internal/backend/apply" + "github.com/cockroachdb/cdc-sink/internal/backend/mutation" + "github.com/cockroachdb/cdc-sink/internal/backend/schemawatch" + "github.com/cockroachdb/cdc-sink/internal/backend/sinktest" + "github.com/cockroachdb/cdc-sink/internal/backend/timestamp" + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/stretchr/testify/assert" +) + +func TestHandler(t *testing.T) { + t.Run("deferred", func(t *testing.T) { testHandler(t, false) }) + t.Run("immediate", func(t *testing.T) { testHandler(t, true) }) +} + +func testHandler(t *testing.T, immediate bool) { + t.Helper() + a := assert.New(t) + + ctx, dbInfo, cancel := sinktest.Context() + defer cancel() + + dbName, cancel, err := sinktest.CreateDB(ctx) + if !a.NoError(err) { + return + } + defer cancel() + + tableInfo, err := sinktest.CreateTable(ctx, dbName, + `CREATE TABLE %s (pk INT PRIMARY KEY, v INT NOT NULL)`) + if !a.NoError(err) { + return + } + + swapper, err := timestamp.New(ctx, dbInfo.Pool(), ident.Resolved) + if !a.NoError(err) { + return + } + + watchers, cancel := schemawatch.NewWatchers(dbInfo.Pool()) + defer cancel() + + appliers, cancel := apply.New(watchers) + defer cancel() + + h := &Handler{ + Appliers: appliers, + Immediate: immediate, + Pool: dbInfo.Pool(), + Stores: mutation.New(dbInfo.Pool(), ident.StagingDB), + Swapper: swapper, + Watchers: watchers, + } + + t.Run("smoke", func(t *testing.T) { + a := assert.New(t) + + a.NoError(h.ndjson(ctx, + ndjsonURL{ + targetDB: dbName.Raw(), + targetTable: tableInfo.Name().Table().Raw(), + }, + strings.NewReader(` +{ "after" : { "pk" : 42, "v" : 99 }, "key" : [ 42 ], "updated" : "1.0" } +{ "after" : { "pk" : 99, "v" : 42 }, "key" : [ 99 ], "updated" : "1.0" } +`))) + + a.NoError(h.resolved(ctx, resolvedURL{ + targetDB: dbName.Raw(), + timestamp: hlc.New(2, 0), + })) + + ct, err := tableInfo.RowCount(ctx) + a.NoError(err) + a.Equal(2, ct) + + // Now, delete the data. + + a.NoError(h.ndjson(ctx, + ndjsonURL{ + targetDB: dbName.Raw(), + targetTable: tableInfo.Name().Table().Raw(), + }, + strings.NewReader(` +{ "after" : null, "key" : [ 42 ], "updated" : "3.0" } +{ "key" : [ 99 ], "updated" : "3.0" } +`))) + + a.NoError(h.resolved(ctx, resolvedURL{ + targetDB: dbName.Raw(), + timestamp: hlc.New(5, 0), + })) + + ct, err = tableInfo.RowCount(ctx) + a.NoError(err) + a.Equal(0, ct) + }) + + t.Run("empty-ndjson", func(t *testing.T) { + a := assert.New(t) + a.NoError(h.ndjson(ctx, + ndjsonURL{ + targetDB: dbName.Raw(), + targetTable: tableInfo.Name().Table().Raw(), + }, + strings.NewReader(""))) + }) + + t.Run("resolved-goes-backwards", func(t *testing.T) { + a := assert.New(t) + + a.NoError(h.resolved(ctx, resolvedURL{ + targetDB: dbName.Raw(), + timestamp: hlc.New(50, 0), + })) + err := h.resolved(ctx, resolvedURL{ + targetDB: dbName.Raw(), + timestamp: hlc.New(25, 0), + }) + if immediate { + a.NoError(err) + } else if a.Error(err) { + a.Contains(err.Error(), "backwards") + } + }) +} diff --git a/internal/frontend/cdc/ndjson_url.go b/internal/frontend/cdc/ndjson_url.go new file mode 100644 index 00000000..49af84ea --- /dev/null +++ b/internal/frontend/cdc/ndjson_url.go @@ -0,0 +1,84 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. 
+// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package cdc + +import ( + "bytes" + "encoding/json" + "fmt" + "regexp" + + "github.com/cockroachdb/cdc-sink/internal/sinktypes" + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/pkg/errors" +) + +// See https://www.cockroachlabs.com/docs/stable/create-changefeed.html#general-file-format +// Example: /target//2020-04-02/202004022058072107140000000000000-56087568dba1e6b8-1-72-00000000-test_table-1.ndjson +// Format is: /[endpoint]/[date]/[timestamp]-[uniquer]-[topic]-[schema-id] +var ( + ndjsonRegex = regexp.MustCompile(`/(?P[^/]*)/(?P\d{4}-\d{2}-\d{2})/(?P.+)-(?P[^-]+)-(?P[^-]+).ndjson$`) + ndjsonEndpointIdx = ndjsonRegex.SubexpIndex("target") + ndjsonTopicIdx = ndjsonRegex.SubexpIndex("topic") +) + +// ndjsonURL contains all the parsed info from an ndjson url. +type ndjsonURL struct { + targetDB string + targetTable string +} + +func parseNdjsonURL(url string) (ndjsonURL, error) { + match := ndjsonRegex.FindStringSubmatch(url) + if match == nil { + return ndjsonURL{}, fmt.Errorf("can't parse url %s", url) + } + + return ndjsonURL{ + targetDB: match[ndjsonEndpointIdx], + targetTable: match[ndjsonTopicIdx], + }, nil +} + +// parseMutation takes a single line from an ndjson and extracts enough +// information to be able to persist it to the staging table. +func parseMutation(rawBytes []byte) (sinktypes.Mutation, error) { + var payload struct { + After json.RawMessage `json:"after"` + Key json.RawMessage `json:"key"` + Updated string `json:"updated"` + } + + // Large numbers are not turned into strings, so the UseNumber option for + // the decoder is required. + dec := json.NewDecoder(bytes.NewReader(rawBytes)) + dec.UseNumber() + if err := dec.Decode(&payload); err != nil { + return sinktypes.Mutation{}, err + } + + if payload.Updated == "" { + return sinktypes.Mutation{}, + errors.New("CREATE CHANGEFEED must specify the 'WITH updated' option") + } + + // Parse the timestamp into nanos and logical. + ts, err := hlc.Parse(payload.Updated) + if err != nil { + return sinktypes.Mutation{}, err + } + + return sinktypes.Mutation{ + Time: ts, + Data: payload.After, + Key: payload.Key, + }, nil +} diff --git a/internal/frontend/cdc/resolved_url.go b/internal/frontend/cdc/resolved_url.go new file mode 100644 index 00000000..860296bf --- /dev/null +++ b/internal/frontend/cdc/resolved_url.go @@ -0,0 +1,94 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package cdc + +// This file contains code repackaged from url.go. 
+
+import (
+	"fmt"
+	"regexp"
+	"strconv"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/pkg/errors"
+)
+
+// Example: /test.sql/2020-04-04/202004042351304139680000000000000.RESOLVED
+// Format is: /[target]/[date]/[timestamp].RESOLVED
+var (
+	resolvedRegex = regexp.MustCompile(
+		`^/(?P<target>.*)/(?P<date>\d{4}-\d{2}-\d{2})/(?P<timestamp>\d{33}).RESOLVED$`)
+	resolvedTargetIdx    = resolvedRegex.SubexpIndex("target")
+	resolvedTimestampIdx = resolvedRegex.SubexpIndex("timestamp")
+)
+
+// resolvedURL contains all the parsed info from a resolved-timestamp url.
+type resolvedURL struct {
+	targetDB  string
+	timestamp hlc.Time
+}
+
+func parseResolvedURL(url string) (resolvedURL, error) {
+	match := resolvedRegex.FindStringSubmatch(url)
+	if len(match) != resolvedRegex.NumSubexp()+1 {
+		return resolvedURL{}, fmt.Errorf("can't parse url %s", url)
+	}
+
+	resolved := resolvedURL{
+		targetDB: match[resolvedTargetIdx],
+	}
+
+	tsText := match[resolvedTimestampIdx]
+	if len(tsText) != 33 {
+		return resolvedURL{}, errors.Errorf(
+			"expected timestamp to be 33 characters long, got %d: %s",
+			len(tsText), tsText,
+		)
+	}
+	var err error
+	resolved.timestamp, err = parseResolvedTimestamp(tsText[:23], tsText[23:])
+	return resolved, err
+}
+
+// This is the timestamp format: YYYYMMDDHHMMSSNNNNNNNNNLLLLLLLLLL
+// Formatting const stolen from https://github.com/cockroachdb/cockroach/blob/master/pkg/ccl/changefeedccl/sink_cloudstorage.go#L48
+const timestampDateTimeFormat = "20060102150405"
+
+func parseResolvedTimestamp(timestamp string, logical string) (hlc.Time, error) {
+	if len(timestamp) != 23 {
+		return hlc.Time{}, fmt.Errorf("can't parse timestamp %s", timestamp)
+	}
+	if len(logical) != 10 {
+		return hlc.Time{}, fmt.Errorf("can't parse logical timestamp %s", logical)
+	}
+
+	// Parse the date and time.
+	timestampParsed, err := time.Parse(timestampDateTimeFormat, timestamp[0:14])
+	if err != nil {
+		return hlc.Time{}, err
+	}
+
+	// Parse out the nanos and add them to the wall time; time.Time.Add
+	// returns a new value, so the result must be assigned.
+	nanos, err := time.ParseDuration(timestamp[14:23] + "ns")
+	if err != nil {
+		return hlc.Time{}, err
+	}
+	timestampParsed = timestampParsed.Add(nanos)
+
+	// Parse out the logical timestamp.
+	logicalParsed, err := strconv.Atoi(logical)
+	if err != nil {
+		return hlc.Time{}, err
+	}
+
+	return hlc.New(timestampParsed.UnixNano(), logicalParsed), nil
+}
diff --git a/internal/frontend/cdc/url_test.go b/internal/frontend/cdc/url_test.go
new file mode 100644
index 00000000..1c00adff
--- /dev/null
+++ b/internal/frontend/cdc/url_test.go
@@ -0,0 +1,42 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package cdc
+
+// This file contains code repackaged from url_test.go.
+
+import (
+	"testing"
+
+	"github.com/cockroachdb/cdc-sink/internal/util/hlc"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestNdjsonURL(t *testing.T) {
+	a := assert.New(t)
+	const u = "/target/2020-04-02/202004022058072107140000000000000-56087568dba1e6b8-1-72-00000000-test_table-1f.ndjson"
+
+	p, err := parseNdjsonURL(u)
+	if a.NoError(err) {
+		a.Equal("target", p.targetDB)
+		a.Equal("test_table", p.targetTable)
+	}
+}
+
+func TestResolvedURL(t *testing.T) {
+	a := assert.New(t)
+	const u = "/target/2020-04-04/202004042351304139680000000000456.RESOLVED"
+
+	r, err := parseResolvedURL(u)
+	if a.NoError(err) {
+		a.Equal("target", r.targetDB)
+		// 2020-04-04 23:51:30.413968 UTC, logical counter 456.
+		a.Equal(hlc.New(1586044290413968000, 456), r.timestamp)
+	}
+}
diff --git a/internal/frontend/server/integration_test.go b/internal/frontend/server/integration_test.go
new file mode 100644
index 00000000..c619b360
--- /dev/null
+++ b/internal/frontend/server/integration_test.go
@@ -0,0 +1,107 @@
+package server
+
+import (
+	"net/url"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/backend/sinktest"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestIntegration(t *testing.T) {
+	if testing.Short() {
+		t.Skip("short tests requested")
+	}
+
+	a := assert.New(t)
+	ctx, dbInfo, cancel := sinktest.Context()
+	defer cancel()
+
+	sourceDB, cancel, err := sinktest.CreateDB(ctx)
+	if !a.NoError(err) {
+		return
+	}
+	defer cancel()
+
+	targetDB, cancel, err := sinktest.CreateDB(ctx)
+	if !a.NoError(err) {
+		return
+	}
+	defer cancel()
+
+	srv, err := newServer(ctx, "127.0.0.1:0", dbInfo.Pool().Config().ConnString(), false)
+	if !a.NoError(err) {
+		return
+	}
+	// Run the server loop in the background.
+	go srv.serve()
+
+	// Set up source and target tables.
+	source, err := sinktest.CreateTable(ctx, sourceDB, "CREATE TABLE %s (pk INT PRIMARY KEY, val STRING)")
+	if !a.NoError(err) {
+		return
+	}
+
+	target := sinktest.NewTableInfo(dbInfo, ident.NewTable(targetDB, ident.Public, source.Name().Table()))
+	if !a.NoError(target.Exec(ctx, "CREATE TABLE %s (pk INT PRIMARY KEY, val STRING)")) {
+		return
+	}
+
+	// Add base data to the source table.
+	a.NoError(source.Exec(ctx, "INSERT INTO %s (pk, val) VALUES (1, 'one')"))
+	ct, err := source.RowCount(ctx)
+	a.NoError(err)
+	a.Equal(1, ct)
+
+	// Set up the changefeed.
+	feedURL := url.URL{
+		Scheme: "http",
+		Host:   srv.listener.Addr().String(),
+		Path:   target.Name().Database().Raw(),
+	}
+	if strings.Contains(dbInfo.Version(), "v20.2.") || strings.Contains(dbInfo.Version(), "v21.1.") {
+		feedURL.Scheme = "experimental-http"
+	}
+
+	if !a.NoError(source.Exec(ctx,
+		"CREATE CHANGEFEED FOR TABLE %s "+
+			"INTO '"+feedURL.String()+"' "+
+			"WITH updated,resolved='1s'")) {
+		return
+	}
+
+	// Wait for the backfilled value.
+	for {
+		ct, err := target.RowCount(ctx)
+		if !a.NoError(err) {
+			return
+		}
+		if ct >= 1 {
+			break
+		}
+		t.Log("waiting for backfill")
+		time.Sleep(time.Second)
+	}
+
+	// Insert an additional value.
+	a.NoError(source.Exec(ctx, "INSERT INTO %s (pk, val) VALUES (2, 'two')"))
+	ct, err = source.RowCount(ctx)
+	a.NoError(err)
+	a.Equal(2, ct)
+
+	// Wait for the streamed value.
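+	// Poll until the streamed row arrives; the Context carries the
+	// caseTimeout deadline, so this loop cannot spin forever.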
+	for {
+		ct, err := target.RowCount(ctx)
+		if !a.NoError(err) {
+			return
+		}
+		if ct >= 2 {
+			break
+		}
+		t.Log("waiting for stream")
+		time.Sleep(time.Second)
+	}
+}
diff --git a/internal/frontend/server/server.go b/internal/frontend/server/server.go
new file mode 100644
index 00000000..70994d0f
--- /dev/null
+++ b/internal/frontend/server/server.go
@@ -0,0 +1,153 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package server contains a generic HTTP server that installs
+// the CDC listener.
+package server
+
+// This file contains code repackaged from main.go
+
+import (
+	"context"
+	"flag"
+	"log"
+	"net"
+	"net/http"
+	"time"
+
+	"github.com/cockroachdb/cdc-sink/internal/backend/apply"
+	"github.com/cockroachdb/cdc-sink/internal/backend/mutation"
+	"github.com/cockroachdb/cdc-sink/internal/backend/schemawatch"
+	"github.com/cockroachdb/cdc-sink/internal/backend/timestamp"
+	"github.com/cockroachdb/cdc-sink/internal/frontend/cdc"
+	"github.com/cockroachdb/cdc-sink/internal/util/ident"
+	"github.com/jackc/pgx/v4"
+	"github.com/jackc/pgx/v4/pgxpool"
+	"github.com/pkg/errors"
+	"golang.org/x/net/http2"
+	"golang.org/x/net/http2/h2c"
+)
+
+// Various flags.
+var (
+	BindAddr = flag.String(
+		"bindAddr", ":26258", "the network address to bind to")
+
+	ConnectionString = flag.String(
+		"conn",
+		"postgresql://root@localhost:26257/defaultdb?sslmode=disable",
+		"cockroach connection string",
+	)
+
+	IgnoreResolved = flag.Bool("ignoreResolved", false,
+		"write data to the target database immediately, without "+
+			"waiting for resolved timestamps")
+)
+
+// Main is the entry point to the server.
+func Main(ctx context.Context) error {
+	if !flag.Parsed() {
+		flag.Parse()
+	}
+	s, err := newServer(ctx, *BindAddr, *ConnectionString, *IgnoreResolved)
+	if err != nil {
+		return err
+	}
+	return s.serve()
+}
+
+type server struct {
+	listener net.Listener
+	srv      *http.Server
+}
+
+// newServer performs all of the setup work that's likely to fail before
+// actually serving network requests.
+func newServer(ctx context.Context,
+	bindAddr, connectionString string,
+	ignoreResolved bool,
+) (*server, error) {
+	cfg, err := pgxpool.ParseConfig(connectionString)
+	if err != nil {
+		return nil, errors.Wrapf(err, "could not parse %q", connectionString)
+	}
+	// Identify traffic.
+	cfg.AfterConnect = func(ctx context.Context, conn *pgx.Conn) error {
+		_, err := conn.Exec(ctx, "SET application_name=$1", "cdc-sink")
+		return err
+	}
+	// Ensure connection diversity through long-lived loadbalancers.
+	cfg.MaxConnLifetime = 10 * time.Minute
+	// Keep one spare connection.
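+	// (Assumption: pgxpool's background health check re-dials when the
+	// pool drops below MinConns, so an idle server keeps a warm
+	// connection ready.)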
+ cfg.MinConns = 1 + pool, err := pgxpool.ConnectConfig(ctx, cfg) + if err != nil { + return nil, errors.Wrap(err, "could not connect to CockroachDB") + } + + swapper, err := timestamp.New(ctx, pool, ident.Resolved) + if err != nil { + return nil, err + } + + watchers, cancelWatchers := schemawatch.NewWatchers(pool) + appliers, cancelAppliers := apply.New(watchers) + + mux := &http.ServeMux{} + mux.HandleFunc("/_/healthz", func(w http.ResponseWriter, r *http.Request) { + if err := pool.Ping(r.Context()); err != nil { + log.Printf("health check failed: %v", err) + http.Error(w, "health check failed", http.StatusInternalServerError) + return + } + http.Error(w, "OK", http.StatusOK) + }) + mux.Handle("/", &cdc.Handler{ + Appliers: appliers, + Immediate: ignoreResolved, + Pool: pool, + Stores: mutation.New(pool, ident.StagingDB), + Swapper: swapper, + Watchers: watchers, + }) + + l, err := net.Listen("tcp", bindAddr) + if err != nil { + return nil, errors.Wrapf(err, "could not bind to %q", bindAddr) + } + + log.Printf("listening on %s", l.Addr()) + srv := &http.Server{ + Handler: h2c.NewHandler(logWrapper(mux), &http2.Server{}), + } + go func() { + <-ctx.Done() + log.Println("server shutting down") + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + if err := srv.Shutdown(ctx); err != nil { + log.Printf("error during server shutdown: %v", err) + } + l.Close() + cancelAppliers() + cancelWatchers() + pool.Close() + }() + + return &server{l, srv}, nil +} + +func (s *server) serve() error { + err := s.srv.Serve(s.listener) + if errors.Is(err, http.ErrServerClosed) { + return nil + } + return err +} diff --git a/internal/frontend/server/wrapper.go b/internal/frontend/server/wrapper.go new file mode 100644 index 00000000..256a2083 --- /dev/null +++ b/internal/frontend/server/wrapper.go @@ -0,0 +1,41 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package server + +import ( + "log" + "net/http" + "time" +) + +type responseSpy struct { + http.ResponseWriter + statusCode int +} + +func (s *responseSpy) WriteHeader(statusCode int) { + s.statusCode = statusCode + s.ResponseWriter.WriteHeader(statusCode) +} + +func logWrapper(h http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + spy := &responseSpy{w, 0} + start := time.Now() + h.ServeHTTP(spy, r) + elapsed := time.Since(start) + log.Printf("http status %d %s: %s in %s", + spy.statusCode, + http.StatusText(spy.statusCode), + r.URL.Path, + elapsed) + }) +} diff --git a/internal/sinktypes/sinktypes.go b/internal/sinktypes/sinktypes.go new file mode 100644 index 00000000..50855ad5 --- /dev/null +++ b/internal/sinktypes/sinktypes.go @@ -0,0 +1,114 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. 
+ +// Package sinktypes contains data types and interfaces that define the +// major functional blocks of code within cdc-sink. The goal of placing +// the types into this package is to make it easy to compose +// functionality as the cdc-sink project evolves. +package sinktypes + +import ( + "bytes" + "context" + "encoding/json" + + "github.com/cockroachdb/cdc-sink/internal/util/hlc" + "github.com/cockroachdb/cdc-sink/internal/util/ident" + "github.com/jackc/pgtype/pgxtype" + "github.com/jackc/pgx/v4" +) + +// An Applier accepts some number of Mutations and applies them to +// a target table. +type Applier interface { + Apply(context.Context, Batcher, []Mutation) error +} + +// Appliers is a factory for Applier instances. +type Appliers interface { + Get(ctx context.Context, target ident.Table) (Applier, error) +} + +// A Batcher allows for a batch of statements to be executed in a single +// round-trip to the database. This is implemented by several pgx types, +// such as pgxpool.Pool and pgx.Tx. +type Batcher interface { + pgxtype.Querier + SendBatch(ctx context.Context, batch *pgx.Batch) pgx.BatchResults +} + +// A Mutation describes a row to upsert into the target database. That +// is, it is a collection of column values to apply to a row in some +// table. +type Mutation struct { + Data json.RawMessage // An encoded JSON object: { "key" : "hello" } + Key json.RawMessage // An encoded JSON array: [ "hello" ] + Time hlc.Time // The effective time of the mutation +} + +var nullBytes = []byte("null") + +// Delete returns true if the Mutation represents a deletion. +func (m Mutation) Delete() bool { + return len(m.Data) == 0 || bytes.Equal(m.Data, nullBytes) +} + +// MutationStore describes a service which can durably persist some +// number of Mutations. +type MutationStore interface { + // Drain will delete queued mutations. It is not idempotent. + Drain(ctx context.Context, tx pgxtype.Querier, prev, next hlc.Time) ([]Mutation, error) + + // Store implementations should be idempotent. + Store(ctx context.Context, db Batcher, muts []Mutation) error +} + +// MutationStores is a factory for MutationStore instances. +type MutationStores interface { + Get(ctx context.Context, target ident.Table) (MutationStore, error) +} + +// A TimeSwapper maintains a durable map of string keys to timestamps. +type TimeSwapper interface { + // Swap stores a new timestamp for the given key, returning the + // previous value. If no previous value was present, hlc.Zero() will + // be returned. + Swap(context.Context, pgxtype.Querier, string, hlc.Time) (hlc.Time, error) +} + +// ColData hold SQL column metadata. +type ColData struct { + Ignored bool + Name ident.Ident + Primary bool + Type string +} + +// Watcher allows table metadata to be observed. +// +// The methods in this type return column data such that primary key +// columns are returned first, in their declaration order, followed +// by all other non-pk columns. +type Watcher interface { + // Refresh will force the Watcher to immediately query the database + // for updated schema information. This is intended for testing and + // does not need to be called in the general case. + Refresh(context.Context, pgxtype.Querier) error + // Snapshot returns the latest known schema for all tables. + Snapshot() map[ident.Table][]ColData + // Watch returns a channel that emits updated column data for + // the given table. 
The channel will be closed if the
+	// table is dropped or when the cancel function is called.
+	Watch(table ident.Table) (_ <-chan []ColData, cancel func(), _ error)
+}
+
+// Watchers is a factory for Watcher instances.
+type Watchers interface {
+	Get(ctx context.Context, db ident.Ident) (Watcher, error)
+}
diff --git a/internal/util/batches/batches.go b/internal/util/batches/batches.go
new file mode 100644
index 00000000..eafd6001
--- /dev/null
+++ b/internal/util/batches/batches.go
@@ -0,0 +1,104 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package batches contains support code for working with and testing
+// batches of data.
+package batches
+
+import (
+	"flag"
+	"sync"
+
+	"github.com/cockroachdb/cdc-sink/internal/sinktypes"
+)
+
+const defaultSize = 1000
+
+var batchSize = flag.Int("batchSize", defaultSize, "default size for batched operations")
+
+// Batch is a helper to perform some operation over a large number
+// of values in a batch-oriented fashion. The indexes provided to
+// the callback function are a half-open range [begin, end).
+func Batch(count int, fn func(begin, end int) error) error {
+	consume := Size()
+	idx := 0
+	for {
+		if consume > count {
+			consume = count
+		}
+		if err := fn(idx, idx+consume); err != nil {
+			return err
+		}
+		if consume == count {
+			return nil
+		}
+		idx += consume
+		count -= consume
+	}
+}
+
+// Size returns the default size for batch operations. Testing code
+// should generally use a multiple of this value to ensure that
+// batching has been correctly implemented.
+func Size() int {
+	x := batchSize
+	if x == nil {
+		return defaultSize
+	}
+	return *x
+}
+
+// A Release function must be called to return the underlying array
+// back to the pool.
+type Release func()
+
+var intPool = &sync.Pool{New: func() interface{} {
+	x := make([]int, 0, Size())
+	return &x
+}}
+
+// Int returns a slice of Size() capacity.
+func Int() ([]int, Release) {
+	ret := intPool.Get().(*[]int)
+	return *ret, func() { intPool.Put(ret) }
+}
+
+var int64Pool = &sync.Pool{New: func() interface{} {
+	x := make([]int64, 0, Size())
+	return &x
+}}
+
+// Int64 returns a slice of Size() capacity.
+func Int64() ([]int64, Release) {
+	ret := int64Pool.Get().(*[]int64)
+	return *ret, func() { int64Pool.Put(ret) }
+}
+
+var mutationPool = &sync.Pool{New: func() interface{} {
+	x := make([]sinktypes.Mutation, 0, Size())
+	return &x
+}}
+
+// Mutation returns a slice of Size() capacity.
+func Mutation() ([]sinktypes.Mutation, Release) {
+	ret := mutationPool.Get().(*[]sinktypes.Mutation)
+	return *ret, func() { mutationPool.Put(ret) }
+}
+
+var stringPool = &sync.Pool{New: func() interface{} {
+	x := make([]string, 0, Size())
+	return &x
+}}
+
+// String returns a slice of Size() capacity.
+func String() ([]string, Release) {
+	ret := stringPool.Get().(*[]string)
+	return *ret, func() { stringPool.Put(ret) }
+}
diff --git a/internal/util/hlc/hlc.go b/internal/util/hlc/hlc.go
new file mode 100644
index 00000000..07fb65f9
--- /dev/null
+++ b/internal/util/hlc/hlc.go
@@ -0,0 +1,87 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package hlc contains a trivial representation of CockroachDB's hybrid
+// logical clock timestamp.
+package hlc
+
+// The code in this file is reworked from sink_table.go.
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/pkg/errors"
+)
+
+// Time is a representation of the hybrid logical clock timestamp used
+// by CockroachDB. This is an immutable value type, suitable for use as
+// a map key.
+type Time struct {
+	nanos   int64
+	logical int
+}
+
+// Compare two timestamps, returning a negative, zero, or positive
+// value per the usual comparator convention.
+func Compare(a, b Time) int {
+	switch {
+	case a.nanos > b.nanos:
+		return 1
+	case a.nanos < b.nanos:
+		return -1
+	default:
+		return a.logical - b.logical
+	}
+}
+
+// From constructs an HLC time from a wall time.
+func From(t time.Time) Time {
+	return Time{t.UnixNano(), 0}
+}
+
+// New constructs a new Time with wall and logical parts.
+func New(nanos int64, logical int) Time {
+	return Time{nanos, logical}
+}
+
+// Parse splits a timestamp of the format NNNN.LLLLLLLLLL into an int64
+// for the nanos and an int for the logical component.
+func Parse(timestamp string) (Time, error) {
+	splits := strings.Split(timestamp, ".")
+	if len(splits) != 2 {
+		return Time{}, errors.Errorf("can't parse timestamp %s", timestamp)
+	}
+	nanos, err := strconv.ParseInt(splits[0], 10, 64)
+	if err != nil {
+		return Time{}, err
+	}
+	if nanos <= 0 {
+		return Time{}, errors.Errorf("nanos must be greater than 0: %d", nanos)
+	}
+	logical, err := strconv.Atoi(splits[1])
+	if len(splits[1]) != 10 && logical != 0 {
+		return Time{}, errors.Errorf("logical part %q must be 10 digits or zero-valued", splits[1])
+	}
+	return Time{nanos, logical}, err
+}
+
+// Zero returns a zero-valued Time.
+func Zero() Time {
+	return Time{}
+}
+
+// Logical returns the logical counter.
+func (t Time) Logical() int { return t.logical }
+
+// Nanos returns the nanosecond wall time.
+func (t Time) Nanos() int64 { return t.nanos }
+
+// String returns the Time as the wall nanos and a zero-padded,
+// ten-digit logical counter, separated by a period.
+func (t Time) String() string {
+	return fmt.Sprintf("%d.%010d", t.nanos, t.logical)
+}
diff --git a/internal/util/hlc/hlc_test.go b/internal/util/hlc/hlc_test.go
new file mode 100644
index 00000000..469ddc66
--- /dev/null
+++ b/internal/util/hlc/hlc_test.go
@@ -0,0 +1,66 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+package hlc
+
+import (
+	"fmt"
+	"math"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestCompare(t *testing.T) {
+	a := assert.New(t)
+
+	a.True(Compare(Time{1, 1}, Time{1, 1}) == 0)
+
+	a.True(Compare(Time{2, 1}, Time{1, 1}) > 0)
+	a.True(Compare(Time{1, 1}, Time{2, 1}) < 0)
+
+	a.True(Compare(Time{1, 2}, Time{1, 1}) > 0)
+	a.True(Compare(Time{1, 1}, Time{1, 2}) < 0)
+}
+
+func TestParse(t *testing.T) {
+	// Implementation copied from sink_table_test.go
+
+	tests := []struct {
+		testcase        string
+		expectedPass    bool
+		expectedNanos   int64
+		expectedLogical int
+	}{
+		{"", false, 0, 0},
+		{".", false, 0, 0},
+		{"1233", false, 0, 0},
+		{".1233", false, 0, 0},
+		{"123.123", false, 123, 123},
+		{"0.0", false, 0, 0},
+		{"1586019746136571000.0000000000", true, 1586019746136571000, 0},
+		{"1586019746136571000.0000000001", true, 1586019746136571000, 1},
+		{"9223372036854775807.2147483647", true, math.MaxInt64, math.MaxInt32},
+	}
+
+	for i, test := range tests {
+		t.Run(fmt.Sprintf("%d - %s", i, test.testcase), func(t *testing.T) {
+			a := assert.New(t)
+			actual, actualErr := Parse(test.testcase)
+			if test.expectedPass && a.NoError(actualErr) {
+				a.Equal(test.expectedNanos, actual.Nanos(), "nanos")
+				a.Equal(test.expectedLogical, actual.Logical(), "logical")
+				a.Equal(test.testcase, actual.String())
+			} else if !test.expectedPass {
+				a.Error(actualErr)
+			}
+		})
+	}
+}
diff --git a/internal/util/ident/ident.go b/internal/util/ident/ident.go
new file mode 100644
index 00000000..c8361639
--- /dev/null
+++ b/internal/util/ident/ident.go
@@ -0,0 +1,110 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+// Package ident contains types for safely representing SQL identifiers.
+package ident
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/pkg/errors"
+)
+
+// Well-known identifiers.
+var (
+	StagingDB = New("_cdc_sink") // "_cdc_sink"
+	Public    = New("public")    // "public"
+	Resolved  = NewTable(StagingDB, Public, New("resolved"))
+)
+
+// An Ident is a quoted SQL identifier, generally a table, column, or
+// database. This type is an immutable value type, suitable for use as a
+// map key.
+type Ident struct {
+	q string
+}
+
+// New returns a quoted SQL identifier. An empty input is legal and
+// produces an Ident for which Empty() returns true.
+func New(raw string) Ident {
+	return Ident{`"` + strings.ReplaceAll(raw, `"`, `""`) + `"`}
+}
+
+// Newf returns a quoted SQL identifier.
+func Newf(format string, args ...interface{}) Ident {
+	return New(fmt.Sprintf(format, args...))
+}
+
+// Relative parses a table name and returns a fully-qualified Table
+// name whose database value is always db.
+//
+// If the input table name is a simple string or has exactly two parts,
+// the resulting Table will have the form "db.public.table".
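+// For example, both "foo" and "other_db.foo" resolve to db.public.foo.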
+//
+// If the input table has three parts, it will be interpreted as a
+// fully-qualified table name, except that its database portion is
+// replaced by db: "other_db.schema.foo" resolves to db.schema.foo.
+func Relative(db Ident, table string) (Table, error) {
+	if table == "" {
+		return Table{}, errors.New("empty table")
+	}
+
+	parts := strings.Split(table, ".")
+	switch len(parts) {
+	case 1:
+		return Table{db, Public, New(parts[0])}, nil
+	case 2:
+		return Table{db, Public, New(parts[1])}, nil
+	case 3:
+		return Table{db, New(parts[1]), New(parts[2])}, nil
+	default:
+		return Table{}, errors.Errorf("too many parts in %q", table)
+	}
+}
+
+// Empty returns true if the identifier is empty.
+func (n Ident) Empty() bool {
+	return n.q == `""`
+}
+
+// Raw returns the original, raw value.
+func (n Ident) Raw() string {
+	return strings.ReplaceAll(n.q[1:len(n.q)-1], `""`, `"`)
+}
+
+// String returns the ident in a manner suitable for constructing a query.
+func (n Ident) String() string { return n.q }
+
+// A Table identifier is a three-part ident, consisting of an SQL
+// database, schema, and table ident. This type is an immutable value
+// type, suitable for use as a map key.
+type Table struct {
+	db, schema, table Ident
+}
+
+// NewTable constructs a Table identifier.
+func NewTable(db, schema, table Ident) Table {
+	return Table{db, schema, table}
+}
+
+// Database returns the table's enclosing database.
+func (t Table) Database() Ident { return t.db }
+
+// Schema returns the table's enclosing schema.
+func (t Table) Schema() Ident { return t.schema }
+
+// Table returns the table's identifier.
+func (t Table) Table() Ident { return t.table }
+
+// String returns the identifier in a manner suitable for constructing a
+// query.
+func (t Table) String() string {
+	return fmt.Sprintf("%s.%s.%s", t.Database(), t.Schema(), t.Table())
+}
diff --git a/internal/util/ident/ident_test.go b/internal/util/ident/ident_test.go
new file mode 100644
index 00000000..71cb6a6f
--- /dev/null
+++ b/internal/util/ident/ident_test.go
@@ -0,0 +1,82 @@
+// Copyright 2021 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+ +package ident + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIdent(t *testing.T) { + a := assert.New(t) + + a.True(New("").Empty()) + + id := New("table") + a.Equal("table", id.Raw()) + a.Equal(`"table"`, id.String()) + a.False(id.Empty()) + + a.Equal(id, New("table")) + + a.Equal(`"foo!bar"`, New("foo!bar").String()) +} + +func TestQualified(t *testing.T) { + a := assert.New(t) + + id := NewTable(New("database"), New("schema"), New("table")) + a.Equal(`"database"."schema"."table"`, id.String()) +} + +func TestRelative(t *testing.T) { + foo := New("foo") + + tcs := []struct { + table string + expected Table + expectError bool + }{ + { + table: "", + expectError: true, + }, + { + table: "foo", + expected: NewTable(StagingDB, Public, foo), + }, + { + table: "other.foo", + expected: NewTable(StagingDB, Public, foo), + }, + { + table: "other.schema.foo", + expected: NewTable(StagingDB, New("schema"), foo), + }, + { + table: "other.wat.schema.foo", + expectError: true, + }, + } + + for _, tc := range tcs { + t.Run(tc.table, func(t *testing.T) { + a := assert.New(t) + parsed, err := Relative(StagingDB, tc.table) + if tc.expectError { + a.Error(err) + return + } + a.Equal(tc.expected, parsed) + }) + } +} diff --git a/util.go b/internal/util/retry/retry.go similarity index 54% rename from util.go rename to internal/util/retry/retry.go index b96b650d..eb1fcd68 100644 --- a/util.go +++ b/internal/util/retry/retry.go @@ -1,4 +1,4 @@ -// Copyright 2020 The Cockroach Authors. +// Copyright 2021 The Cockroach Authors. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. @@ -8,7 +8,8 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -package main +// Package retry contains utility code for retrying database transactions. +package retry // This code is taken from the Cacheroach project. @@ -16,6 +17,7 @@ import ( "context" "github.com/jackc/pgconn" + "github.com/jackc/pgtype/pgxtype" "github.com/pkg/errors" ) @@ -23,13 +25,18 @@ import ( type Marker bool // Mark sets the flag. -func (m *Marker) Mark() { - *m = true -} +func (m *Marker) Mark() { *m = true } // Marked returns the flag status. -func (m *Marker) Marked() bool { - return bool(*m) +func (m *Marker) Marked() bool { return bool(*m) } + +// Execute is a wrapper around Retry that can be used for sql +// queries that don't have any return values. +func Execute(ctx context.Context, db pgxtype.Querier, query string, args ...interface{}) error { + return Retry(ctx, func(ctx context.Context) error { + _, err := db.Exec(ctx, query, args...) + return err + }) } // Retry is a convenience wrapper to automatically retry idempotent @@ -37,21 +44,35 @@ func (m *Marker) Marked() bool { // failure. The provided callback must be entirely idempotent, with // no observable side-effects during its execution. func Retry(ctx context.Context, idempotent func(context.Context) error) error { - return RetryLoop(ctx, func(ctx context.Context, _ *Marker) error { + return Loop(ctx, func(ctx context.Context, _ *Marker) error { return idempotent(ctx) }) } -// RetryLoop is a convenience wrapper to automatically retry idempotent -// database operations that experience a transaction or or connection +// inLoop is a key used by Loop to detect reentrant behavior. +var inLoop struct{} + +// Loop is a convenience wrapper to automatically retry idempotent +// database operations that experience a transaction or a connection // failure. 
The provided callback may indicate that it has started // generating observable effects (e.g. sending result data) by calling // its second parameter to disable the retry behavior. -func RetryLoop(ctx context.Context, fn func(ctx context.Context, sideEffect *Marker) error) error { +// +// If Loop is called in a reentrant fashion, the retry behavior will be +// suppressed within an inner loop, allowing the retryable error to +// percolate into the outer loop. +func Loop( + ctx context.Context, + fn func(ctx context.Context, sideEffect *Marker) error, +) error { + top := ctx.Value(inLoop) == nil + if top { + ctx = context.WithValue(ctx, inLoop, inLoop) + } var sideEffect Marker for { err := fn(ctx, &sideEffect) - if err == nil || sideEffect.Marked() { + if err == nil || sideEffect.Marked() || !top { return err } diff --git a/main.go b/main.go index 5672d6bf..224d476e 100644 --- a/main.go +++ b/main.go @@ -1,4 +1,4 @@ -// Copyright 2020 The Cockroach Authors. +// Copyright 2021 The Cockroach Authors. // // Use of this software is governed by the Business Source License // included in the file licenses/BSL.txt. @@ -12,77 +12,16 @@ package main import ( "context" - "encoding/json" "flag" "fmt" "log" - "net" - "net/http" + "os" "os/signal" "runtime" "runtime/debug" "syscall" - "time" - "github.com/jackc/pgx/v4/pgxpool" - "golang.org/x/net/http2" - "golang.org/x/net/http2/h2c" -) - -var connectionString = flag.String( - "conn", - "postgresql://root@localhost:26257/defaultdb?sslmode=disable", - "cockroach connection string", -) -var port = flag.Int("port", 26258, "http server listening port") - -var sinkDB = flag.String("sink_db", "_CDC_SINK", "db for storing temp sink tables") -var dropDB = flag.Bool("drop", false, "Drop the sink db before starting?") -var sinkDBZone = flag.Bool( - "sink_db_zone_override", - true, - "allow sink_db zone config to be overridden with the cdc-sink default values", -) - -var configuration = flag.String( - "config", - "", - `This flag must be set. It requires a single line for each table passed in. -The format is the following: -[ - {"endpoint":"", "source_table":"", "destination_database":"", "destination_table":""}, - {"endpoint":"", "source_table":"", "destination_database":"", "destination_table":""}, -] - -Each table being updated requires a single line. Note that source database is -not required. -Each changefeed requires the same endpoint and you can have more than one table -in a single changefeed. - -Here are two examples: - -1) Single table changefeed. Source table and destination table are both called -users: - -[{endpoint:"cdc.sql", source_table:"users", destination_database:"defaultdb", destination_table:"users"}] - -The changefeed is initialized on the source database: -CREATE CHANGEFEED FOR TABLE users INTO 'experimental-[cdc-sink-url:port]/cdc.sql' WITH updated,resolved - -2) Two table changefeed. Two tables this time, users and customers: - -[ - {"endpoint":"cdc.sql", "source_table":"users", "destination_database":"defaultdb", "destination_table":"users"}, - {"endpoint":"cdc.sql", "source_table":"customers", "destination_database":"defaultdb", "destination_table":"customers"}, -] - -The changefeed is initialized on the source database: -CREATE CHANGEFEED FOR TABLE users,customers INTO 'experimental-[cdc-sink-url:port]/cdc.sql' WITH updated,resolved - -As of right now, only a single endpoint is supported. 
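A hedged sketch of the relocated retry helpers in use, again assuming a caller inside this repository; streamRows, the table t, and the writer are illustrative, while Loop and Marker are the APIs shown above. Once Mark is called, output has escaped to the client, so a later retryable error is returned rather than re-running the callback:

package example

import (
	"context"
	"fmt"
	"io"

	"github.com/cockroachdb/cdc-sink/internal/util/retry"
	"github.com/jackc/pgx/v4/pgxpool"
)

// streamRows copies query results to w, retrying on transient database
// failures only while no data has been written yet.
func streamRows(ctx context.Context, db *pgxpool.Pool, w io.Writer) error {
	return retry.Loop(ctx, func(ctx context.Context, sideEffect *retry.Marker) error {
		rows, err := db.Query(ctx, "SELECT a FROM t ORDER BY a")
		if err != nil {
			return err
		}
		defer rows.Close()
		for rows.Next() {
			var a int
			if err := rows.Scan(&a); err != nil {
				return err
			}
			// Observable side effects begin here; disable retries.
			sideEffect.Mark()
			if _, err := fmt.Fprintln(w, a); err != nil {
				return err
			}
		}
		return rows.Err()
	})
}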
- -Don't forget to escape the json quotes: -./cdc-sink --config="[{\"endpoint\":\"test.sql\", \"source_table\":\"in_test1\", \"destination_database\":\"defaultdb\", \"destination_table\":\"out_test1\"},{\"endpoint\":\"test.sql\", \"source_table\":\"in_test2\", \"destination_database\":\"defaultdb\", \"destination_table\":\"out_test2\"}]"`, + "github.com/cockroachdb/cdc-sink/internal/frontend/server" ) var ( @@ -91,88 +30,9 @@ var ( printVersion = flag.Bool("version", false, "print version and exit") ) -func createHandler(db *pgxpool.Pool, sinks *Sinks) func(http.ResponseWriter, *http.Request) { - return func(w http.ResponseWriter, r *http.Request) { - // Is it an ndjson url? - ndjson, ndjsonErr := parseNdjsonURL(r.RequestURI) - if ndjsonErr == nil { - sink := sinks.FindSink(ndjson.endpoint, ndjson.topic) - if sink != nil { - sink.HandleRequest(db, w, r) - return - } - - // No sink found, throw an error. - http.Error( - w, - fmt.Sprintf("could not find a sync for %s", ndjson.topic), - http.StatusInternalServerError, - ) - return - } - - // Is it a resolved url? - resolved, resolvedErr := parseResolvedURL(r.RequestURI) - if resolvedErr == nil { - sinks.HandleResolvedRequest(r.Context(), db, resolved, w, r) - return - } - - // Could not recognize url. - http.Error( - w, - fmt.Sprintf("URL pattern does not match either an ndjson (%s) or a resolved (%s)", - ndjsonErr, resolvedErr, - ), - http.StatusInternalServerError, - ) - } -} - -// Config parses the passed in config. -type Config []ConfigEntry - -// ConfigEntry is a single table configuration entry in a config. -type ConfigEntry struct { - Endpoint string `json:"endpoint"` - SourceTable string `json:"source_table"` - DestinationDatabase string `json:"destination_database"` - DestinationTable string `json:"destination_table"` -} - -func parseConfig(rawConfig string) (Config, error) { - var config Config - if err := json.Unmarshal([]byte(rawConfig), &config); err != nil { - return Config{}, fmt.Errorf("could not parse config: %s", err.Error()) - } - - if len(config) == 0 { - return Config{}, fmt.Errorf("no config lines provided") - } - - for _, entry := range config { - if len(entry.Endpoint) == 0 { - return Config{}, fmt.Errorf("each config entry requires and endpoint") - } - - if len(entry.SourceTable) == 0 { - return Config{}, fmt.Errorf("each config entry requires a source_table") - } - - if len(entry.DestinationDatabase) == 0 { - return Config{}, fmt.Errorf("each config entry requires a destination_database") - } - - if len(entry.DestinationTable) == 0 { - return Config{}, fmt.Errorf("each config entry requires a destination_table") - } - } - - return config, nil -} - func main() { ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) + defer cancel() // First, parse the config. 
flag.Parse() @@ -193,49 +53,9 @@ func main() { return } - config, err := parseConfig(*configuration) - if err != nil { - log.Print(*configuration) - log.Fatal(err) - } - - db, err := pgxpool.Connect(ctx, *connectionString) - if err != nil { - log.Fatalf("could not parse config string: %v", err) - } - defer db.Close() - - if *dropDB { - if err := DropSinkDB(ctx, db); err != nil { - log.Fatalf("Could not drop the sinkDB:%s - %v", *sinkDB, err) - } - } - - if err := CreateSinkDB(ctx, db); err != nil { - log.Fatalf("Could not create the sinkDB:%s - %v", *sinkDB, err) - } - - sinks, err := CreateSinks(ctx, db, config) - if err != nil { - log.Fatal(err) + if err := server.Main(ctx); err != nil { + log.Printf("server exited: %v", err) + os.Exit(1) } - - l, err := net.Listen("tcp", fmt.Sprintf(":%d", *port)) - if err != nil { - log.Fatalf("could not open listener: %v", err) - } - log.Printf("listening on %s", l.Addr()) - - handler := http.Handler(http.HandlerFunc(createHandler(db, sinks))) - handler = h2c.NewHandler(handler, &http2.Server{}) - - // TODO(bob): Consider configuring timeouts - svr := &http.Server{Handler: handler} - go svr.Serve(l) - <-ctx.Done() - log.Printf("waiting for connections to drain") - cancel() - ctx, cancel = context.WithTimeout(context.Background(), 30*time.Second) - _ = svr.Shutdown(ctx) - cancel() + os.Exit(0) } diff --git a/main_test.go b/main_test.go deleted file mode 100644 index e43c9dd5..00000000 --- a/main_test.go +++ /dev/null @@ -1,1712 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "context" - "fmt" - "io" - "io/ioutil" - "math/rand" - "net/http" - "net/http/httptest" - "reflect" - "strings" - "testing" - "time" - - "log" - "os" - - "github.com/jackc/pgx/v4" - "github.com/jackc/pgx/v4/pgxpool" - "github.com/pkg/errors" - "github.com/stretchr/testify/assert" -) - -// These test require an insecure cockroach server is running on the default -// port with the default root user with no password. -var ( - r *rand.Rand - rawDb *pgxpool.Pool - dbVersion string -) - -// TestMain will open a database connection and set the cluster license -// if the COCKROACH_DEV_LICENSE environment variable is set. 
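A standalone sketch of the shutdown pattern the slimmed-down main adopts, with run standing in for server.Main (whose only contract inferable from the call site above is that it takes a context and returns an error):

package main

import (
	"context"
	"log"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	// signal.NotifyContext cancels ctx on the first SIGTERM/SIGINT; the
	// deferred cancel (the fix above) releases the signal registration
	// on all exit paths.
	ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
	defer cancel()

	if err := run(ctx); err != nil {
		log.Printf("server exited: %v", err)
		os.Exit(1)
	}
}

// run blocks until the context is canceled, then shuts down cleanly.
func run(ctx context.Context) error {
	<-ctx.Done()
	return nil
}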
-func TestMain(m *testing.M) { - r = rand.New(rand.NewSource(time.Now().UnixNano())) - - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - - var err error - rawDb, err = pgxpool.Connect(ctx, *connectionString) - if err != nil { - log.Fatalf("could not open database connection: %v", err) - } - - if lic, ok := os.LookupEnv("COCKROACH_DEV_LICENSE"); ok { - if _, err := rawDb.Exec(ctx, - "SET CLUSTER SETTING cluster.organization = $1", - "Cockroach Labs - Production Testing", - ); err != nil { - log.Fatalf("could not set cluster.organization: %v", err) - } - if _, err := rawDb.Exec(ctx, - "SET CLUSTER SETTING enterprise.license = $1", lic, - ); err != nil { - log.Fatalf("could not set enterprise.license: %v", err) - } - } - - if err := Execute(ctx, rawDb, "SET CLUSTER SETTING kv.rangefeed.enabled = true"); err != nil { - log.Fatalf("could not enable rangefeeds: %v", err) - return - } - - if err := Retry(ctx, func(ctx context.Context) error { - row := rawDb.QueryRow(ctx, "SELECT version()") - if err := row.Scan(&dbVersion); err != nil { - return err - } - - return nil - }); err != nil { - log.Fatalf("could not determine cluster version: %v", err) - } - - os.Exit(m.Run()) -} - -const endpointTest = "test.sql" - -// getDB creates a new testing DB, return the name of that db and a closer that -// will drop the table and close the db connection. -func getDB(ctx context.Context) (db *pgxpool.Pool, dbName string, closer func(), err error) { - db = rawDb - // Create the testing database - dbNum := r.Intn(10000) - dbName = fmt.Sprintf("_test_db_%d", dbNum) - - if err = Execute(ctx, db, fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", dbName)); err != nil { - return - } - - if err = Execute(ctx, db, fmt.Sprintf(sinkDBZoneConfig, dbName)); err != nil { - return - } - - closer = func() { - _ = Execute(ctx, db, fmt.Sprintf("DROP DATABASE %s CASCADE", dbName)) - } - - return -} - -func getRowCount(ctx context.Context, db *pgxpool.Pool, fullTableName string) (int, error) { - var count int - if err := Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", fullTableName)).Scan(&count) - }); err != nil { - return 0, err - } - return count, nil -} - -type tableInfo struct { - db *pgxpool.Pool - dbName string - name string -} - -func (ti tableInfo) String() string { - return fmt.Sprintf("%s.%s", ti.dbName, ti.name) -} - -func (ti tableInfo) getFullName() string { - return fmt.Sprintf("%s.%s", ti.dbName, ti.name) -} - -func (ti *tableInfo) deleteAll(ctx context.Context) error { - return Execute(ctx, ti.db, fmt.Sprintf("DELETE FROM %s WHERE true", ti.getFullName())) -} - -func (ti tableInfo) getTableRowCount(ctx context.Context) (int, error) { - return getRowCount(ctx, ti.db, ti.getFullName()) -} - -func (ti tableInfo) dropTable(ctx context.Context) error { - return Execute(ctx, ti.db, fmt.Sprintf("DROP TABLE IF EXISTS %s", ti.getFullName())) -} - -// This function creates a test table and returns a unique name. -// The schemaSpec parameter must have exactly two %s substitution -// parameters for the database name and table name. -func createTestTable(ctx context.Context, db *pgxpool.Pool, dbName, schemaSpec string) (tableInfo, error) { - var tableName string - -outer: - for { - // Create the testing database - tableNum := r.Intn(10000) - tableName = fmt.Sprintf("_test_table_%d", tableNum) - - // Find the DB. 
- var actualTableName string - err := Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, - fmt.Sprintf("SELECT table_name FROM [SHOW TABLES FROM %s] WHERE table_name = $1", dbName), - tableName, - ).Scan(&actualTableName) - }) - switch err { - case pgx.ErrNoRows: - break outer - case nil: - continue - default: - return tableInfo{}, err - } - } - - if err := Execute(ctx, db, fmt.Sprintf(schemaSpec, dbName, tableName)); err != nil { - return tableInfo{}, err - } - - return tableInfo{ - db: db, - dbName: dbName, - name: tableName, - }, nil -} - -type tableInfoSimple struct { - tableInfo - rowCount int -} - -const tableSimpleSchema = ` -CREATE TABLE %s.%s ( - a INT PRIMARY KEY, - b INT -) -` - -func createTestSimpleTable(ctx context.Context, db *pgxpool.Pool, dbName string) (tableInfoSimple, error) { - info, err := createTestTable(ctx, db, dbName, tableSimpleSchema) - return tableInfoSimple{tableInfo: info}, err -} - -func (tis *tableInfoSimple) populateTable(ctx context.Context, count int) error { - for i := 0; i < count; i++ { - if err := Execute( - ctx, - tis.db, - fmt.Sprintf("INSERT INTO %s VALUES ($1, $1)", tis.getFullName()), - tis.rowCount+1, - ); err != nil { - return err - } - tis.rowCount++ - } - return nil -} - -func (tis *tableInfoSimple) updateNoneKeyColumns(ctx context.Context) error { - return Execute( - ctx, - tis.db, - fmt.Sprintf("UPDATE %s SET b=b*100 WHERE true", tis.getFullName()), - ) -} - -func (tis *tableInfoSimple) updateAll(ctx context.Context) error { - return Execute( - ctx, - tis.db, - fmt.Sprintf("UPDATE %s SET a=a*100000, b=b*100000 WHERE true", tis.getFullName()), - ) -} - -func (tis *tableInfoSimple) maxB(ctx context.Context) (int, error) { - var max int - err := Retry(ctx, func(ctx context.Context) error { - return tis.db.QueryRow( - ctx, - fmt.Sprintf("SELECT max(b) FROM %s", tis.getFullName()), - ).Scan(&max) - }) - return max, err -} - -// tableInfoComposite is a table with a composite primary key. -type tableInfoComposite struct { - tableInfo - rowCount int -} - -const tableCompositeSchema = ` -CREATE TABLE %s.%s ( - a INT, - b INT, - c INT, - PRIMARY KEY (a, b) -) -` - -func createTestCompositeTable(ctx context.Context, db *pgxpool.Pool, dbName string) (tableInfoComposite, error) { - info, err := createTestTable(ctx, db, dbName, tableCompositeSchema) - return tableInfoComposite{tableInfo: info}, err -} - -func (tis *tableInfoComposite) populateTable(ctx context.Context, count int) error { - for i := 0; i < count; i++ { - if err := Execute( - ctx, - tis.db, - fmt.Sprintf("INSERT INTO %s VALUES ($1, $1, $1)", tis.getFullName()), - tis.rowCount+1, - ); err != nil { - return err - } - tis.rowCount++ - } - return nil -} - -type tableInfoClob struct { - tableInfo - clobSize int // The number of bytes to generate per row. - rowCount int // A running total for code generation. 
-} - -const tableClobSchema = ` -CREATE TABLE %s.%s ( - a INT NOT NULL PRIMARY KEY, - data TEXT -) -` - -func createTestClobTable(ctx context.Context, db *pgxpool.Pool, dbName string, clobSize int) (tableInfoClob, error) { - if clobSize <= 0 { - clobSize = 8 * 1024 - } - info, err := createTestTable(ctx, db, dbName, tableClobSchema) - return tableInfoClob{tableInfo: info, clobSize: clobSize}, err -} - -func (tic *tableInfoClob) populateTable(ctx context.Context, count int) error { - for i := 0; i < count; i++ { - c := tic.rowCount + 1 - data, err := ioutil.ReadAll(clobData(tic.clobSize, c)) - if err != nil { - return err - } - if err := Execute( - ctx, - tic.db, - fmt.Sprintf("INSERT INTO %s VALUES ($1, $2)", tic.getFullName()), - c, - string(data), - ); err != nil { - return err - } - tic.rowCount++ - } - return nil -} - -// tableInfoComputed is used for tables that have magic columns -type tableInfoComputed struct { - tableInfo - rowCount int -} - -const tableComputedSchema = ` -SET experimental_enable_hash_sharded_indexes = on; -CREATE TABLE %s.%s ( - a INT PRIMARY KEY, - b INT, - c INT AS (a + b) STORED, - d INT AS (a + b) VIRTUAL, - INDEX (b ASC) USING HASH WITH BUCKET_COUNT = 8 -) -` - -func createTestComputedTable(ctx context.Context, db *pgxpool.Pool, dbName string) (tableInfoComputed, error) { - info, err := createTestTable(ctx, db, dbName, tableComputedSchema) - return tableInfoComputed{tableInfo: info}, err -} - -func (ti *tableInfoComputed) populateTable(ctx context.Context, count int) error { - for i := 0; i < count; i++ { - if err := Execute( - ctx, - ti.db, - fmt.Sprintf("INSERT INTO %s VALUES ($1, $1)", ti.getFullName()), - ti.rowCount+1, - ); err != nil { - return err - } - ti.rowCount++ - } - return nil -} - -type jobInfo struct { - db *pgxpool.Pool - id int -} - -func (ji *jobInfo) cancelJob(ctx context.Context) error { - if ji.id == 0 { - return nil - } - if err := Execute(ctx, ji.db, fmt.Sprintf("CANCEL JOB %d", ji.id)); err != nil { - return err - } - ji.id = 0 - return nil -} - -func createChangeFeed( - ctx context.Context, db *pgxpool.Pool, url string, endpoint string, tis ...tableInfo, -) (jobInfo, error) { - var query strings.Builder - fmt.Fprint(&query, "CREATE CHANGEFEED FOR TABLE ") - for i := 0; i < len(tis); i++ { - if i != 0 { - fmt.Fprint(&query, ", ") - } - fmt.Fprint(&query, tis[i].getFullName()) - } - fmt.Fprintf(&query, " INTO 'experimental-%s/%s' WITH updated,resolved", url, endpoint) - var jobID int - err := Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, query.String()).Scan(&jobID) - }) - return jobInfo{ - db: db, - id: jobID, - }, err -} - -// dropSinkDB is just a wrapper around DropSinkDB for testing. -func dropSinkDB(ctx context.Context, db *pgxpool.Pool) error { - return DropSinkDB(ctx, db) -} - -// createSinkDB will first drop then create a new sink db. -func createSinkDB(ctx context.Context, db *pgxpool.Pool) error { - if err := dropSinkDB(ctx, db); err != nil { - return err - } - return CreateSinkDB(ctx, db) -} - -// TestDB is just a quick test to create and drop a database to ensure the -// Cockroach Cluster is working correctly and we have the correct permissions. -func TestDB(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Find the DB. 
- var actualDBName string - if err := Retry(ctx, func(ctx context.Context) error { - return db.QueryRow( - ctx, - `SELECT database_name FROM [SHOW DATABASES] WHERE database_name = $1`, dbName, - ).Scan(&actualDBName) - }); !a.NoError(err) { - return - } - - if !a.Equal(actualDBName, dbName, "db names do not match") { - return - } - - // Create a test table and insert some rows - table, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - if !a.NoError(table.populateTable(ctx, 10)) { - return - } - count, err := table.getTableRowCount(ctx) - a.Equal(10, count, "row count") - a.NoError(err) -} - -func createConfig(source tableInfo, destination tableInfo, endpoint string) Config { - return Config{ - ConfigEntry{ - Endpoint: endpoint, - SourceTable: source.name, - DestinationDatabase: destination.dbName, - DestinationTable: destination.name, - }, - } -} - -func TestFeedInsert(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - count, err := tableFrom.getTableRowCount(ctx) - a.Equal(10, count, "rows") - if !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - // Wait for sync to occur. - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - for { - if !a.NoError(ctx.Err()) { - return - } - toCount, err := tableTo.getTableRowCount(ctx) - if !a.NoError(err) { - return - } - fromCount, err := tableFrom.getTableRowCount(ctx) - if !a.NoError(err) { - return - } - if toCount == fromCount { - break - } - } - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - // Wait for sync to occur again. - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "sink table not empty") - a.NoError(err) -} - -func TestFeedDelete(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - if !a.NoError(tableFrom.deleteAll(ctx)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func TestFeedDeleteCompositeKey(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestCompositeTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestCompositeTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer(http.HandlerFunc(handler)) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - if !a.NoError(tableFrom.deleteAll(ctx)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func TestFeedUpdateNonPrimary(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count) || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - if !a.NoError(tableFrom.updateNoneKeyColumns(ctx)) { - return - } - - if !a.NoError(loopUntilMaxB(ctx, &tableFrom, &tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func TestFeedUpdatePrimary(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - if !a.NoError(tableFrom.updateAll(ctx)) { - return - } - - if !a.NoError(loopUntilMaxB(ctx, &tableFrom, &tableTo)) { - return - } - - // Make sure sink table is empty here. 
- sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func TestTypes(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the sinks - sinks, err := CreateSinks(ctx, db, []ConfigEntry{}) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - testcases := []struct { - name string - columnType string - columnValue string - indexable bool - }{ - {`string_array`, `STRING[]`, `{"sky","road","car"}`, false}, - {`string_array_null`, `STRING[]`, ``, false}, - {`int_array`, `INT[]`, `{1,2,3}`, false}, - {`int_array_null`, `INT[]`, ``, false}, - {`serial_array`, `SERIAL[]`, `{148591304110702593,148591304110702594,148591304110702595}`, false}, - {`serial_array_null`, `SERIAL[]`, ``, false}, - {`bit`, `VARBIT`, `10010101`, true}, - {`bit_null`, `VARBIT`, ``, false}, - {`bool`, `BOOL`, `true`, true}, - {`bool_null`, `BOOL`, ``, false}, - {`bytes`, `BYTES`, `b'\141\061\142\062\143\063'`, true}, - {`collate`, `STRING COLLATE de`, `'a1b2c3' COLLATE de`, true}, - {`collate_null`, `STRING COLLATE de`, ``, false}, - {`date`, `DATE`, `2016-01-25`, true}, - {`date_null`, `DATE`, ``, false}, - {`decimal`, `DECIMAL`, `1.2345`, true}, - {`decimal_null`, `DECIMAL`, ``, false}, - {`float`, `FLOAT`, `1.2345`, true}, - {`float_null`, `FLOAT`, ``, false}, - // {`geography`, `GEOGRAPHY`, `0101000020E6100000000000000000F03F0000000000000040`, false}, - // {`geometry`, `GEOMETRY`, `010100000075029A081B9A5DC0F085C954C1F84040`, false}, - {`inet`, `INET`, `192.168.0.1`, true}, - {`inet_null`, `INET`, ``, false}, - {`int`, `INT`, `12345`, true}, - {`int_null`, `INT`, ``, false}, - {`interval`, `INTERVAL`, `2h30m30s`, true}, - {`interval_null`, `INTERVAL`, ``, false}, - { - `jsonb`, - `JSONB`, - ` - { - "string": "Lola", - "bool": true, - "number": 547, - "float": 123.456, - "array": [ - "lola", - true, - 547, - 123.456, - [ - "lola", - true, - 547, - 123.456 - ], - { - "string": "Lola", - "bool": true, - "number": 547, - "float": 123.456, - "array": [ - "lola", - true, - 547, - 123.456, - [ - "lola", - true, - 547, - 123.456 - ] - ] - } - ], - "map": { - "string": "Lola", - "bool": true, - "number": 547, - "float": 123.456, - "array": [ - "lola", - true, - 547, - 123.456, - [ - "lola", - true, - 547, - 123.456 - ], - { - "string": "Lola", - "bool": true, - "number": 547, - "float": 123.456, - "array": [ - "lola", - true, - 547, - 123.456, - [ - "lola", - true, - 547, - 123.456 - ] - ] - } - ] - } - } - `, - false, - }, - {`jsonb_null`, `JSONB`, ``, false}, - {`serial`, `SERIAL`, `148591304110702593`, true}, - // serial cannot be null - {`string`, `STRING`, `a1b2c3`, true}, - {`string_null`, `STRING`, ``, false}, - {`string_escape`, `STRING`, `a1\b/2?c"3`, true}, - {`time`, `TIME`, `01:23:45.123456`, true}, - {`time_null`, `TIME`, ``, false}, - {`timestamp`, `TIMESTAMP`, `2016-01-25 10:10:10`, true}, - {`timestamp_null`, `TIMESTAMP`, ``, false}, - {`timestamptz`, `TIMESTAMPTZ`, `2016-01-25 10:10:10-05:00`, true}, - 
{`timestamptz_null`, `TIMESTAMPTZ`, ``, false}, - {`uuid`, `UUID`, `7f9c24e8-3b12-4fef-91e0-56a2d5a246ec`, true}, - {`uuid_null`, `UUID`, ``, false}, - } - - tableIndexableSchema := `CREATE TABLE %s (a %s PRIMARY KEY, b %s)` - tableNonIndexableSchema := `CREATE TABLE %s (a INT PRIMARY KEY, b %s)` - - for _, test := range testcases { - t.Run(test.name, func(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) - defer cancel() - - tableIn := tableInfo{ - db: db, - dbName: dbName, - name: fmt.Sprintf("in_%s", test.name), - } - tableOut := tableInfo{ - db: db, - dbName: dbName, - name: fmt.Sprintf("out_%s", test.name), - } - - // Drop both tables if they already exist. - if !a.NoError(tableIn.dropTable(ctx)) { - return - } - if !a.NoError(tableOut.dropTable(ctx)) { - return - } - - // Create both tables. - if test.indexable { - if !a.NoError(Execute(ctx, db, fmt.Sprintf( - tableIndexableSchema, tableIn.getFullName(), test.columnType, test.columnType, - ))) { - return - } - if !a.NoError(Execute(ctx, db, fmt.Sprintf( - tableIndexableSchema, tableOut.getFullName(), test.columnType, test.columnType, - ))) { - return - } - } else { - if !a.NoError(Execute(ctx, db, fmt.Sprintf( - tableNonIndexableSchema, tableIn.getFullName(), test.columnType, - ))) { - return - } - if !a.NoError(Execute(ctx, db, fmt.Sprintf( - tableNonIndexableSchema, tableOut.getFullName(), test.columnType, - ))) { - return - } - } - - // Defer a table drop for both tables to clean them up. - defer tableIn.dropTable(ctx) - defer tableOut.dropTable(ctx) - - // Create the sink - // There is no way to remove a sink at this time, and that should be ok - // for these tests. - if !a.NoError(sinks.AddSink(ctx, db, ConfigEntry{ - Endpoint: endpointTest, - DestinationDatabase: dbName, - DestinationTable: tableOut.name, - SourceTable: tableIn.name, - })) { - return - } - - // Create the CDC feed. - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableIn) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - // Insert a row into the in table. - if test.indexable { - if !a.NoError(Execute(ctx, db, - fmt.Sprintf("INSERT INTO %s (a,b) VALUES ($1,$2)", tableIn.getFullName()), - test.columnValue, test.columnValue, - )) { - return - } - } else { - value := interface{}(test.columnValue) - if len(test.columnValue) == 0 { - value = nil - } - if !a.NoError(Execute(ctx, db, - fmt.Sprintf("INSERT INTO %s (a, b) VALUES (1, $1)", tableIn.getFullName()), - value, - )) { - return - } - } - - // Wait until the out table has a row. - for { - count, err := tableOut.getTableRowCount(ctx) - if !a.NoError(err) { - return - } - if count > 0 { - break - } - } - - // Now fetch that rows and compare them. 
- var inA, inB interface{} - if !a.NoError(Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, - fmt.Sprintf("SELECT a, b FROM %s LIMIT 1", tableIn.getFullName()), - ).Scan(&inA, &inB) - })) { - return - } - var outA, outB interface{} - if !a.NoError(Retry(ctx, func(ctx context.Context) error { - return db.QueryRow(ctx, - fmt.Sprintf("SELECT a, b FROM %s LIMIT 1", tableOut.getFullName()), - ).Scan(&outA, &outB) - })) { - return - } - a.Equal(fmt.Sprintf("%v", inA), fmt.Sprintf("%v", outA), "A") - a.Equal(fmt.Sprintf("%v", inB), fmt.Sprintf("%v", outB), "B") - }) - } -} - -func TestConfig(t *testing.T) { - testCases := []struct { - name string - testJSON string - expectedPass bool - expectedConfig Config - }{ - { - name: "empty", - testJSON: "", - expectedPass: false, - }, - { - name: "empty2", - testJSON: "[]", - expectedPass: false, - }, - { - name: "empty3", - testJSON: "[{}]", - expectedPass: false, - }, - { - name: "missing endpoint", - testJSON: `[{"source_table":"s_tbl", "destination_database":"d_db", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "missing source table", - testJSON: `[{"endpoint":"test.sql", "destination_database":"d_db", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "missing destination database", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "missing destination table", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_database":"d_db"}]`, - expectedPass: false, - }, - { - name: "empty endpoint", - testJSON: `[{"endpoint":"", "source_table":"s_tbl", "destination_database":"d_db", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "empty source table", - testJSON: `[{"endpoint":"test.sql", "source_table":"", "destination_database":"d_db", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "empty destination database", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_database":"", "destination_table":"dt_tbl"}]`, - expectedPass: false, - }, - { - name: "empty destination table", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_database":"d_db", "destination_table":""}]`, - expectedPass: false, - }, - { - name: "single", - testJSON: `[{"endpoint":"test.sql", "source_table":"s_tbl", "destination_database":"d_db", "destination_table":"d_tbl"}]`, - expectedPass: true, - expectedConfig: Config{ - ConfigEntry{Endpoint: "test.sql", SourceTable: "s_tbl", DestinationDatabase: "d_db", DestinationTable: "d_tbl"}, - }, - }, - { - name: "double", - testJSON: `[ - {"endpoint":"test.sql", "source_table":"s_tbl1", "destination_database":"d_db", "destination_table":"d_tbl1"}, - {"endpoint":"test.sql", "source_table":"s_tbl2", "destination_database":"d_db", "destination_table":"d_tbl2"} -]`, - expectedPass: true, - expectedConfig: Config{ - ConfigEntry{Endpoint: "test.sql", SourceTable: "s_tbl1", DestinationDatabase: "d_db", DestinationTable: "d_tbl1"}, - ConfigEntry{Endpoint: "test.sql", SourceTable: "s_tbl2", DestinationDatabase: "d_db", DestinationTable: "d_tbl2"}, - }, - }, - { - name: "triple", - testJSON: `[ - {"endpoint":"test1.sql", "source_table":"s_tbl1", "destination_database":"d_db1", "destination_table":"d_tbl1"}, - {"endpoint":"test1.sql", "source_table":"s_tbl2", "destination_database":"d_db1", "destination_table":"d_tbl2"}, - {"endpoint":"test2.sql", "source_table":"s_tbl3", 
"destination_database":"d_db2", "destination_table":"d_tbl3"} -]`, - expectedPass: true, - expectedConfig: Config{ - ConfigEntry{Endpoint: "test1.sql", SourceTable: "s_tbl1", DestinationDatabase: "d_db1", DestinationTable: "d_tbl1"}, - ConfigEntry{Endpoint: "test1.sql", SourceTable: "s_tbl2", DestinationDatabase: "d_db1", DestinationTable: "d_tbl2"}, - ConfigEntry{Endpoint: "test2.sql", SourceTable: "s_tbl3", DestinationDatabase: "d_db2", DestinationTable: "d_tbl3"}, - }, - }, - } - - for _, test := range testCases { - t.Run(test.name, func(t *testing.T) { - a := assert.New(t) - - actual, err := parseConfig(test.testJSON) - if test.expectedPass { - a.NoError(err) - a.True(reflect.DeepEqual(test.expectedConfig, actual)) - } else { - a.Error(err) - } - }) - } -} - -func TestMultipleFeeds(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - testcases := []struct { - feedCount int - tablesPerFeed int - populateCount int - }{ - {1, 1, 1000}, - {1, 2, 10}, - {1, 3, 10}, - {2, 1, 10}, - {2, 2, 10}, - {2, 3, 10}, - {3, 1, 10}, - {3, 2, 10}, - {3, 3, 10}, - } - - nameEndpoint := func(feedID int) string { - return fmt.Sprintf("test_%d_%s", feedID, endpointTest) - } - - for _, testcase := range testcases { - t.Run(fmt.Sprintf("Feeds_%d_Tables_%d_Size_%d", - testcase.feedCount, testcase.tablesPerFeed, testcase.populateCount, - ), func(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create all the tables - var sourceTablesByFeed [][]*tableInfoSimple - var destinationTablesByFeed [][]*tableInfoSimple - for i := 0; i < testcase.feedCount; i++ { - var sourceTables []*tableInfoSimple - var destinationTables []*tableInfoSimple - for j := 0; j < testcase.tablesPerFeed; j++ { - sourceTable, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoErrorf(err, "create source i=%d, j=%d", i, j) { - return - } - sourceTables = append(sourceTables, &sourceTable) - destinationTable, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoErrorf(err, "create dest i=%d, j=%d", i, j) { - return - } - destinationTables = append(destinationTables, &destinationTable) - } - sourceTablesByFeed = append(sourceTablesByFeed, sourceTables) - destinationTablesByFeed = append(destinationTablesByFeed, destinationTables) - } - - // Populate all the source tables - for _, feedTables := range sourceTablesByFeed { - for _, table := range feedTables { - if !a.NoError(table.populateTable(ctx, testcase.populateCount), table.name) { - return - } - } - } - - // Create the sinks - sinks, err := CreateSinks(ctx, db, []ConfigEntry{}) - if !a.NoError(err) { - return - } - - // Create all the sinks - for i := 0; i < testcase.feedCount; i++ { - for j := 0; j < testcase.tablesPerFeed; j++ { - if !a.NoErrorf(sinks.AddSink(ctx, db, ConfigEntry{ - Endpoint: nameEndpoint(i), - DestinationDatabase: destinationTablesByFeed[i][j].dbName, - DestinationTable: destinationTablesByFeed[i][j].name, - SourceTable: sourceTablesByFeed[i][j].name, - }), "AddSink i=%d j=%d", i, j) { - return - } - } - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer(http.HandlerFunc(handler)) - defer server.Close() - - // Create the changefeeds - for i := 0; i < 
testcase.feedCount; i++ { - var tableInfos []tableInfo - for _, table := range sourceTablesByFeed[i] { - tableInfos = append(tableInfos, table.tableInfo) - } - job, err := createChangeFeed(ctx, db, server.URL, nameEndpoint(i), tableInfos...) - if !a.NoErrorf(err, "changefeed %d", i) { - return - } - defer job.cancelJob(ctx) - } - - // Add some more lines to each table. - // Populate all the source tables - for _, feedTables := range sourceTablesByFeed { - for _, table := range feedTables { - if !a.NoError(table.populateTable(ctx, testcase.populateCount), table.name) { - return - } - } - } - - // Make sure each table has 20 rows - for _, feedTables := range destinationTablesByFeed { - for _, table := range feedTables { - // Wait until table is populated - for { - count, err := table.getTableRowCount(ctx) - if !a.NoError(err, table) { - return - } - if count == testcase.populateCount*2 { - break - } - } - } - } - - // Update all rows in the source table. - for _, feedTables := range sourceTablesByFeed { - for _, table := range feedTables { - a.NoErrorf(table.updateAll(ctx), "updateAll %s", table) - } - } - - // Make sure each table has 20 rows - for i, feedTables := range destinationTablesByFeed { - for j, table := range feedTables { - tableB, err := table.maxB(ctx) - if !a.NoError(err, table.String()) { - return - } - sourceB, err := sourceTablesByFeed[i][j].maxB(ctx) - if !a.NoError(err, sourceTablesByFeed[i][j].String()) { - return - } - if tableB == sourceB { - break - } - } - } - - // Delete all rows in the table. - for _, feedTables := range sourceTablesByFeed { - for _, table := range feedTables { - a.NoErrorf(table.deleteAll(ctx), "deleting %s", table) - } - } - - // Make sure each table is drained. - for _, feedTables := range destinationTablesByFeed { - for _, table := range feedTables { - for { - count, err := table.getTableRowCount(ctx) - if !a.NoError(err) { - return - } - if count == 0 { - break - } - } - } - } - }) - } -} - -func TestLargeClobs(t *testing.T) { - const clobSize = 5 * 1024 - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestClobTable(ctx, db, dbName, clobSize) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestClobTable(ctx, db, dbName, clobSize) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - t.Log("Waiting for sync") - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // 
Make sure sink table is empty here. - sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -// TestComputedColumns ensures that tables which contain computed (or -// otherwise magic) columns can be syndicated. -func TestComputedColumns(t *testing.T) { - if strings.Contains(dbVersion, "v20.2.") { - t.Skip("VIRTUAL columns not supported on v20.2") - } - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestComputedTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestComputedTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); !a.Equal(10, count, "row count") || !a.NoError(err) { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - // Create a test http server - handler := createHandler(db, sinks) - server := httptest.NewServer( - http.HandlerFunc(handler), - ) - defer server.Close() - t.Log(server.URL) - - job, err := createChangeFeed(ctx, db, server.URL, endpointTest, tableFrom.tableInfo) - if !a.NoError(err) { - return - } - defer job.cancelJob(ctx) - - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - t.Log("Waiting for sync") - if !a.NoError(loopUntilSync(ctx, tableFrom, tableTo)) { - return - } - - // Make sure sink table is empty here. - sink := sinks.FindSink(endpointTest, tableFrom.name) - sinkCount, err := getRowCount(ctx, db, sink.sinkTableFullName) - a.Equal(0, sinkCount, "expected empty sink table") - a.NoError(err) -} - -func loopUntilMaxB( - ctx context.Context, - tableTo, tableFrom interface { - maxB(context.Context) (int, error) - }, -) error { - for { - if err := ctx.Err(); err != nil { - return err - } - toCount, err := tableTo.maxB(ctx) - if err != nil { - return errors.Wrap(err, "querying to") - } - fromCount, err := tableFrom.maxB(ctx) - if err != nil { - return errors.Wrap(err, "querying from") - } - if toCount == fromCount { - break - } - } - return nil -} - -func loopUntilSync( - ctx context.Context, - tableTo, tableFrom interface { - getTableRowCount(context.Context) (int, error) - }, -) error { - for { - if err := ctx.Err(); err != nil { - return err - } - toCount, err := tableTo.getTableRowCount(ctx) - if err != nil { - return errors.Wrap(err, "querying to") - } - fromCount, err := tableFrom.getTableRowCount(ctx) - if err != nil { - return errors.Wrap(err, "querying from") - } - if toCount == fromCount { - break - } - } - return nil -} - -// clobData returns a reader that will generate some number of bytes. -// The nonce value is used to perturb the sequence. 
-func clobData(legnth, nonce int) io.Reader { - ret := &io.LimitedReader{R: &clobSourceReader{}, N: int64(nonce + legnth)} - nonce = nonce % len(clobSourceTest) - _, _ = io.CopyN(io.Discard, ret, int64(nonce)) - return ret -} - -const clobSourceTest = "_abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ!" - -// clobSourceReader returns an infinitely long sequence of data. Use -// the clobData function instead. -type clobSourceReader struct{} - -// Read will fill the buffer with data. -func (c *clobSourceReader) Read(p []byte) (n int, err error) { - ret := len(p) - for len(p) >= len(clobSourceTest) { - copy(p, clobSourceTest) - p = p[len(clobSourceTest):] - } - if rem := len(p); rem > 0 { - copy(p, clobSourceTest[:rem]) - } - return ret, nil -} diff --git a/resolved_table.go b/resolved_table.go deleted file mode 100644 index 9a4e0374..00000000 --- a/resolved_table.go +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "context" - "encoding/json" - "fmt" - - "github.com/jackc/pgtype/pgxtype" - "github.com/jackc/pgx/v4" - "github.com/jackc/pgx/v4/pgxpool" -) - -const resolvedTableSchema = ` -CREATE TABLE IF NOT EXISTS %s ( - endpoint STRING PRIMARY KEY, - nanos INT NOT NULL, - logical INT NOT NULL -) -` - -// Make this an option? -const resolvedTableName = `_release` - -const resolvedTableQuery = `SELECT endpoint, nanos, logical FROM %s WHERE endpoint = $1` - -const resolvedTableWrite = `UPSERT INTO %s (endpoint, nanos, logical) VALUES ($1, $2, $3)` - -func resolvedFullTableName() string { - return fmt.Sprintf("%s.%s", *sinkDB, resolvedTableName) -} - -// CreateResolvedTable creates a release table if none exists. -func CreateResolvedTable(ctx context.Context, db *pgxpool.Pool) error { - return Execute(ctx, db, fmt.Sprintf(resolvedTableSchema, resolvedFullTableName())) -} - -// ResolvedLine is used to parse a json line in the request body of a resolved -// message. -type ResolvedLine struct { - // These are use for parsing the resolved line. - Resolved string `json:"resolved"` - - // There are used for storing back into the resolved table. - nanos int64 - logical int - endpoint string -} - -func parseResolvedLine(rawBytes []byte, endpoint string) (ResolvedLine, error) { - resolvedLine := ResolvedLine{ - endpoint: endpoint, - } - json.Unmarshal(rawBytes, &resolvedLine) - - // Prase the timestamp into nanos and logical. - var err error - resolvedLine.nanos, resolvedLine.logical, err = parseSplitTimestamp(resolvedLine.Resolved) - if err != nil { - return ResolvedLine{}, err - } - if resolvedLine.nanos == 0 { - return ResolvedLine{}, fmt.Errorf("no nano component to the 'updated' timestamp field") - } - - return resolvedLine, nil -} - -// getPreviousResolvedTimestamp returns the last recorded resolved for a -// specific endpoint. -func getPreviousResolved(ctx context.Context, tx pgxtype.Querier, endpoint string) (ResolvedLine, error) { - // Needs retry. 
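parseSplitTimestamp is referenced here but not part of this diff; a hypothetical reconstruction of the nanos/logical split it performs, based only on the resolved-timestamp format exercised by the tests that follow (e.g. "1586020760120222000.0000000000" parsing to nanos 1586020760120222000 and logical 0):

package example

import (
	"fmt"
	"strconv"
	"strings"
)

// splitTimestamp is an illustrative stand-in for parseSplitTimestamp:
// CockroachDB resolved timestamps are "<wall nanos>.<logical>" strings.
func splitTimestamp(ts string) (nanos int64, logical int, err error) {
	parts := strings.Split(ts, ".")
	if len(parts) != 2 {
		return 0, 0, fmt.Errorf("can't parse timestamp %q", ts)
	}
	if nanos, err = strconv.ParseInt(parts[0], 10, 64); err != nil {
		return 0, 0, err
	}
	if logical, err = strconv.Atoi(parts[1]); err != nil {
		return 0, 0, err
	}
	return nanos, logical, nil
}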
- var resolvedLine ResolvedLine - err := tx.QueryRow(ctx, - fmt.Sprintf(resolvedTableQuery, resolvedFullTableName()), endpoint, - ).Scan(&(resolvedLine.endpoint), &(resolvedLine.nanos), &(resolvedLine.logical)) - switch err { - case pgx.ErrNoRows: - // No line exists yet, go back to the start of time. - return ResolvedLine{endpoint: endpoint}, nil - case nil: - // Found the line. - return resolvedLine, nil - default: - return ResolvedLine{}, err - } -} - -// Writes the updated timestamp to the resolved table. -func (rl ResolvedLine) writeUpdated(ctx context.Context, tx pgxtype.Querier) error { - // Needs retry. - _, err := tx.Exec(ctx, fmt.Sprintf(resolvedTableWrite, resolvedFullTableName()), - rl.endpoint, rl.nanos, rl.logical, - ) - return err -} diff --git a/resolved_table_test.go b/resolved_table_test.go deleted file mode 100644 index a8b52aee..00000000 --- a/resolved_table_test.go +++ /dev/null @@ -1,190 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "context" - "fmt" - "testing" - - "github.com/jackc/pgx/v4/pgxpool" - "github.com/stretchr/testify/assert" -) - -// These test require an insecure cockroach server is running on the default -// port with the default root user with no password. - -func (rl ResolvedLine) writeUpdatedDB(ctx context.Context, db *pgxpool.Pool) error { - return Retry(ctx, func(ctx context.Context) error { - return rl.writeUpdated(ctx, db) - }) -} - -func getPreviousResolvedDB(ctx context.Context, db *pgxpool.Pool, endpoint string) (ResolvedLine, error) { - var resolvedLine ResolvedLine - if err := Retry(ctx, func(ctx context.Context) error { - var err error - resolvedLine, err = getPreviousResolved(ctx, db, endpoint) - return err - }); err != nil { - return ResolvedLine{}, err - } - return resolvedLine, nil -} - -func TestParseResolvedLine(t *testing.T) { - tests := []struct { - testcase string - expectedPass bool - expectedNanos int64 - expectedLogical int - expectedEndpoint string - }{ - { - `{"resolved": "1586020760120222000.0000000000"}`, - true, 1586020760120222000, 0, "endpoint.sql", - }, - { - `{}`, - false, 0, 0, "", - }, - { - `"resolved": "1586020760120222000"}`, - false, 0, 0, "", - }, - { - `{"resolved": "0.0000000000"}`, - false, 0, 0, "", - }, - } - - for i, test := range tests { - t.Run(fmt.Sprintf("%d - %s", i, test.testcase), func(t *testing.T) { - a := assert.New(t) - actual, actualErr := parseResolvedLine([]byte(test.testcase), "endpoint.sql") - if test.expectedPass && !a.NoError(actualErr) { - return - } - if !test.expectedPass { - return - } - a.Equal(test.expectedNanos, actual.nanos, "nanos") - a.Equal(test.expectedLogical, actual.logical, "logical") - a.Equal(test.expectedEndpoint, actual.endpoint, "endpoint") - }) - } -} - -func TestResolvedTable(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, _, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - // Create a new _cdc_sink db - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - if !a.NoError(CreateResolvedTable(ctx, db)) { - return - } - - checkResolved := 
func(y ResolvedLine, z ResolvedLine) bool { - return a.Equal(y.endpoint, z.endpoint, "endpoint") && - a.Equal(y.nanos, z.nanos, "nanos") && - a.Equal(y.logical, z.logical, "logical") - } - - // Make sure there are no rows in the table yet. - if rowCount, err := getRowCount(ctx, db, resolvedFullTableName()); !a.NoError(err) || - !a.Equal(0, rowCount) { - return - } - - // Find no previous value for endpoint "one". - if one, err := getPreviousResolvedDB(ctx, db, "one"); !a.NoError(err) || - !checkResolved(ResolvedLine{endpoint: "one"}, one) { - return - } - - // Push 10 updates rows to the resolved table and check each one. - for i := 0; i < 10; i++ { - newOne := ResolvedLine{ - endpoint: "one", - nanos: int64(i), - logical: i, - } - if err := newOne.writeUpdatedDB(ctx, db); !a.NoError(err) { - return - } - if previousOne, err := getPreviousResolvedDB(ctx, db, "one"); !a.NoError(err) || - !checkResolved(newOne, previousOne) { - return - } - } - - // Now do the same for a second endpoint. - if two, err := getPreviousResolvedDB(ctx, db, "two"); !a.NoError(err) || - !checkResolved(ResolvedLine{endpoint: "two"}, two) { - return - } - - // Push 10 updates rows to the resolved table and check each one. - for i := 0; i < 10; i++ { - newOne := ResolvedLine{ - endpoint: "two", - nanos: int64(i), - logical: i, - } - if err := newOne.writeUpdatedDB(ctx, db); !a.NoError(err) { - return - } - if previousOne, err := getPreviousResolvedDB(ctx, db, "two"); !a.NoError(err) || - !checkResolved(newOne, previousOne) { - return - } - } - - // Now intersperse the updates. - for i := 100; i < 120; i++ { - newResolved := ResolvedLine{ - nanos: int64(i), - logical: i, - } - if i%2 == 0 { - newResolved.endpoint = "one" - } else { - newResolved.endpoint = "two" - } - - if err := newResolved.writeUpdatedDB(ctx, db); !a.NoError(err) { - return - } - previousResolved, err := getPreviousResolvedDB(ctx, db, newResolved.endpoint) - if !a.NoError(err) || !checkResolved(newResolved, previousResolved) { - return - } - } - - // Finally, check to make sure that there are only 2 lines in the resolved - // table. - rowCount, err := getRowCount(ctx, db, resolvedFullTableName()) - a.Equal(2, rowCount, "rowCount") - a.NoError(err) -} diff --git a/sink.go b/sink.go deleted file mode 100644 index 9b175d4e..00000000 --- a/sink.go +++ /dev/null @@ -1,329 +0,0 @@ -// Copyright 2020 The Cockroach Authors. -// -// Use of this software is governed by the Business Source License -// included in the file licenses/BSL.txt. -// -// As of the Change Date specified in that file, in accordance with -// the Business Source License, use of this software will be governed -// by the Apache License, Version 2.0, included in the file -// licenses/APL.txt. - -package main - -import ( - "bufio" - "bytes" - "context" - "encoding/json" - "fmt" - "log" - "net/http" - "sort" - "strings" - - "github.com/jackc/pgtype/pgxtype" - "github.com/jackc/pgx/v4/pgxpool" -) - -// Sink holds all the info needed for a specific table. -type Sink struct { - originalTableName string - resultTableFullName string - sinkTableFullName string - primaryKeyColumns []string - endpoint string - ignoredColumns map[string]struct{} -} - -// CreateSink creates all the required tables and returns a new Sink. -func CreateSink( - ctx context.Context, db *pgxpool.Pool, - originalTable string, resultDB string, resultTable string, endpoint string, -) (*Sink, error) { - // Check to make sure the table exists. 
-	resultTableFullName := fmt.Sprintf("%s.%s", resultDB, resultTable)
-	exists, err := TableExists(ctx, db, resultDB, resultTable)
-	if err != nil {
-		return nil, err
-	}
-	if !exists {
-		return nil, fmt.Errorf("table %s could not be found", resultTableFullName)
-	}
-
-	sinkTableFullName := SinkTableFullName(resultDB, resultTable)
-	if err := CreateSinkTable(ctx, db, sinkTableFullName); err != nil {
-		return nil, err
-	}
-
-	columns, err := GetPrimaryKeyColumns(ctx, db, resultTableFullName)
-	if err != nil {
-		return nil, err
-	}
-
-	toIgnore, err := GetIgnoredColumns(ctx, db, resultTableFullName)
-	if err != nil {
-		return nil, err
-	}
-	ignoreMap := make(map[string]struct{}, len(toIgnore))
-	for _, col := range toIgnore {
-		ignoreMap[col] = struct{}{}
-	}
-
-	sink := &Sink{
-		originalTableName:   originalTable,
-		resultTableFullName: resultTableFullName,
-		sinkTableFullName:   sinkTableFullName,
-		primaryKeyColumns:   columns,
-		endpoint:            endpoint,
-		ignoredColumns:      ignoreMap,
-	}
-
-	return sink, nil
-}
-
-const chunkSize = 1000
-
-// HandleRequest is a handler used for this specific sink.
-func (s *Sink) HandleRequest(db *pgxpool.Pool, w http.ResponseWriter, r *http.Request) {
-	scanner := bufio.NewScanner(r.Body)
-	defer r.Body.Close()
-	var lines []Line
-	for scanner.Scan() {
-		line, err := parseLine(scanner.Bytes())
-		if err != nil {
-			log.Print(err)
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-		lines = append(lines, line)
-		if len(lines) >= chunkSize {
-			if err := WriteToSinkTable(r.Context(), db, s.sinkTableFullName, lines); err != nil {
-				log.Print(err)
-				http.Error(w, err.Error(), http.StatusInternalServerError)
-				return
-			}
-			log.Printf("%s: added %d operations", s.endpoint, chunkSize)
-			lines = []Line{}
-		}
-	}
-	if err := scanner.Err(); err != nil {
-		log.Print(err)
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	if err := WriteToSinkTable(r.Context(), db, s.sinkTableFullName, lines); err != nil {
-		log.Print(err)
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-	log.Printf("%s: added %d operations", s.endpoint, len(lines))
}
-
-// deleteRows performs all the deletes specified in lines.
-func (s *Sink) deleteRows(ctx context.Context, tx pgxtype.Querier, lines []Line) error {
-	if len(lines) == 0 {
-		return nil
-	}
-
-	var chunks [][]Line
-	for i := 0; i < len(lines); i += chunkSize {
-		end := i + chunkSize
-		if end > len(lines) {
-			end = len(lines)
-		}
-		chunks = append(chunks, lines[i:end])
-	}
-
-	for _, chunk := range chunks {
-		// Build the statement.
-		var statement strings.Builder
-		fmt.Fprintf(&statement, "DELETE FROM %s WHERE (", s.resultTableFullName)
-		for i, column := range s.primaryKeyColumns {
-			if i > 0 {
-				fmt.Fprint(&statement, ",")
-			}
-			fmt.Fprintf(&statement, "%s", column)
-		}
-		fmt.Fprintf(&statement, ") IN (")
-		var keys []interface{}
-		for i, line := range chunk {
-			// Parse out the primary key values.
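-			// The key payload is a JSON array such as [1, "foo"]. UseNumber()
-			// keeps numbers as json.Number, so large integers survive the
-			// round trip intact. The finished statement has the shape:
-			//   DELETE FROM db.tbl WHERE (a,b) IN (($1,$2),($3,$4))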
-			key := make([]interface{}, 0, len(s.primaryKeyColumns))
-			dec := json.NewDecoder(bytes.NewReader(line.key))
-			dec.UseNumber()
-			if err := dec.Decode(&key); err != nil {
-				return err
-			}
-
-			if i > 0 {
-				fmt.Fprintf(&statement, ",")
-			}
-			fmt.Fprintf(&statement, "(")
-			for i, key := range key {
-				if i > 0 {
-					fmt.Fprintf(&statement, ",")
-				}
-				keys = append(keys, key)
-				// Placeholder indexes always start at $1.
-				fmt.Fprintf(&statement, "$%d", len(keys))
-			}
-			fmt.Fprintf(&statement, ")")
-		}
-		fmt.Fprintf(&statement, ")")
-
-		// Execute the delete.
-		if _, err := tx.Exec(ctx, statement.String(), keys...); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// upsertRows performs all upserts specified in lines.
-func (s *Sink) upsertRows(ctx context.Context, tx pgxtype.Querier, lines []Line) error {
-	const starterColumns = 16
-	if len(lines) == 0 {
-		return nil
-	}
-
-	// Get all the column names and order them alphabetically.
-	allNames, err := lines[0].extractColumns(make([]string, 0, starterColumns))
-	if err != nil {
-		return err
-	}
-
-	// https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
-	columnNames := allNames[:0]
-	for _, name := range allNames {
-		if _, ignored := s.ignoredColumns[name]; !ignored {
-			columnNames = append(columnNames, name)
-		}
-	}
-	sort.Strings(columnNames)
-
-	var chunks [][]Line
-	for i := 0; i < len(lines); i += chunkSize {
-		end := i + chunkSize
-		if end > len(lines) {
-			end = len(lines)
-		}
-		chunks = append(chunks, lines[i:end])
-	}
-
-	for _, chunk := range chunks {
-		// Build the statement.
-		var statement strings.Builder
-		// TODO: This first part can be memoized as long as there are no schema
-		// changes.
-		fmt.Fprintf(&statement, "UPSERT INTO %s (", s.resultTableFullName)
-
-		for i, name := range columnNames {
-			if i > 0 {
-				fmt.Fprintf(&statement, ",")
-			}
-			fmt.Fprint(&statement, name)
-		}
-		fmt.Fprint(&statement, ") VALUES ")
-
-		var values []interface{}
-		for i, line := range chunk {
-			data := make(map[string]interface{}, starterColumns)
-			if err := line.parseAfter(data); err != nil {
-				return err
-			}
-			if i == 0 {
-				fmt.Fprintf(&statement, "(")
-			} else {
-				fmt.Fprintf(&statement, ",(")
-			}
-			for j, name := range columnNames {
-				values = append(values, data[name])
-				if j == 0 {
-					fmt.Fprintf(&statement, "$%d", len(values))
-				} else {
-					fmt.Fprintf(&statement, ",$%d", len(values))
-				}
-			}
-			fmt.Fprintf(&statement, ")")
-		}
-
-		// Execute the upsert.
-		if _, err := tx.Exec(ctx, statement.String(), values...); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
-// UpdateRows updates all changed rows.
-func (s *Sink) UpdateRows(ctx context.Context, tx pgxtype.Querier, prev ResolvedLine, next ResolvedLine) error {
-	// First, gather all the rows to update.
-	lines, err := DrainAllRowsToUpdate(ctx, tx, s.sinkTableFullName, prev, next)
-	if err != nil {
-		return err
-	}
-
-	if len(lines) == 0 {
-		return nil
-	}
-
-	log.Printf("%s: %s executed %d operations", s.endpoint, s.sinkTableFullName, len(lines))
-
-	// TODO: Batch these by 100 rows? Not sure what the max should be.
-
-	var upserts []Line
-	var deletes []Line
-
-	// This must happen in reverse order, tracking every key we have seen.
-	// This way, more recent changes overwrite earlier ones without having
-	// to perform multiple upserts/deletes against the db.
-	usedKeys := make(map[string]struct{})
-	for i := len(lines) - 1; i >= 0; i-- {
-		line := lines[i]
-
-		// Did we already update this key? If so, skip this older mutation.
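-		// For example, if key [1] appears at 5.0 and again at 9.0, only the
-		// 9.0 mutation is applied; the older one is skipped here.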
-		if _, exist := usedKeys[string(line.key)]; exist {
-			continue
-		}
-		usedKeys[string(line.key)] = struct{}{}
-
-		// Parse the key into columns.
-		// Large numbers are not turned into strings, so the UseNumber option
-		// for the decoder is required.
-		key := make([]interface{}, 0, len(s.primaryKeyColumns))
-		dec := json.NewDecoder(bytes.NewReader(line.key))
-		dec.UseNumber()
-		if err := dec.Decode(&key); err != nil {
-			return err
-		}
-
-		// Is this needed? What if we have 2 primary key columns but the 2nd one
-		// is nullable or has a default? Does CDC send it?
-		if len(key) != len(s.primaryKeyColumns) {
-			return fmt.Errorf(
-				"table %s has %d primary key columns %v, but only got %d keys %v",
-				s.resultTableFullName,
-				len(s.primaryKeyColumns),
-				s.primaryKeyColumns,
-				len(key),
-				key,
-			)
-		}
-
-		// Is this a delete?
-		if string(line.after) == "null" {
-			deletes = append(deletes, line)
-		} else {
-			// This must be an upsert statement.
-			upserts = append(upserts, line)
-		}
-	}
-
-	// Delete all rows.
-	if err := s.deleteRows(ctx, tx, deletes); err != nil {
-		return err
-	}
-
-	// Upsert all rows.
-	return s.upsertRows(ctx, tx, upserts)
-}
diff --git a/sink_table.go b/sink_table.go
deleted file mode 100644
index e6f7bec3..00000000
--- a/sink_table.go
+++ /dev/null
@@ -1,199 +0,0 @@
-// Copyright 2020 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
-	"bytes"
-	"context"
-	"encoding/json"
-	"fmt"
-	"strconv"
-	"strings"
-
-	"github.com/jackc/pgtype/pgxtype"
-	"github.com/jackc/pgx/v4/pgxpool"
-)
-
-const sinkTableSchema = `
-CREATE TABLE IF NOT EXISTS %s (
-	nanos INT NOT NULL,
-	logical INT NOT NULL,
-	key STRING NOT NULL,
-	after STRING,
-	PRIMARY KEY (nanos, logical, key)
-)
-`
-
-const sinkTableWrite = `UPSERT INTO %s (nanos, logical, key, after) VALUES `
-
-// Drained timestamps are greater than the previous resolved timestamp and
-// less than or equal to the current one. Here $1 and $2 are the previous
-// resolved (nanos, logical) pair and $3 and $4 are the current one.
-const sinkTableDrainRows = `
-DELETE
-FROM %s
-WHERE ((nanos = $1 AND logical > $2) OR (nanos > $1)) AND
-      ((nanos = $3 AND logical <= $4) OR (nanos < $3))
-RETURNING nanos, logical, key, after
-`
-
-// SinkTableFullName creates the conjoined db/table name to be used by the
-// sink table.
-func SinkTableFullName(resultDB string, resultTable string) string {
-	return fmt.Sprintf("%s.%s_%s", *sinkDB, resultDB, resultTable)
-}
-
-// Line stores pending mutations.
-type Line struct {
-	after   json.RawMessage // The mutations to apply: {"a": 1, "b": 1}
-	key     json.RawMessage // Primary key values: [1, 2]
-	nanos   int64           // HLC time base
-	logical int             // HLC logical counter
-}
-
-// extractColumns parses the keys from the "after" payload block and
-// appends them to the given slice.
-func (line *Line) extractColumns(into []string) ([]string, error) {
-	m := make(map[string]json.RawMessage)
-	dec := json.NewDecoder(bytes.NewReader(line.after))
-	if err := dec.Decode(&m); err != nil {
-		return nil, err
-	}
-	for k := range m {
-		into = append(into, k)
-	}
-	return into, nil
-}
-
-// parseAfter reifies the mutations to be applied.
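-// For example, an after payload of {"a": 1, "b": "x"} decodes into
-// map[string]interface{}{"a": json.Number("1"), "b": "x"}.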
-func (line *Line) parseAfter(into map[string]interface{}) error {
-	// Parse the after columns.
-	// Large numbers are not turned into strings, so the UseNumber option
-	// for the decoder is required.
-	dec := json.NewDecoder(bytes.NewReader(line.after))
-	dec.UseNumber()
-	return dec.Decode(&into)
-}
-
-// getSinkTableValues is just the values ordered as expected for the sink
-// table insert statement.
-func (line Line) getSinkTableValues() []interface{} {
-	return []interface{}{line.nanos, line.logical, string(line.key), string(line.after)}
-}
-
-// parseSplitTimestamp splits a timestamp of the format NNNN.LLL into an int64
-// for the nanos and an int for the logical component.
-func parseSplitTimestamp(timestamp string) (int64, int, error) {
-	splits := strings.Split(timestamp, ".")
-	if len(splits) != 2 {
-		return 0, 0, fmt.Errorf("can't parse timestamp %s", timestamp)
-	}
-	nanos, err := strconv.ParseInt(splits[0], 0, 0)
-	if err != nil {
-		return 0, 0, err
-	}
-	if nanos <= 0 {
-		return 0, 0, fmt.Errorf("nanos must be greater than 0: %d", nanos)
-	}
-	logical, err := strconv.Atoi(splits[1])
-	if err != nil {
-		return 0, 0, err
-	}
-	return nanos, logical, nil
-}
-
-// parseLine takes a single line from an ndjson payload and extracts enough
-// information to be able to persist it to the staging table.
-func parseLine(rawBytes []byte) (Line, error) {
-	var payload struct {
-		After   json.RawMessage `json:"after"`
-		Key     json.RawMessage `json:"key"`
-		Updated string          `json:"updated"`
-	}
-
-	// Large numbers are not turned into strings, so the UseNumber option for
-	// the decoder is required.
-	dec := json.NewDecoder(bytes.NewReader(rawBytes))
-	dec.UseNumber()
-	if err := dec.Decode(&payload); err != nil {
-		return Line{}, err
-	}
-
-	// Parse the timestamp into nanos and logical.
-	nanos, logical, err := parseSplitTimestamp(payload.Updated)
-	if err != nil {
-		return Line{}, err
-	}
-	if nanos == 0 {
-		return Line{}, fmt.Errorf("no nano component to the 'updated' timestamp field")
-	}
-
-	return Line{
-		after:   payload.After,
-		key:     payload.Key,
-		logical: logical,
-		nanos:   nanos,
-	}, nil
-}
-
-// CreateSinkTable creates the table used for sinking, if it does not already
-// exist.
-func CreateSinkTable(ctx context.Context, db *pgxpool.Pool, sinkTableFullName string) error {
-	return Execute(ctx, db, fmt.Sprintf(sinkTableSchema, sinkTableFullName))
-}
-
-// WriteToSinkTable upserts all lines to the sink table. Never submit more
-// than 10,000 lines to this function at a time.
-func WriteToSinkTable(ctx context.Context, db *pgxpool.Pool, sinkTableFullName string, lines []Line) error {
-	if len(lines) == 0 {
-		return nil
-	}
-	var statement strings.Builder
-	if _, err := fmt.Fprintf(&statement, sinkTableWrite, sinkTableFullName); err != nil {
-		return err
-	}
-	var values []interface{}
-	for i, line := range lines {
-		values = append(values, line.getSinkTableValues()...)
-		if i == 0 {
-			if _, err := fmt.Fprint(&statement, "($1,$2,$3,$4)"); err != nil {
-				return err
-			}
-		} else {
-			j := i * 4
-			if _, err := fmt.Fprintf(&statement, ",($%d,$%d,$%d,$%d)", j+1, j+2, j+3, j+4); err != nil {
-				return err
-			}
-		}
-	}
-
-	return Execute(ctx, db, statement.String(), values...)
-}
-
-// DrainAllRowsToUpdate deletes and returns the rows that need to be
-// updated from the sink table.
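-// Rows are drained from the half-open HLC interval (prev, next]: strictly
-// after the previous resolved timestamp, up to and including the next one.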
-func DrainAllRowsToUpdate(
-	ctx context.Context, tx pgxtype.Querier, sinkTableFullName string, prev ResolvedLine, next ResolvedLine,
-) ([]Line, error) {
-	rows, err := tx.Query(ctx, fmt.Sprintf(sinkTableDrainRows, sinkTableFullName),
-		prev.nanos, prev.logical, next.nanos, next.logical,
-	)
-	if err != nil {
-		return nil, err
-	}
-	defer rows.Close()
-	var lines []Line
-	var line Line
-	for rows.Next() {
-		if err := rows.Scan(&(line.nanos), &(line.logical), &(line.key), &(line.after)); err != nil {
-			return nil, err
-		}
-		lines = append(lines, line)
-	}
-	return lines, rows.Err()
-}
diff --git a/sink_table_test.go b/sink_table_test.go
deleted file mode 100644
index 9fd216c6..00000000
--- a/sink_table_test.go
+++ /dev/null
@@ -1,307 +0,0 @@
-// Copyright 2020 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
-	"context"
-	"encoding/json"
-	"fmt"
-	"math"
-	"testing"
-
-	"github.com/jackc/pgx/v4/pgxpool"
-	"github.com/stretchr/testify/assert"
-)
-
-// These tests require an insecure CockroachDB server running on the default
-// port with the default root user and no password.
-
-// findAllRowsToUpdateDB is a wrapper around DrainAllRowsToUpdate that handles
-// the transaction for testing.
-func findAllRowsToUpdateDB(
-	ctx context.Context, db *pgxpool.Pool, sinkTableFullName string, prev ResolvedLine, next ResolvedLine,
-) ([]Line, error) {
-	var lines []Line
-
-	if err := Retry(ctx, func(ctx context.Context) error {
-		var err error
-		tx, err := db.Begin(ctx)
-		if err != nil {
-			return err
-		}
-		defer tx.Rollback(ctx)
-		lines, err = DrainAllRowsToUpdate(ctx, tx, sinkTableFullName, prev, next)
-		return err
-	}); err != nil {
-		return nil, err
-	}
-	return lines, nil
-}
-
-func TestParseSplitTimestamp(t *testing.T) {
-	tests := []struct {
-		testcase        string
-		expectedPass    bool
-		expectedNanos   int64
-		expectedLogical int
-	}{
-		{"", false, 0, 0},
-		{".", false, 0, 0},
-		{"1233", false, 0, 0},
-		{".1233", false, 0, 0},
-		{"123.123", true, 123, 123},
-		{"0.0", false, 0, 0},
-		{"1586019746136571000.0000000000", true, 1586019746136571000, 0},
-		{"1586019746136571000.0000000001", true, 1586019746136571000, 1},
-		{"9223372036854775807.2147483647", true, math.MaxInt64, math.MaxInt32},
-	}
-
-	for i, test := range tests {
-		t.Run(fmt.Sprintf("%d - %s", i, test.testcase), func(t *testing.T) {
-			actualNanos, actualLogical, actualErr := parseSplitTimestamp(test.testcase)
-			if test.expectedPass == (actualErr != nil) {
-				t.Errorf("expected pass=%v, got error %v", test.expectedPass, actualErr)
-			}
-			if test.expectedNanos != actualNanos {
-				t.Errorf("Expected %d nanos, got %d nanos", test.expectedNanos, actualNanos)
-			}
-			if test.expectedLogical != actualLogical {
-				t.Errorf("Expected %d logical, got %d logical", test.expectedLogical, actualLogical)
-			}
-		})
-	}
-}
-
-func TestParseLine(t *testing.T) {
-	tests := []struct {
-		testcase        string
-		expectedPass    bool
-		expectedAfter   string
-		expectedKey     string
-		expectedNanos   int64
-		expectedLogical int
-	}{
-		{
-			`{"after": {"a":9,"b":9}, "key": [9], "updated": "1586020760120222000.0000000000"}`,
-			true, `{"a":9,"b":9}`, `[9]`, 1586020760120222000, 0,
-		},
-		{
-			`{"after": {"a": 9, "b": 9}, "key": [9]`,
-			false, "", "", 0, 0,
-		},
-		{
"1586020760120222000"}`, - false, "", "", 0, 0, - }, - { - `{"after": {"a": 9, "b": 9}, "key":, "updated": "1586020760120222000.0000000000"}`, - false, "", "", 0, 0, - }, - { - `{"after": {"a": 9, "b": 9}, "key": [9], "updated": "0.0000000000"}`, - false, "", "", 0, 0, - }, - { - `{"after": {"a": 9, "b": 9}, "updated": "1586020760120222000.0000000000"}`, - false, "", "", 0, 0, - }, - } - - for i, test := range tests { - t.Run(fmt.Sprintf("%d - %s", i, test.testcase), func(t *testing.T) { - a := assert.New(t) - actual, actualErr := parseLine([]byte(test.testcase)) - if test.expectedPass && !a.NoError(actualErr) { - return - } - if !test.expectedPass { - return - } - a.Equal(test.expectedNanos, actual.nanos) - a.Equal(test.expectedLogical, actual.logical) - a.Equal(json.RawMessage(test.expectedKey), actual.key) - a.Equal(json.RawMessage(test.expectedAfter), actual.after) - }) - } -} - -func TestWriteToSinkTable(t *testing.T) { - a := assert.New(t) - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Create the test db - db, dbName, dbClose, err := getDB(ctx) - if !a.NoError(err) { - return - } - defer dbClose() - - if !a.NoError(createSinkDB(ctx, db)) { - return - } - defer dropSinkDB(ctx, db) - - // Create the table to import from - tableFrom, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Create the table to receive into - tableTo, err := createTestSimpleTable(ctx, db, dbName) - if !a.NoError(err) { - return - } - - // Give the from table a few rows - if !a.NoError(tableFrom.populateTable(ctx, 10)) { - return - } - if count, err := tableFrom.getTableRowCount(ctx); a.NoError(err) { - a.Equal(10, count) - } else { - return - } - - // Create the sinks and sink - sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest)) - if !a.NoError(err) { - return - } - - sink := sinks.FindSink(endpointTest, tableFrom.name) - if !a.NotNil(sink) { - return - } - - // Make sure there are no rows in the table yet. - if rowCount, err := getRowCount(ctx, db, sink.sinkTableFullName); a.NoError(err) { - a.Equal(0, rowCount) - } else { - return - } - - // Write 100 rows to the table. - var lines []Line - for i := 0; i < 100; i++ { - lines = append(lines, Line{ - nanos: int64(i), - logical: i, - key: json.RawMessage(fmt.Sprintf("[%d]", i)), - after: json.RawMessage(fmt.Sprintf(`{"a": %d`, i)), - }) - } - - if err := WriteToSinkTable(ctx, db, sink.sinkTableFullName, lines); !a.NoError(err) { - return - } - - // Re-deliver a message to check at-least-once behavior. - if err := WriteToSinkTable(ctx, db, sink.sinkTableFullName, lines[:1]); !a.NoError(err) { - return - } - - // Check to see if there are indeed 100 rows in the table. 
-	if rowCount, err := getRowCount(ctx, db, sink.sinkTableFullName); a.NoError(err) {
-		a.Equal(100, rowCount)
-	}
-}
-
-func TestFindAllRowsToUpdate(t *testing.T) {
-	a := assert.New(t)
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	// Create the test db
-	db, dbName, dbClose, err := getDB(ctx)
-	if !a.NoError(err) {
-		return
-	}
-	defer dbClose()
-
-	// Create a new _cdc_sink db
-	if !a.NoError(createSinkDB(ctx, db)) {
-		return
-	}
-	defer dropSinkDB(ctx, db)
-
-	// Create the table to import from
-	tableFrom, err := createTestSimpleTable(ctx, db, dbName)
-	if !a.NoError(err) {
-		return
-	}
-
-	// Create the table to receive into
-	tableTo, err := createTestSimpleTable(ctx, db, dbName)
-	if !a.NoError(err) {
-		return
-	}
-
-	// Create the sinks and sink
-	sinks, err := CreateSinks(ctx, db, createConfig(tableFrom.tableInfo, tableTo.tableInfo, endpointTest))
-	if !a.NoError(err) {
-		return
-	}
-
-	// Insert 100 rows into the sink table.
-	sink := sinks.FindSink(endpointTest, tableFrom.name)
-	var lines []Line
-	for i := 0; i < 10; i++ {
-		for j := 0; j < 10; j++ {
-			lines = append(lines, Line{
-				nanos:   int64(i),
-				logical: j,
-				after:   json.RawMessage(fmt.Sprintf("{a=%d,b=%d}", i, j)),
-				key:     json.RawMessage(fmt.Sprintf("[%d]", i)),
-			})
-		}
-	}
-	if err := WriteToSinkTable(ctx, db, sink.sinkTableFullName, lines); !a.NoError(err) {
-		return
-	}
-
-	// Now find those rows from the start.
-	for i := 0; i < 10; i++ {
-		prev := ResolvedLine{
-			endpoint: "test",
-			nanos:    0,
-			logical:  0,
-		}
-		next := ResolvedLine{
-			endpoint: "test",
-			nanos:    int64(i),
-			logical:  i,
-		}
-		lines, err := findAllRowsToUpdateDB(ctx, db, sink.sinkTableFullName, prev, next)
-		if a.NoError(err) {
-			a.Len(lines, i*11)
-		}
-	}
-
-	// And again but from the previous.
-	for i := 1; i < 10; i++ {
-		prev := ResolvedLine{
-			endpoint: "test",
-			nanos:    int64(i - 1),
-			logical:  i - 1,
-		}
-		next := ResolvedLine{
-			endpoint: "test",
-			nanos:    int64(i),
-			logical:  i,
-		}
-		lines, err := findAllRowsToUpdateDB(ctx, db, sink.sinkTableFullName, prev, next)
-		if a.NoError(err) {
-			a.Len(lines, 11)
-		}
-	}
-}
diff --git a/sinks.go b/sinks.go
deleted file mode 100644
index 4a6ce900..00000000
--- a/sinks.go
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright 2020 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
-	"bufio"
-	"context"
-	"fmt"
-	"log"
-	"net/http"
-	"strings"
-	"sync"
-
-	"github.com/jackc/pgx/v4/pgxpool"
-)
-
-// Sinks holds a map of all known sinks.
-type Sinks struct {
-	// Is this mutex overkill? Is it needed? There should never be any writes
-	// to the map after initialization. But guidance here is fuzzy, so I'll
-	// keep it in.
-	sync.RWMutex
-
-	// endpoints can have multiple tables
-	sinksByTableByEndpoint map[string]map[string]*Sink
-}
-
-// CreateSinks creates a new set of sinks and populates it based on the
-// passed-in config.
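-// Sinks are addressable by endpoint first, then by source table name; see
-// FindSink below.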
-func CreateSinks(ctx context.Context, db *pgxpool.Pool, config Config) (*Sinks, error) {
-	sinks := &Sinks{
-		sinksByTableByEndpoint: make(map[string]map[string]*Sink),
-	}
-
-	for _, entry := range config {
-		if err := sinks.AddSink(ctx, db, entry); err != nil {
-			return nil, err
-		}
-	}
-
-	return sinks, nil
-}
-
-// AddSink creates and adds a new sink to the sinks map.
-func (s *Sinks) AddSink(ctx context.Context, db *pgxpool.Pool, entry ConfigEntry) error {
-	s.Lock()
-	defer s.Unlock()
-
-	sourceTable := strings.ToLower(strings.TrimSpace(entry.SourceTable))
-	destinationDB := strings.ToLower(strings.TrimSpace(entry.DestinationDatabase))
-	destinationTable := strings.ToLower(strings.TrimSpace(entry.DestinationTable))
-	endpoint := strings.ToLower(strings.TrimSpace(entry.Endpoint))
-
-	// First check to make sure the endpoint exists; if it doesn't, create one.
-	var sinksByTable map[string]*Sink
-	var exist bool
-	if sinksByTable, exist = s.sinksByTableByEndpoint[endpoint]; !exist {
-		sinksByTable = make(map[string]*Sink)
-		s.sinksByTableByEndpoint[endpoint] = sinksByTable
-	}
-
-	// Check for a duplicate table.
-	if _, exist := sinksByTable[sourceTable]; exist {
-		return fmt.Errorf("duplicate table configuration entry found: %s", sourceTable)
-	}
-
-	sink, err := CreateSink(ctx, db, sourceTable, destinationDB, destinationTable, endpoint)
-	if err != nil {
-		return err
-	}
-	sinksByTable[sourceTable] = sink
-	s.sinksByTableByEndpoint[endpoint] = sinksByTable
-	return nil
-}
-
-// FindSink returns a sink for a given table name and endpoint.
-func (s *Sinks) FindSink(endpoint string, table string) *Sink {
-	s.RLock()
-	defer s.RUnlock()
-	sinksByTable, exist := s.sinksByTableByEndpoint[endpoint]
-	if !exist {
-		return nil
-	}
-	return sinksByTable[table]
-}
-
-// GetAllSinksByEndpoint gets a list of all known sinks for an endpoint.
-func (s *Sinks) GetAllSinksByEndpoint(endpoint string) []*Sink {
-	s.RLock()
-	defer s.RUnlock()
-	var allSinks []*Sink
-	if sinksByTable, exists := s.sinksByTableByEndpoint[endpoint]; exists {
-		for _, sink := range sinksByTable {
-			allSinks = append(allSinks, sink)
-		}
-	}
-	return allSinks
-}
-
-// HandleResolvedRequest parses and applies all the resolved upserts.
-func (s *Sinks) HandleResolvedRequest(
-	ctx context.Context, db *pgxpool.Pool, rURL resolvedURL, w http.ResponseWriter, r *http.Request,
-) {
-	scanner := bufio.NewScanner(r.Body)
-	defer r.Body.Close()
-	for scanner.Scan() {
-		next, err := parseResolvedLine(scanner.Bytes(), rURL.endpoint)
-		if err != nil {
-			log.Print(err)
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		// Start the transaction.
-		if err := Retry(ctx, func(ctx context.Context) error {
-			tx, err := db.Begin(ctx)
-			if err != nil {
-				return err
-			}
-			defer tx.Rollback(ctx)
-
-			// Get the previous resolved timestamp.
-			prev, err := getPreviousResolved(ctx, tx, rURL.endpoint)
-			if err != nil {
-				return err
-			}
-			log.Printf("%s: resolved - timestamp %d.%d", next.endpoint, next.nanos, next.logical)
-
-			// Find all rows to update and upsert them.
-			allSinks := s.GetAllSinksByEndpoint(rURL.endpoint)
-			for _, sink := range allSinks {
-				if err := sink.UpdateRows(ctx, tx, prev, next); err != nil {
-					return err
-				}
-			}
-
-			// Write the updated resolved timestamp.
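-			// Writing the new resolved timestamp in the same transaction as
-			// the drained mutations makes a re-delivered resolved message a
-			// harmless no-op.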
-			if err := next.writeUpdated(ctx, tx); err != nil {
-				return err
-			}
-			return tx.Commit(ctx)
-		}); err != nil {
-			log.Print(err)
-			http.Error(w, err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-	if err := scanner.Err(); err != nil {
-		log.Print(err)
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
-}
diff --git a/sql.go b/sql.go
deleted file mode 100644
index 4f19f65f..00000000
--- a/sql.go
+++ /dev/null
@@ -1,137 +0,0 @@
-// Copyright 2020 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
-	"context"
-	"fmt"
-
-	"github.com/jackc/pgx/v4"
-	"github.com/jackc/pgx/v4/pgxpool"
-)
-
-const sinkDBZoneConfig = `ALTER DATABASE %s CONFIGURE ZONE USING gc.ttlseconds = 600;`
-
-// CreateSinkDB creates a new sink db if one does not exist yet and also adds
-// the resolved table.
-func CreateSinkDB(ctx context.Context, db *pgxpool.Pool) error {
-	if err := Execute(ctx, db, fmt.Sprintf("CREATE DATABASE IF NOT EXISTS %s", *sinkDB)); err != nil {
-		return err
-	}
-	if *sinkDBZone {
-		if err := Execute(ctx, db, fmt.Sprintf(sinkDBZoneConfig, *sinkDB)); err != nil {
-			return err
-		}
-	}
-	return CreateResolvedTable(ctx, db)
-}
-
-// DropSinkDB drops the sinkDB and all data in it.
-func DropSinkDB(ctx context.Context, db *pgxpool.Pool) error {
-	return Execute(ctx, db, fmt.Sprintf(`DROP DATABASE IF EXISTS %s CASCADE`, *sinkDB))
-}
-
-const sqlTableExistsQuery = `SELECT table_name FROM [SHOW TABLES FROM %s] WHERE table_name = '%s'`
-
-// TableExists checks for the existence of a table.
-func TableExists(ctx context.Context, db *pgxpool.Pool, dbName string, tableName string) (bool, error) {
-	findTableSQL := fmt.Sprintf(sqlTableExistsQuery, dbName, tableName)
-	var tableFound string
-	err := Retry(ctx, func(ctx context.Context) error {
-		return db.QueryRow(ctx, findTableSQL).Scan(&tableFound)
-	})
-	switch err {
-	case pgx.ErrNoRows:
-		return false, nil
-	case nil:
-		return true, nil
-	default:
-		return false, err
-	}
-}
-
-const sqlGetIgnoredColumns = `
-SELECT column_name FROM [SHOW COLUMNS FROM %s] WHERE generation_expression != ''
-`
-
-// GetIgnoredColumns returns the names of columns defined in the table
-// which should not be updated. This is used to filter out columns
-// related to certain database features, such as hash-sharded indexes.
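-// For example, a hash-sharded index adds a hidden computed shard column
-// (such as crdb_internal_a_shard_8) that cannot be written to directly.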
-func GetIgnoredColumns(ctx context.Context, db *pgxpool.Pool, tableFullName string) ([]string, error) {
-	findKeyColumns := fmt.Sprintf(sqlGetIgnoredColumns, tableFullName)
-	var columns []string
-	if err := Retry(ctx, func(ctx context.Context) error {
-		var columnsInternal []string
-		rows, err := db.Query(ctx, findKeyColumns)
-		if err != nil {
-			return err
-		}
-		defer rows.Close()
-
-		for rows.Next() {
-			var column string
-			if err := rows.Scan(&column); err != nil {
-				return err
-			}
-			columnsInternal = append(columnsInternal, column)
-		}
-		columns = columnsInternal
-		return nil
-	}); err != nil {
-		return nil, err
-	}
-	return columns, nil
-}
-
-const sqlGetPrimaryKeyColumnsQuery = `
-SELECT column_name FROM [SHOW INDEX FROM %s]
-WHERE index_name = 'primary'
-  AND NOT storing
-ORDER BY seq_in_index
-`
-
-// GetPrimaryKeyColumns returns the column names for the primary key index of
-// a table, in order.
-func GetPrimaryKeyColumns(ctx context.Context, db *pgxpool.Pool, tableFullName string) ([]string, error) {
-	findKeyColumns := fmt.Sprintf(sqlGetPrimaryKeyColumnsQuery, tableFullName)
-	var columns []string
-	if err := Retry(ctx, func(ctx context.Context) error {
-		var columnsInternal []string
-		rows, err := db.Query(ctx, findKeyColumns)
-		if err != nil {
-			return err
-		}
-		defer rows.Close()
-
-		for rows.Next() {
-			var column string
-			if err := rows.Scan(&column); err != nil {
-				return err
-			}
-			columnsInternal = append(columnsInternal, column)
-		}
-		columns = columnsInternal
-		return nil
-	}); err != nil {
-		return nil, err
-	}
-	return columns, nil
-}
-
-// Execute is just a wrapper around Retry that can be used for sql
-// queries that don't have any return values.
-func Execute(ctx context.Context, db *pgxpool.Pool, query string, args ...interface{}) error {
-	return Retry(ctx, func(ctx context.Context) error {
-		_, err := db.Exec(ctx, query, args...)
-		return err
-	})
-}
diff --git a/sql_test.go b/sql_test.go
deleted file mode 100644
index 6b064655..00000000
--- a/sql_test.go
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2020 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
-	"context"
-	"fmt"
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-)
-
-// These tests require an insecure CockroachDB server running on the default
-// port with the default root user and no password.
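-// For example: cockroach start-single-node --insecure --listen-addr=localhost:26257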
-
-func TestGetPrimaryKeyColumns(t *testing.T) {
-	a := assert.New(t)
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-
-	// Create the test db
-	db, dbName, dbClose, err := getDB(ctx)
-	if !a.NoError(err) {
-		return
-	}
-	defer dbClose()
-
-	testcases := []struct {
-		tableSchema string
-		primaryKeys []string
-	}{
-		{
-			"a INT",
-			[]string{"rowid"},
-		},
-		{
-			"a INT PRIMARY KEY",
-			[]string{"a"},
-		},
-		{
-			"a INT, b INT, PRIMARY KEY (a,b)",
-			[]string{"a", "b"},
-		},
-		{
-			"a INT, b INT, PRIMARY KEY (b,a)",
-			[]string{"b", "a"},
-		},
-		{
-			"a INT, b INT, c INT, PRIMARY KEY (b,a,c)",
-			[]string{"b", "a", "c"},
-		},
-	}
-
-	for i, test := range testcases {
-		t.Run(fmt.Sprintf("%d:%s", i, test.tableSchema), func(t *testing.T) {
-			a := assert.New(t)
-			ctx, cancel := context.WithCancel(ctx)
-			defer cancel()
-
-			tableFullName := fmt.Sprintf("%s.test_%d", dbName, i)
-			if !a.NoError(Execute(ctx, db,
-				fmt.Sprintf(`CREATE TABLE %s ( %s )`, tableFullName, test.tableSchema))) {
-				return
-			}
-			columns, err := GetPrimaryKeyColumns(ctx, db, tableFullName)
-			if !a.NoError(err) {
-				return
-			}
-			a.Equal(test.primaryKeys, columns)
-		})
-	}
-}
diff --git a/url.go b/url.go
deleted file mode 100644
index 1a6cfb76..00000000
--- a/url.go
+++ /dev/null
@@ -1,127 +0,0 @@
-// Copyright 2020 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
-	"fmt"
-	"regexp"
-	"strconv"
-	"strings"
-	"time"
-)
-
-// This is the timestamp format: YYYYMMDDHHMMSSNNNNNNNNNLLLLLLLLLL
-// Formatting const stolen from
-// https://github.com/cockroachdb/cockroach/blob/master/pkg/ccl/changefeedccl/sink_cloudstorage.go#L48
-const timestampDateTimeFormat = "20060102150405"
-
-func parseTimestamp(timestamp string, logical string) (time.Time, int, error) {
-	if len(timestamp) != 23 {
-		return time.Time{}, 0, fmt.Errorf("can't parse timestamp %s", timestamp)
-	}
-	if len(logical) != 10 {
-		return time.Time{}, 0, fmt.Errorf("can't parse logical timestamp %s", logical)
-	}
-
-	// Parse the date and time.
-	timestampParsed, err := time.Parse(timestampDateTimeFormat, timestamp[0:14])
-	if err != nil {
-		return time.Time{}, 0, err
-	}
-
-	// Parse out the nanos.
-	nanos, err := time.ParseDuration(timestamp[14:23] + "ns")
-	if err != nil {
-		return time.Time{}, 0, err
-	}
-	// time.Time.Add returns a new value; the result must be assigned.
-	timestampParsed = timestampParsed.Add(nanos)
-
-	// Parse out the logical timestamp.
-	logicalParsed, err := strconv.Atoi(logical)
-	if err != nil {
-		return time.Time{}, 0, err
-	}
-
-	return timestampParsed, logicalParsed, nil
-}
-
-// See https://www.cockroachlabs.com/docs/stable/create-changefeed.html#general-file-format
-// Example: /test.sql//2020-04-02/202004022058072107140000000000000-56087568dba1e6b8-1-72-00000000-test_table-1.ndjson
-// Format is: /[endpoint]/[date]/[timestamp]-[uniquer]-[topic]-[schema-id]
-var (
-	ndjsonRegex       = regexp.MustCompile(`/(?P<endpoint>[^/]*)/(?P<date>\d{4}-\d{2}-\d{2})/(?P<timestamp>.+)-(?P<topic>[^-]+)-(?P<schema_id>[^-]+).ndjson$`)
-	ndjsonEndpointIdx = ndjsonRegex.SubexpIndex("endpoint")
-	ndjsonTopicIdx    = ndjsonRegex.SubexpIndex("topic")
-)
-
-// ndjsonURL contains all the parsed info from an ndjson url.
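-// For example, for the URL
-// /endpoint/2020-04-02/202004022058072107140000000000000-56087568dba1e6b8-1-72-00000000-test_table-1.ndjson
-// the endpoint is "endpoint" and the topic is "test_table".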
-type ndjsonURL struct {
-	endpoint string
-	topic    string
-}
-
-func parseNdjsonURL(url string) (ndjsonURL, error) {
-	match := ndjsonRegex.FindStringSubmatch(url)
-	if match == nil {
-		return ndjsonURL{}, fmt.Errorf("can't parse url %s", url)
-	}
-
-	return ndjsonURL{
-		endpoint: match[ndjsonEndpointIdx],
-		topic:    match[ndjsonTopicIdx],
-	}, nil
-}
-
-// Example: /test.sql/2020-04-04/202004042351304139680000000000000.RESOLVED
-// Format is: /[endpoint]/[date]/[timestamp].RESOLVED
-var resolvedRegex = regexp.MustCompile(`^/(?P<endpoint>.*)/(?P<date>\d{4}-\d{2}-\d{2})/(?P<timestamp>\d{33}).RESOLVED$`)
-
-// resolvedURL contains all the parsed info from a resolved url.
-type resolvedURL struct {
-	endpoint         string
-	date             string
-	timestamp        time.Time
-	timestampLogical int
-}
-
-func parseResolvedURL(url string) (resolvedURL, error) {
-	match := resolvedRegex.FindStringSubmatch(url)
-	if len(match) != resolvedRegex.NumSubexp()+1 {
-		return resolvedURL{}, fmt.Errorf("can't parse url %s", url)
-	}
-
-	var resolved resolvedURL
-	for i, name := range resolvedRegex.SubexpNames() {
-		switch name {
-		case "date":
-			resolved.date = strings.ToLower(match[i])
-		case "timestamp":
-			if len(match[i]) != 33 {
-				return resolvedURL{}, fmt.Errorf(
-					"expected timestamp to be 33 characters long, got %d: %s",
-					len(match[i]), match[i],
-				)
-			}
-			var err error
-			resolved.timestamp, resolved.timestampLogical, err = parseTimestamp(
-				match[i][0:23], match[i][23:33],
-			)
-			if err != nil {
-				return resolvedURL{}, err
-			}
-		case "endpoint":
-			resolved.endpoint = strings.ToLower(match[i])
-		default:
-			// Skip all the rest.
-		}
-	}
-
-	return resolved, nil
-}
diff --git a/url_test.go b/url_test.go
deleted file mode 100644
index 407a8747..00000000
--- a/url_test.go
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2021 The Cockroach Authors.
-//
-// Use of this software is governed by the Business Source License
-// included in the file licenses/BSL.txt.
-//
-// As of the Change Date specified in that file, in accordance with
-// the Business Source License, use of this software will be governed
-// by the Apache License, Version 2.0, included in the file
-// licenses/APL.txt.
-
-package main
-
-import (
-	"testing"
-
-	"github.com/stretchr/testify/assert"
-)
-
-func TestNdjsonURL(t *testing.T) {
-	a := assert.New(t)
-	const u = "/endpoint/2020-04-02/202004022058072107140000000000000-56087568dba1e6b8-1-72-00000000-test_table-1f.ndjson"
-
-	p, err := parseNdjsonURL(u)
-	if a.NoError(err) {
-		a.Equal("endpoint", p.endpoint)
-		a.Equal("test_table", p.topic)
-	}
-}