Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

nonce: support reading a new Claimed state. #136

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 15 additions & 17 deletions pkg/code/data/nonce/memory/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import (
"sort"
"sync"

"github.com/code-payments/code-server/pkg/database/query"
"github.com/code-payments/code-server/pkg/code/data/nonce"
"github.com/code-payments/code-server/pkg/database/query"
)

type store struct {
Expand Down Expand Up @@ -58,27 +58,23 @@ func (s *store) findAddress(address string) *nonce.Record {
}

func (s *store) findByState(state nonce.State) []*nonce.Record {
res := make([]*nonce.Record, 0)
for _, item := range s.records {
if item.State == state {
res = append(res, item)
}
}
return res
return s.findFn(func(nonce *nonce.Record) bool {
return nonce.State == state
})
}

func (s *store) findByStateAndPurpose(state nonce.State, purpose nonce.Purpose) []*nonce.Record {
return s.findFn(func(record *nonce.Record) bool {
return record.State == state && record.Purpose == purpose
})
}

func (s *store) findFn(f func(nonce *nonce.Record) bool) []*nonce.Record {
res := make([]*nonce.Record, 0)
for _, item := range s.records {
if item.State != state {
continue
}

if item.Purpose != purpose {
continue
if f(item) {
res = append(res, item)
}

res = append(res, item)
}
return res
}
Expand Down Expand Up @@ -194,7 +190,9 @@ func (s *store) GetRandomAvailableByPurpose(ctx context.Context, purpose nonce.P
s.mu.Lock()
defer s.mu.Unlock()

items := s.findByStateAndPurpose(nonce.StateAvailable, purpose)
items := s.findFn(func(n *nonce.Record) bool {
return n.Purpose == purpose && n.IsAvailable()
})
if len(items) == 0 {
return nil, nonce.ErrNonceNotFound
}
Expand Down
70 changes: 53 additions & 17 deletions pkg/code/data/nonce/nonce.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package nonce
import (
"crypto/ed25519"
"errors"
"time"

"github.com/mr-tron/base58"
)
Expand All @@ -20,14 +21,11 @@ const (
StateAvailable // The nonce is available to be used by a payment intent, subscription, or other nonce-related transaction.
StateReserved // The nonce is reserved by a payment intent, subscription, or other nonce-related transaction.
StateInvalid // The nonce account is invalid (e.g. insufficient funds, etc).
StateClaimed // The nonce is claimed for future use by a process (identified by Node ID).
)

// Split nonce pool across different use cases. This has an added benefit of:
// - Solving for race conditions without distributed locks.
// - Avoiding different use cases from starving each other and ending up in a
// deadlocked state. Concretely, it would be really bad if clients could starve
// internal processes from creating transactions that would allow us to progress
// and submit existing transactions.
// Purpose indicates the intended use purpose of the nonce. By partitioning nonce's by
// purpose, we help prevent various use cases from starving each other.
type Purpose uint8

const (
Expand All @@ -46,22 +44,46 @@ type Record struct {
Purpose Purpose
State State

// Contains the NodeId that transitioned the state into StateClaimed.
//
// Should be ignored if State != StateClaimed.
ClaimNodeId string

// The time at which StateClaimed is no longer valid, and the state should
// be considered StateAvailable.
//
// Should be ignored if State != StateClaimed.
ClaimExpiresAt time.Time

Signature string
}

func (r *Record) GetPublicKey() (ed25519.PublicKey, error) {
return base58.Decode(r.Address)
}

func (r *Record) IsAvailable() bool {
if r.State == StateAvailable {
return true
}
if r.State != StateClaimed {
return false
}

return time.Now().After(r.ClaimExpiresAt)
}

func (r *Record) Clone() Record {
return Record{
Id: r.Id,
Address: r.Address,
Authority: r.Authority,
Blockhash: r.Blockhash,
Purpose: r.Purpose,
State: r.State,
Signature: r.Signature,
Id: r.Id,
Address: r.Address,
Authority: r.Authority,
Blockhash: r.Blockhash,
Purpose: r.Purpose,
State: r.State,
ClaimNodeId: r.ClaimNodeId,
ClaimExpiresAt: r.ClaimExpiresAt,
Signature: r.Signature,
}
}

Expand All @@ -72,21 +94,33 @@ func (r *Record) CopyTo(dst *Record) {
dst.Blockhash = r.Blockhash
dst.Purpose = r.Purpose
dst.State = r.State
dst.ClaimNodeId = r.ClaimNodeId
dst.ClaimExpiresAt = r.ClaimExpiresAt
dst.Signature = r.Signature
}

func (v *Record) Validate() error {
if len(v.Address) == 0 {
func (r *Record) Validate() error {
if len(r.Address) == 0 {
return errors.New("nonce account address is required")
}

if len(v.Authority) == 0 {
if len(r.Authority) == 0 {
return errors.New("authority address is required")
}

if v.Purpose == PurposeUnknown {
if r.Purpose == PurposeUnknown {
return errors.New("nonce purpose must be set")
}

if r.State == StateClaimed {
if r.ClaimNodeId == "" {
return errors.New("missing claim node id")
}
if r.ClaimExpiresAt == (time.Time{}) || r.ClaimExpiresAt.IsZero() {
return errors.New("missing claim expiry date")
}
}

return nil
}

Expand All @@ -102,6 +136,8 @@ func (s State) String() string {
return "reserved"
case StateInvalid:
return "invalid"
case StateClaimed:
return "claimed"
}

return "unknown"
Expand Down
150 changes: 94 additions & 56 deletions pkg/code/data/nonce/postgres/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package postgres
import (
"context"
"database/sql"
"time"

"github.com/jmoiron/sqlx"

Expand All @@ -17,13 +18,15 @@ const (
)

type nonceModel struct {
Id sql.NullInt64 `db:"id"`
Address string `db:"address"`
Authority string `db:"authority"`
Blockhash string `db:"blockhash"`
Purpose uint `db:"purpose"`
State uint `db:"state"`
Signature string `db:"signature"`
Id sql.NullInt64 `db:"id"`
Address string `db:"address"`
Authority string `db:"authority"`
Blockhash string `db:"blockhash"`
Purpose uint `db:"purpose"`
State uint `db:"state"`
Signature string `db:"signature"`
ClaimNodeId string `db:"claim_node_id"`
ClaimExpiresAtMs int64 `db:"claim_expires_at"`
Comment on lines +28 to +29
Copy link
Contributor Author

@mfycheng mfycheng Jun 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From the docs[1], as long as we allow these two fields to be NULL (and have it as the default), then there won't need to be any rewrite of the underlying tables. This means the ALTER TABLE command will place an exclusive access on the table, but the duration of the command should be quick.

[1] https://www.postgresql.org/docs/current/sql-altertable.html

}

func toNonceModel(obj *nonce.Record) (*nonceModel, error) {
Expand All @@ -32,33 +35,37 @@ func toNonceModel(obj *nonce.Record) (*nonceModel, error) {
}

return &nonceModel{
Id: sql.NullInt64{Int64: int64(obj.Id), Valid: true},
Address: obj.Address,
Authority: obj.Authority,
Blockhash: obj.Blockhash,
Purpose: uint(obj.Purpose),
State: uint(obj.State),
Signature: obj.Signature,
Id: sql.NullInt64{Int64: int64(obj.Id), Valid: true},
Address: obj.Address,
Authority: obj.Authority,
Blockhash: obj.Blockhash,
Purpose: uint(obj.Purpose),
State: uint(obj.State),
Signature: obj.Signature,
ClaimNodeId: obj.ClaimNodeId,
ClaimExpiresAtMs: obj.ClaimExpiresAt.UnixMilli(),
}, nil
}

func fromNonceModel(obj *nonceModel) *nonce.Record {
return &nonce.Record{
Id: uint64(obj.Id.Int64),
Address: obj.Address,
Authority: obj.Authority,
Blockhash: obj.Blockhash,
Purpose: nonce.Purpose(obj.Purpose),
State: nonce.State(obj.State),
Signature: obj.Signature,
Id: uint64(obj.Id.Int64),
Address: obj.Address,
Authority: obj.Authority,
Blockhash: obj.Blockhash,
Purpose: nonce.Purpose(obj.Purpose),
State: nonce.State(obj.State),
Signature: obj.Signature,
ClaimNodeId: obj.ClaimNodeId,
ClaimExpiresAt: time.UnixMilli(obj.ClaimExpiresAtMs),
}
}

func (m *nonceModel) dbSave(ctx context.Context, db *sqlx.DB) error {
return pgutil.ExecuteInTx(ctx, db, sql.LevelDefault, func(tx *sqlx.Tx) error {
query := `INSERT INTO ` + nonceTableName + `
(address, authority, blockhash, purpose, state, signature)
VALUES ($1,$2,$3,$4,$5,$6)
(address, authority, blockhash, purpose, state, signature, claim_node_id, claim_expires_at)
VALUES ($1,$2,$3,$4,$5,$6,$7,$8)
ON CONFLICT (address)
DO UPDATE
SET blockhash = $3, state = $5, signature = $6
Expand All @@ -75,6 +82,8 @@ func (m *nonceModel) dbSave(ctx context.Context, db *sqlx.DB) error {
m.Purpose,
m.State,
m.Signature,
m.ClaimNodeId,
m.ClaimExpiresAtMs,
).StructScan(m)

return pgutil.CheckNoRows(err, nonce.ErrInvalidNonce)
Expand Down Expand Up @@ -162,48 +171,77 @@ func dbGetAllByState(ctx context.Context, db *sqlx.DB, state nonce.State, cursor
return res, nil
}

// todo: Implementation still isn't perfect, but better than no randomness. It's
// sufficiently efficient, as long as our nonce pool is larger than the max offset.
// todo: We may need to tune the offset based on pool size and environment, but it
// should be sufficiently good enough for now.
// We query a random nonce by first selecting any available candidate from the
// total set, applying an upper limit of 100, and _then_ randomly shuffling the
// results and selecting the first. By bounding the size before ORDER BY random(),
// we avoid having to shuffle large sets of results.
//
// Previously, we would use OFFSET FLOOR(RANDOM() * 100). However, if the pool
// (post filter) size was less than 100, any selection > pool size would result
// in the OFFSET being set to zero. This meant random() disappeared for a subset
// of values. In practice, this would result in a bias, and increased contention.
//
// For example, 50 Available nonce's, 25 Claimed (expired), 25 Reserved. With Offset:
//
// 1. 50% of the time would be a random Available.
// 2. 25% of the time would be a random expired Claimed.
// 3. 25% of the time would be _the first_ Available.
//
// This meant that 25% of the time would not be random. As we pull from the pool,
// this % only increases, further causing contention.
//
// Performance wise, this approach is slightly worse, but the vast majority of the
// time is spent on the scan and filter. Below are two example query plans (from a
// small dataset in an online editor).
//
// QUERY PLAN (OFFSET):
//
// Limit (cost=17.80..35.60 rows=1 width=140) (actual time=0.019..0.019 rows=0 loops=1)
// -> Seq Scan on codewallet__core_nonce (cost=0.00..17.80 rows=1 width=140) (actual time=0.016..0.017 rows=0 loops=1)
// Filter: ((signature IS NOT NULL) AND (purpose = 1) AND ((state = 0) OR ((state = 2) AND (claim_expires_at < 200))))
// Rows Removed by Filter: 100
//
// Planning Time: 0.046 ms
// Execution Time: 0.031 ms
//
// QUERY PLAN (ORDER BY):
//
// Limit (cost=17.82..17.83 rows=1 width=148) (actual time=0.018..0.019 rows=0 loops=1)
// -> Sort (cost=17.82..17.83 rows=1 width=148) (actual time=0.018..0.018 rows=0 loops=1)
// Sort Key: (random())
// Sort Method: quicksort Memory: 25kB
// -> Subquery Scan on sub (cost=0.00..17.81 rows=1 width=148) (actual time=0.015..0.016 rows=0 loops=1)
// -> Limit (cost=0.00..17.80 rows=1 width=140) (actual time=0.015..0.015 rows=0 loops=1)
// -> Seq Scan on codewallet__core_nonce (cost=0.00..17.80 rows=1 width=140) (actual time=0.015..0.015 rows=0 loops=1)
// Filter: ((signature IS NOT NULL) AND (purpose = 1) AND ((state = 0) OR ((state = 2) AND (claim_expires_at < 200))))
// Rows Removed by Filter: 100
//
// Planning Time: 0.068 ms
// Execution Time: 0.037 ms
//
// Overall, the Seq Scan and Filter is the bulk of the work, with the ORDER BY RANDOM()
// adding a small (fixed) amount of overhead. The trade-off is negligible time complexity
// for more reliable semantics.
Comment on lines +174 to +224
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is probably a thing to really double check.

  1. I discovered this oddity when testing the distributions (which was also highlighted in previous comments). I think this change is likely worth it given the minuscule (theoretical) performance difference.
  2. The previous approach had a fallback query, which I think was to address the OFFSET selecting an offset out of range, but in testing the OFFSET was just set to 0. The new approach removes this issue (but if it was for another issue, that's important to know!)

func dbGetRandomAvailableByPurpose(ctx context.Context, db *sqlx.DB, purpose nonce.Purpose) (*nonceModel, error) {
res := &nonceModel{}
nowMs := time.Now().UnixMilli()

// Signature null check is required because some legacy records didn't have this
// set and causes this call to fail. This is a result of the field not being
// defined at the time of record creation.
//
// todo: Fix said nonce records
query := `SELECT
id, address, authority, blockhash, purpose, state, signature
FROM ` + nonceTableName + `
WHERE state = $1 AND purpose = $2 AND signature IS NOT NULL
OFFSET FLOOR(RANDOM() * 100)
LIMIT 1
`
fallbackQuery := `SELECT
id, address, authority, blockhash, purpose, state, signature
FROM ` + nonceTableName + `
WHERE state = $1 AND purpose = $2 AND signature IS NOT NULL
query := `
SELECT id, address, authority, blockhash, purpose, state, signature FROM (
SELECT id, address, authority, blockhash, purpose, state, signature
FROM ` + nonceTableName + `
WHERE ((state = $1) OR (state = $2 AND claim_expires_at < $3)) AND purpose = $4 AND signature IS NOT NULL
LIMIT 100
) sub
ORDER BY random()
LIMIT 1
`

err := db.GetContext(ctx, res, query, nonce.StateAvailable, purpose)
if err != nil {
err = pgutil.CheckNoRows(err, nonce.ErrNonceNotFound)

// No nonces found. Because our query isn't perfect, fall back to a
// strategy that will guarantee to select something if an available
// nonce exists.
if err == nonce.ErrNonceNotFound {
err := db.GetContext(ctx, res, fallbackQuery, nonce.StateAvailable, purpose)
if err != nil {
return nil, pgutil.CheckNoRows(err, nonce.ErrNonceNotFound)
}
return res, nil
}

return nil, err
}
return res, nil
err := db.GetContext(ctx, res, query, nonce.StateAvailable, nonce.StateClaimed, nowMs, purpose)
return res, pgutil.CheckNoRows(err, nonce.ErrNonceNotFound)
}
Loading
Loading