/
pg.go
226 lines (189 loc) · 5.5 KB
/
pg.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
package flycheck
import (
"context"
"fmt"
"net"
"strings"
"time"
"github.com/fly-examples/postgres-ha/pkg/flypg"
"github.com/fly-examples/postgres-ha/pkg/privnet"
"github.com/pkg/errors"
chk "github.com/superfly/fly-checks/check"
"github.com/jackc/pgtype"
"github.com/jackc/pgx/v4"
)
// CheckPostgreSQL registers the PostgreSQL health checks on the given suite
// and returns that same suite.
//
// It opens one connection through the proxy (which is used to discover the
// current leader) and one connection to the local node. Both connections are
// closed by checks.OnCompletion, which this function installs once both
// connections have been established.
//
// Registered checks:
//   - "transactions": the local node must be read/write when it is the
//     leader and readonly otherwise.
//   - "replicationLag" (leader only): one check per row returned by
//     replicationEntries; it fails when the replay lag is 3s or more.
//   - "replication" (non-leader only): the primary the local node is syncing
//     from must match the leader address resolved through the proxy.
//   - "connections": reports connection usage on the local node.
//
// A non-nil error is returned when the node cannot be initialized, a
// connection cannot be opened, the leader address cannot be resolved, or the
// replication info cannot be queried.
func CheckPostgreSQL(ctx context.Context, checks *chk.CheckSuite) (*chk.CheckSuite, error) {
	// Replay lag at or above this value fails the replicationLag check.
	const maxReplayLag = 3 * time.Second

	node, err := flypg.NewNode()
	if err != nil {
		return checks, errors.Wrap(err, "failed to initialize node")
	}

	leaderConn, err := node.NewProxyConnection(ctx)
	if err != nil {
		return checks, errors.Wrap(err, "failed to connect to proxy")
	}

	localConn, err := node.NewLocalConnection(ctx)
	if err != nil {
		// OnCompletion has not been installed yet, so the proxy connection
		// has to be released here or it would leak.
		leaderConn.Close(ctx)
		return checks, errors.Wrap(err, "failed to connect with local node")
	}

	// Cleanup connections once the suite has finished.
	// NOTE(review): the error returns below rely on the caller still invoking
	// OnCompletion to release the connections — confirm against the caller.
	checks.OnCompletion = func() {
		leaderConn.Close(ctx)
		localConn.Close(ctx)
	}

	leaderAddr, err := resolveServerAddr(ctx, leaderConn)
	if err != nil {
		return checks, err
	}

	isLeader := leaderAddr == node.PrivateIP.String()

	if isLeader {
		checks.AddCheck("transactions", func() (string, error) {
			return transactionMode(ctx, localConn, "read/write")
		})

		entries, err := replicationEntries(ctx, leaderConn)
		if err != nil {
			return checks, errors.Wrap(err, "failed to query replication info")
		}

		for _, entry := range entries {
			// msg and lag are declared per iteration so that every closure
			// captures its own values.
			msg := fmt.Sprintf("%s is lagging %s", entry.Client, entry.ReplayLag)
			lag := entry.ReplayLag
			checks.AddCheck("replicationLag", func() (string, error) {
				if lag >= maxReplayLag {
					// msg is data, not a format string.
					return "", fmt.Errorf("%s", msg)
				}
				return msg, nil
			})
		}
	} else {
		checks.AddCheck("transactions", func() (string, error) {
			return transactionMode(ctx, localConn, "readonly")
		})

		// Ensures the proxy address and the primary address we are
		// receiving updates from are the same.
		checks.AddCheck("replication", func() (string, error) {
			return connectedToLeader(ctx, localConn, leaderAddr)
		})
	}

	checks.AddCheck("connections", func() (string, error) {
		return connectionCount(ctx, localConn)
	})

	return checks, nil
}
// connectedToLeader verifies that the primary the node behind conn is
// configured to sync from is leaderAddr.
//
// conn is the connection to the node being checked and leaderAddr is the
// leader address the caller expects. On a match it returns a short status
// message; on a mismatch it returns an error naming the primary currently in
// use and the expected one. Errors from resolving the current primary are
// returned unchanged.
func connectedToLeader(ctx context.Context, conn *pgx.Conn, leaderAddr string) (string, error) {
	primaryAddr, err := resolvePrimaryFromStandby(ctx, conn)
	if err != nil {
		return "", err
	}

	if primaryAddr != leaderAddr {
		// "current" is what the node is actually syncing from; "expected" is
		// the leader address handed in by the caller.
		return "", fmt.Errorf("primary mismatch detected: current: %q, expected %q", primaryAddr, leaderAddr)
	}

	return fmt.Sprintf("syncing from %s", leaderAddr), nil
}
// transactionMode reads the transaction_read_only setting over conn and
// compares the resulting mode with expected.
//
// A setting of "on" maps to "readonly" and "off" maps to "read/write". The
// resolved mode is returned when it equals expected. An error is returned
// when the query fails, when the setting has any other value, or when the
// resolved mode differs from expected.
func transactionMode(ctx context.Context, conn *pgx.Conn, expected string) (string, error) {
	var readonly string
	if err := conn.QueryRow(ctx, "SHOW transaction_read_only;").Scan(&readonly); err != nil {
		// Surface the real failure instead of reporting an empty mode.
		return "", fmt.Errorf("querying transaction_read_only: %w", err)
	}

	var state string
	switch readonly {
	case "on":
		state = "readonly"
	case "off":
		state = "read/write"
	default:
		return "", fmt.Errorf("unexpected transaction_read_only value %q", readonly)
	}

	if state != expected {
		return "", fmt.Errorf("%s but expected %s", state, expected)
	}

	return state, nil
}
// ReplicationEntry describes one row read from pg_stat_replication.
type ReplicationEntry struct {
	// Client is the string form of the row's client_addr IP address.
	Client string
	// ReplayLag is derived from the row's replay_lag column.
	ReplayLag time.Duration
}
// replicationEntries queries pg_stat_replication over the leader connection
// and returns one ReplicationEntry per row, holding the client address and
// its replay lag.
//
// The result is nil when the view has no rows. Query, scan and iteration
// errors are returned unchanged.
func replicationEntries(ctx context.Context, leader *pgx.Conn) ([]ReplicationEntry, error) {
	sql := `select client_addr, replay_lag from pg_stat_replication;`

	rows, err := leader.Query(ctx, sql)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var entries []ReplicationEntry
	for rows.Next() {
		var clientAddr net.IPNet
		var replayLag pgtype.Interval
		if err := rows.Scan(&clientAddr, &replayLag); err != nil {
			return nil, err
		}

		// pgtype.Interval keeps its time part in microseconds while
		// time.Duration counts nanoseconds, so the value must be scaled.
		// Whole days are stored separately and are added as 24h each.
		// The Months field is not converted (no fixed length).
		lag := time.Duration(replayLag.Microseconds)*time.Microsecond +
			time.Duration(replayLag.Days)*24*time.Hour

		entries = append(entries, ReplicationEntry{
			Client:    clientAddr.IP.String(),
			ReplayLag: lag,
		})
	}
	// Next also returns false on failure; make sure the loop ended cleanly.
	if err := rows.Err(); err != nil {
		return nil, err
	}

	return entries, nil
}
// connectionCount summarizes connection usage as seen through the local
// connection: the number of rows in pg_stat_activity together with the
// superuser_reserved_connections and max_connections settings.
//
// It returns the summary as "<n> used, <n> reserved, <n> max", or an error
// when the query or scan fails.
func connectionCount(ctx context.Context, local *pgx.Conn) (string, error) {
	const query = `select used, res_for_super as reserved, max_conn as max from
(select count(*) used from pg_stat_activity) q1,
(select setting::int res_for_super from pg_settings where name=$$superuser_reserved_connections$$) q2,
(select setting::int max_conn from pg_settings where name=$$max_connections$$) q3`

	var (
		inUse    int
		reserved int
		limit    int
	)

	row := local.QueryRow(ctx, query)
	if scanErr := row.Scan(&inUse, &reserved, &limit); scanErr != nil {
		return "", fmt.Errorf("%v", scanErr)
	}

	summary := fmt.Sprintf("%d used, %d reserved, %d max", inUse, reserved, limit)
	return summary, nil
}
// resolvePrimaryFromStandby resolves the primary address by parsing the
// primary_conninfo configuration setting read over the local connection.
//
// When primary_conninfo is empty the node is assumed to be the primary and
// the address from privnet.PrivateIPv6 is returned. Otherwise the setting is
// treated as whitespace-separated key=value tokens and the value of the
// "host" key is returned; the result is the empty string when no host key is
// present. Query, scan and iteration errors are returned unchanged.
func resolvePrimaryFromStandby(ctx context.Context, local *pgx.Conn) (string, error) {
	rows, err := local.Query(ctx, "show primary_conninfo;")
	if err != nil {
		return "", err
	}
	defer rows.Close()

	var primaryConn string
	for rows.Next() {
		if err := rows.Scan(&primaryConn); err != nil {
			return "", err
		}
	}
	// Next also returns false on failure; make sure the loop ended cleanly.
	if err := rows.Err(); err != nil {
		return "", err
	}

	// If we don't have any assigned primary, assume we are the primary.
	if primaryConn == "" {
		ip, err := privnet.PrivateIPv6()
		if err != nil {
			return "", err
		}
		return ip.String(), nil
	}

	var host string
	for _, entry := range strings.Fields(primaryConn) {
		// Split on the first "=" only so that values containing "=" stay
		// intact, and skip tokens without "=" instead of indexing past the
		// end of the slice.
		parts := strings.SplitN(entry, "=", 2)
		if len(parts) != 2 {
			continue
		}
		if parts[0] == "host" {
			// The last host token wins.
			host = parts[1]
		}
	}

	return host, nil
}
// resolveServerAddr takes a connection and returns the address of the server
// it is connected to, as reported by inet_server_addr().
//
// An error is returned when the query, scan or iteration fails, and also when
// the query yields no address (no row, or a NULL value).
func resolveServerAddr(ctx context.Context, conn *pgx.Conn) (string, error) {
	rows, err := conn.Query(ctx, "SELECT inet_server_addr();")
	if err != nil {
		return "", err
	}
	defer rows.Close()

	var addr *net.IPNet
	for rows.Next() {
		if err := rows.Scan(&addr); err != nil {
			return "", err
		}
	}
	if err := rows.Err(); err != nil {
		return "", err
	}

	// addr stays nil when nothing was scanned into it; dereferencing it
	// would panic.
	if addr == nil {
		return "", fmt.Errorf("inet_server_addr() returned no address")
	}

	return addr.IP.String(), nil
}