Skip to content

Commit 0cb8c0a

Browse files
committed
Fix race condition in transaction handling
Once a server sends a response, it must be ready to receive a new message with the same tag. While using `defer` to clean up a tag once its message was handled was convenient, it ended up creating a race condition on low-latency connections with a highly parallel client (such as `v9fs`) like so:

- Client sends Tmessage with tag X
- Server sends Rmessage with tag X
- Client sends Tmessage with tag X
- Server refuses to handle Tmessage because X has not been cleared
- Server clears X

This change removes that race condition, re-arranging the order of operations to:

1. Client sends Tmessage with tag X
2. Server handles message and clears X
3. Server sends Rmessage with tag X

We've already cleared the tag for re-use before sending a response. An astute reader would note that with this re-ordering, a client may "steal" a tag before an Rmessage is received by using it between steps 1 and 2. However, the client would be violating the 9P protocol, as it has not received the corresponding R-message (because the server has not even sent it yet!), and only harms itself in this scenario, as it will become unable to recognise the tag on the Rmessage when it does come.
1 parent 6c36951 commit 0cb8c0a

File tree

3 files changed

+27
-22
lines changed

3 files changed

+27
-22
lines changed

conn.go

+9-7
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,8 @@ func (c *conn) handleMessage(m styxproto.Msg) bool {
209209
return c.handleFcall(cx, m)
210210
case styxproto.BadMessage:
211211
c.srv.logf("got bad message from %s: %s", c.remoteAddr(), m.Err)
212-
c.Rerror(m.Tag(), "bad message: %s", m.Err)
213212
c.clearTag(m.Tag())
213+
c.Rerror(m.Tag(), "bad message: %s", m.Err)
214214
return true
215215
default:
216216
c.Rerror(m.Tag(), "unexpected %T message", m)
@@ -261,12 +261,13 @@ Loop:
261261
// - Setting a per-connection session limit
262262
// - close connections that have not established a session in N seconds
263263
func (c *conn) handleTauth(cx context.Context, m styxproto.Tauth) bool {
264-
defer c.clearTag(m.Tag())
265264
if c.srv.Auth == nil {
265+
c.clearTag(m.Tag())
266266
c.Rerror(m.Tag(), "%s", errNotSupported)
267267
return true
268268
}
269269
if _, ok := c.sessionFid.Get(m.Afid()); ok {
270+
c.clearTag(m.Tag())
270271
c.Rerror(m.Tag(), "fid %x in use", m.Afid())
271272
return false
272273
}
@@ -293,7 +294,6 @@ func (c *conn) handleTauth(cx context.Context, m styxproto.Tauth) bool {
293294
}
294295

295296
func (c *conn) handleTattach(cx context.Context, m styxproto.Tattach) bool {
296-
defer c.clearTag(m.Tag())
297297
var handler Handler = DefaultServeMux
298298
if c.srv.Handler != nil {
299299
handler = c.srv.Handler
@@ -307,16 +307,19 @@ func (c *conn) handleTattach(cx context.Context, m styxproto.Tattach) bool {
307307
// We should call the Auth handler if Afid is NOFID, passing it
308308
// a util.BlackHole.
309309
if !c.sessionFid.Fetch(s, m.Afid()) {
310+
c.clearTag(m.Tag())
310311
c.Rerror(m.Tag(), "invalid afid %x", m.Afid())
311312
return false
312313
}
313314
// From attach(5): The same validated afid may be used for
314315
// multiple attach messages with the same uname and aname.
315316
if s.User != string(m.Uname()) || s.Access != string(m.Aname()) {
317+
c.clearTag(m.Tag())
316318
c.Rerror(m.Tag(), "afid mismatch for %s on %s", m.Uname(), m.Aname())
317319
return false
318320
}
319321
if err := <-s.authC; err != nil {
322+
c.clearTag(m.Tag())
320323
c.Rerror(m.Tag(), "auth failed: %s", err)
321324
return false
322325
}
@@ -328,15 +331,14 @@ func (c *conn) handleTattach(cx context.Context, m styxproto.Tattach) bool {
328331
c.sessionFid.Put(m.Fid(), s)
329332
s.IncRef()
330333
s.files.Put(m.Fid(), file{name: "/", rwc: nil})
334+
c.clearTag(m.Tag())
331335
c.Rattach(m.Tag(), c.qid(".", styxproto.QTDIR))
332336
return true
333337
}
334338

335339
func (c *conn) handleTflush(cx context.Context, m styxproto.Tflush) bool {
336-
defer c.clearTag(m.Tag())
337-
338-
oldtag := m.Oldtag()
339-
c.clearTag(oldtag)
340+
c.clearTag(m.Oldtag())
341+
c.clearTag(m.Tag())
340342

341343
c.Rflush(m.Tag())
342344
return true

request.go

+7-7
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func (info reqInfo) Path() string {
5757
}
5858

5959
func (info reqInfo) Rerror(format string, args ...interface{}) {
60-
defer info.session.conn.clearTag(info.tag)
60+
info.session.conn.clearTag(info.tag)
6161
info.session.conn.Rerror(info.tag, format, args...)
6262
}
6363

@@ -132,7 +132,6 @@ type Topen struct {
132132
}
133133

134134
func (t Topen) Ropen(rwc interface{}, mode os.FileMode) {
135-
defer t.session.conn.clearTag(t.tag)
136135
var (
137136
file file
138137
f styxfile.Interface
@@ -156,6 +155,7 @@ func (t Topen) Ropen(rwc interface{}, mode os.FileMode) {
156155
file.rwc = f
157156
})
158157
qid := t.session.conn.qid(t.Path(), qidType(mode))
158+
t.session.conn.clearTag(t.tag)
159159
t.session.conn.Ropen(t.tag, qid, 0)
160160
}
161161

@@ -188,7 +188,6 @@ func (t Twalk) Path() string {
188188
// Is that correct in every case?
189189

190190
func (t Twalk) Rwalk(exists bool, mode os.FileMode) {
191-
defer t.session.conn.clearTag(t.tag)
192191
if !exists {
193192
t.defaultResponse()
194193
return
@@ -210,6 +209,7 @@ func (t Twalk) Rwalk(exists bool, mode os.FileMode) {
210209
}
211210
dir, _ = path.Split(dir)
212211
}
212+
t.session.conn.clearTag(t.tag)
213213
if err := t.session.conn.Rwalk(t.tag, wqid...); err != nil {
214214
panic(err)
215215
}
@@ -225,7 +225,6 @@ type Tstat struct {
225225
}
226226

227227
func (t Tstat) Rstat(info os.FileInfo) {
228-
defer t.session.conn.clearTag(t.tag)
229228
buf := make([]byte, styxproto.MaxStatLen)
230229
uid, gid, muid := sys.FileOwner(info)
231230
name := info.Name()
@@ -242,6 +241,7 @@ func (t Tstat) Rstat(info os.FileInfo) {
242241
stat.SetAtime(uint32(info.ModTime().Unix())) // TODO: get atime
243242
stat.SetMtime(uint32(info.ModTime().Unix()))
244243
stat.SetQid(t.session.conn.qid(t.Path(), qidType(info.Mode())))
244+
t.session.conn.clearTag(t.tag)
245245
t.session.conn.Rstat(t.tag, stat)
246246
}
247247

@@ -261,7 +261,6 @@ type Tcreate struct {
261261
}
262262

263263
func (t Tcreate) Rcreate(rwc interface{}) {
264-
defer t.session.conn.clearTag(t.tag)
265264
var (
266265
f styxfile.Interface
267266
err error
@@ -284,6 +283,7 @@ func (t Tcreate) Rcreate(rwc interface{}) {
284283

285284
qtype := qidType(t.Perm)
286285
qid := t.session.conn.qid(file.name, qtype)
286+
t.session.conn.clearTag(t.tag)
287287
t.session.conn.Rcreate(t.tag, qid, 0)
288288
}
289289

@@ -298,10 +298,10 @@ type Tremove struct {
298298
}
299299

300300
func (t Tremove) Rremove() {
301-
defer t.session.conn.clearTag(t.tag)
302301
t.session.conn.sessionFid.Del(t.fid)
303302
t.session.files.Del(t.fid)
304303
t.session.conn.qidpool.Del(t.Path())
304+
t.session.conn.clearTag(t.tag)
305305
t.session.conn.Rremove(t.tag)
306306
if !t.session.DecRef() {
307307
t.session.close()
@@ -320,7 +320,7 @@ type Twstat struct {
320320
}
321321

322322
func (t Twstat) Rwstat() {
323-
defer t.session.conn.clearTag(t.tag)
323+
t.session.conn.clearTag(t.tag)
324324
t.session.conn.Rwstat(t.tag)
325325
}
326326

session.go

+11-8
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ func (s *Session) handleTwalk(cx context.Context, msg styxproto.Twalk, file file
110110
// newfid must be unused or equal to fid
111111
if newfid != msg.Fid() {
112112
if _, ok := s.conn.sessionFid.Get(newfid); ok {
113-
s.conn.Rerror(msg.Tag(), "Twalk: fid %x already in use", newfid)
114113
s.conn.clearTag(msg.Tag())
114+
s.conn.Rerror(msg.Tag(), "Twalk: fid %x already in use", newfid)
115115
return false
116116
}
117117
}
@@ -126,8 +126,8 @@ func (s *Session) handleTwalk(cx context.Context, msg styxproto.Twalk, file file
126126
s.conn.sessionFid.Put(newfid, s)
127127
s.IncRef()
128128
}
129-
s.conn.Rwalk(msg.Tag())
130129
s.conn.clearTag(msg.Tag())
130+
s.conn.Rwalk(msg.Tag())
131131
return true
132132
}
133133

@@ -180,7 +180,6 @@ func (s *Session) handleTremove(cx context.Context, msg styxproto.Tremove, file
180180

181181
func (s *Session) handleTstat(cx context.Context, msg styxproto.Tstat, file file) bool {
182182
if file.auth {
183-
defer s.conn.clearTag(msg.Tag())
184183
buf := make([]byte, styxproto.MaxStatLen)
185184
stat, _, err := styxproto.NewStat(buf, "", "", "", "")
186185
if err != nil {
@@ -190,6 +189,7 @@ func (s *Session) handleTstat(cx context.Context, msg styxproto.Tstat, file file
190189
}
191190
stat.SetMode(styxproto.DMAUTH)
192191
stat.SetQid(s.conn.qid("", styxproto.QTAUTH))
192+
s.conn.clearTag(msg.Tag())
193193
s.conn.Rstat(msg.Tag(), stat)
194194
return true
195195
}
@@ -210,8 +210,8 @@ func (s *Session) handleTwstat(cx context.Context, msg styxproto.Twstat, file fi
210210
}
211211

212212
func (s *Session) handleTread(cx context.Context, msg styxproto.Tread, file file) bool {
213-
defer s.conn.clearTag(msg.Tag())
214213
if file.rwc == nil {
214+
s.conn.clearTag(msg.Tag())
215215
s.conn.Rerror(msg.Tag(), "file %s is not open for reading", file.name)
216216
return false
217217
}
@@ -227,6 +227,7 @@ func (s *Session) handleTread(cx context.Context, msg styxproto.Tread, file file
227227
// TODO(droyo) cancellation
228228
n, err := file.rwc.ReadAt(buf, msg.Offset())
229229

230+
s.conn.clearTag(msg.Tag())
230231
if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
231232
s.conn.Rerror(msg.Tag(), "%v", err)
232233
} else {
@@ -236,31 +237,33 @@ func (s *Session) handleTread(cx context.Context, msg styxproto.Tread, file file
236237
}
237238

238239
func (s *Session) handleTwrite(cx context.Context, msg styxproto.Twrite, file file) bool {
239-
defer s.conn.clearTag(msg.Tag())
240240
if file.rwc == nil {
241+
s.conn.clearTag(msg.Tag())
241242
s.conn.Rerror(msg.Tag(), "file %q is not opened for writing", file.name)
242243
return false
243244
}
244245

245246
// TODO(droyo): handle cancellation
246247
w := util.NewSectionWriter(file.rwc, msg.Offset(), msg.Count())
247248
n, err := io.Copy(w, msg)
248-
if err != nil {
249+
s.conn.clearTag(msg.Tag())
250+
if n == 0 && err != nil {
249251
s.conn.Rerror(msg.Tag(), "%v", err)
252+
} else {
253+
s.conn.Rwrite(msg.Tag(), n)
250254
}
251-
s.conn.Rwrite(msg.Tag(), n)
252255
return true
253256
}
254257

255258
func (s *Session) handleTclunk(cx context.Context, msg styxproto.Tclunk, file file) bool {
256-
defer s.conn.clearTag(msg.Tag())
257259
s.conn.sessionFid.Del(msg.Fid())
258260
if file.rwc != nil {
259261
if err := file.rwc.Close(); err != nil {
260262
s.conn.Rerror(msg.Tag(), "close %s: %v", file.name, err)
261263
}
262264
}
263265
s.files.Del(msg.Fid())
266+
s.conn.clearTag(msg.Tag())
264267
s.conn.Rclunk(msg.Tag())
265268
if !s.DecRef() {
266269
s.endSession()

0 commit comments

Comments
 (0)