Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions pkg/tcpip/stack/tcp.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,26 @@ type TCPCubicState struct {
// WEst is the window computed by CUBIC at time
// TimeSinceLastCongestion+RTT i.e WC(TimeSinceLastCongestion+RTT).
WEst float64

// EndSeq is the sequence number that, when cumulatively ACK'd, ends the
// HyStart round
EndSeq seqnum.Value

// CurrRTT is the minimum round-trip time from the current round
CurrRTT time.Duration

// LastRTT is the minimum round-trip time from the previous round
LastRTT time.Duration

// SampleCount is the number of samples from the current round
SampleCount uint

// LastAck is the time we received the most recent ACK (or start of round if
// more recent).
LastAck tcpip.MonotonicTime

// RoundStart is the time we started the most recent HyStart round
RoundStart tcpip.MonotonicTime
}

// TCPRACKState is used to hold a copy of the internal RACK state when the
Expand Down
1 change: 1 addition & 0 deletions pkg/tcpip/transport/tcp/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ go_test(
name = "tcp_test",
size = "small",
srcs = [
"cubic_test.go",
"main_test.go",
"segment_test.go",
"timer_test.go",
Expand Down
102 changes: 100 additions & 2 deletions pkg/tcpip/transport/tcp/cubic.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,40 @@ import (
"math"
"time"

"gvisor.dev/gvisor/pkg/tcpip"
"gvisor.dev/gvisor/pkg/tcpip/stack"
)

// effectivelyInfinity is the sentinel stored in a round-trip-time field before
// any sample has been folded into it with min. It is roughly 100 years: far
// longer than any real TCP round-trip time, yet still representable as a
// time.Duration.
const effectivelyInfinity time.Duration = 100 * 365 * 24 * time.Hour

// Tuning parameters for HyStart. RTT stands for round-trip time.
const (
	// minRTTThresh and maxRTTThresh are the lower and upper clamps applied to
	// the delay-increase threshold, and so set how sensitive detection is to
	// RTT growth. A smaller minRTTThresh risks spurious exits from slow
	// start; a larger maxRTTThresh risks staying in slow start until loss
	// occurs on paths with a large RTT.
	minRTTThresh time.Duration = 4 * time.Millisecond
	maxRTTThresh time.Duration = 16 * time.Millisecond

	// minRTTDivisor divides the previous round's RTT to obtain the delay
	// threshold before clamping. A smaller divisor yields a larger threshold
	// (less sensitivity to delay increase), and vice versa.
	minRTTDivisor = 8

	// nRTTSample is how many RTT samples a round must have collected before
	// the delay-increase check is allowed to end slow start.
	nRTTSample = 8

	// ackDelta is the largest gap between consecutive ACKs for them to still
	// count as one ACK train during HyStart.
	ackDelta time.Duration = 2 * time.Millisecond
)

// cubicState stores the variables related to TCP CUBIC congestion
// control algorithm state.
//
Expand All @@ -39,11 +70,19 @@ type cubicState struct {
// newCubicCC returns a partially initialized cubic state with the constants
// beta and c set and t set to current time.
func newCubicCC(s *sender) *cubicState {
now := s.ep.stack.Clock().NowMonotonic()
return &cubicState{
TCPCubicState: stack.TCPCubicState{
T: s.ep.stack.Clock().NowMonotonic(),
T: now,
Beta: 0.7,
C: 0.4,
// By this point, the sender has initialized its initial sequence
// number.
EndSeq: s.SndNxt,
LastRTT: effectivelyInfinity,
CurrRTT: effectivelyInfinity,
LastAck: now,
RoundStart: now,
},
s: s,
}
Expand All @@ -66,6 +105,62 @@ func (c *cubicState) enterCongestionAvoidance() {
}
}

// updateHyStart feeds one round-trip-time sample (rtt) into the HyStart
// detectors, which look for a safe point to leave slow start before packet
// loss occurs. When a detector fires, Ssthresh is set to the current
// congestion window.
//
// The implementation follows the Linux kernel rather than RFC 9406
// (https://www.rfc-editor.org/rfc/rfc9406.html). The kernel algorithm comes
// straight from the original HyStart paper
// (https://doi.org/10.1016/j.comnet.2011.01.014) and runs two detectors side
// by side: 'ACK train' and 'Delay increase'. The RFC keeps only the latter and
// adds an intermediate Conservative Slow Start phase, which is not implemented
// here.
func (c *cubicState) updateHyStart(rtt time.Duration) {
	// A negative rtt means the round-trip time is unknown; nothing to do.
	if rtt < 0 {
		return
	}

	snd := c.s
	now := snd.ep.stack.Clock().NowMonotonic()

	// EndSeq has been cumulatively ACK'd, so roll over to a new round.
	if c.EndSeq.LessThan(snd.SndUna) {
		c.beginHyStartRound(now)
	}

	// Both detectors compare against the previous round's minimum RTT and
	// stay idle until that round has produced a real measurement.
	havePrevRTT := c.LastRTT < effectivelyInfinity

	// ACK train: the ACK only belongs to the train if it arrived within
	// ackDelta of the previous one.
	if havePrevRTT && now.Sub(c.LastAck) < ackDelta {
		c.LastAck = now
		if elapsed := now.Sub(c.RoundStart); elapsed > c.LastRTT/2 {
			snd.Ssthresh = snd.SndCwnd
		}
	}

	// Delay increase: track this round's minimum RTT and sample count.
	if rtt < c.CurrRTT {
		c.CurrRTT = rtt
	}
	c.SampleCount++

	if !havePrevRTT || c.SampleCount < nRTTSample {
		return
	}

	// The threshold is LastRTT/minRTTDivisor clamped to the range
	// [minRTTThresh, maxRTTThresh].
	thresh := c.LastRTT / minRTTDivisor
	switch {
	case thresh < minRTTThresh:
		thresh = minRTTThresh
	case thresh > maxRTTThresh:
		thresh = maxRTTThresh
	}

	if c.CurrRTT >= c.LastRTT+thresh {
		// The HyStart safe-exit threshold was reached.
		snd.Ssthresh = snd.SndCwnd
	}
}

// beginHyStartRound starts a new HyStart round at time now. The round ends at
// the sender's current SndNxt, the minimum RTT of the round just finished
// becomes LastRTT, and the per-round sample state is reset.
func (c *cubicState) beginHyStartRound(now tcpip.MonotonicTime) {
	// Carry the finished round's minimum RTT forward, then reset the
	// current minimum so the next sample always replaces it.
	c.LastRTT, c.CurrRTT = c.CurrRTT, effectivelyInfinity
	c.SampleCount = 0

	// The new round covers everything sent so far and starts now.
	c.EndSeq = c.s.SndNxt
	c.RoundStart, c.LastAck = now, now
}

// updateSlowStart will update the congestion window as per the slow-start
// algorithm used by NewReno. If after adjusting the congestion window we cross
// the ssThresh then it will return the number of packets that must be consumed
Expand All @@ -92,7 +187,10 @@ func (c *cubicState) updateSlowStart(packetsAcked int) int {
// Update updates cubic's internal state variables. It must be called on every
// ACK received.
// Refer: https://tools.ietf.org/html/rfc8312#section-4
func (c *cubicState) Update(packetsAcked int) {
func (c *cubicState) Update(packetsAcked int, rtt time.Duration) {
if c.s.Ssthresh == InitialSsthresh && c.s.SndCwnd < c.s.Ssthresh {
c.updateHyStart(rtt)
}
if c.s.SndCwnd < c.s.Ssthresh {
packetsAcked = c.updateSlowStart(packetsAcked)
if packetsAcked == 0 {
Expand Down
Loading