11/*
2- * TCP CUBIC: Binary Increase Congestion control for TCP v2.2
2+ * TCP CUBIC: Binary Increase Congestion control for TCP v2.3
33 * Home page:
44 * http://netsrv.csc.ncsu.edu/twiki/bin/view/Main/BIC
55 * This is from the implementation of CUBIC TCP in
6- * Injong Rhee, Lisong Xu.
7- * "CUBIC: A New TCP-Friendly High-Speed TCP Variant
8- * in PFLDnet 2005
6+ * Sangtae Ha, Injong Rhee and Lisong Xu,
7+ * "CUBIC: A New TCP-Friendly High-Speed TCP Variant"
8+ * in ACM SIGOPS Operating Systems Review, July 2008.
99 * Available from:
10- * http://netsrv.csc.ncsu.edu/export/cubic-paper.pdf
10+ * http://netsrv.csc.ncsu.edu/export/cubic_a_new_tcp_2008.pdf
11+ *
12+ * CUBIC integrates a new slow start algorithm, called HyStart.
13+ * The details of HyStart are presented in
14+ * Sangtae Ha and Injong Rhee,
15+ * "Taming the Elephants: New TCP Slow Start", NCSU TechReport 2008.
16+ * Available from:
17+ * http://netsrv.csc.ncsu.edu/export/hystart_techreport_2008.pdf
18+ *
19+ * All testing results are available from:
20+ * http://netsrv.csc.ncsu.edu/wiki/index.php/TCP_Testing
1121 *
1222 * Unless CUBIC is enabled and congestion window is large
1323 * this behaves the same as the original Reno.
2333 */
#define BICTCP_HZ		10	/* BIC HZ 2^10 = 1024 */

/*
 * Two methods of hybrid slow start.
 * These are bit flags: they are OR-ed together in hystart_detect and
 * accumulated in the per-connection "found" field.
 */
#define HYSTART_ACK_TRAIN	0x1
#define HYSTART_DELAY		0x2

/* Number of delay samples for detecting the increase of delay */
#define HYSTART_MIN_SAMPLES	8
/* Lower and upper bounds applied to the delay-increase threshold */
#define HYSTART_DELAY_MIN	(2U<<3)
#define HYSTART_DELAY_MAX	(16U<<3)
/*
 * Bound x to the range [HYSTART_DELAY_MIN, HYSTART_DELAY_MAX].
 *
 * The opening parenthesis must follow the macro name immediately: with
 * whitespace in between, the preprocessor defines an object-like macro
 * whose replacement text starts with "(x)", and every use site breaks.
 */
#define HYSTART_DELAY_THRESH(x)	clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
45+
2646static int fast_convergence __read_mostly = 1 ;
2747static int beta __read_mostly = 717 ; /* = 717/1024 (BICTCP_BETA_SCALE) */
2848static int initial_ssthresh __read_mostly ;
2949static int bic_scale __read_mostly = 41 ;
3050static int tcp_friendliness __read_mostly = 1 ;
3151
52+ static int hystart __read_mostly = 1 ;
53+ static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY ;
54+ static int hystart_low_window __read_mostly = 16 ;
55+
3256static u32 cube_rtt_scale __read_mostly ;
3357static u32 beta_scale __read_mostly ;
3458static u64 cube_factor __read_mostly ;
@@ -44,6 +68,13 @@ module_param(bic_scale, int, 0444);
4468MODULE_PARM_DESC (bic_scale , "scale (scaled by 1024) value for bic function (bic_scale/1024)" );
4569module_param (tcp_friendliness , int , 0644 );
4670MODULE_PARM_DESC (tcp_friendliness , "turn on/off tcp friendliness" );
71+ module_param (hystart , int , 0644 );
72+ MODULE_PARM_DESC (hystart , "turn on/off hybrid slow start algorithm" );
73+ module_param (hystart_detect , int , 0644 );
74+ MODULE_PARM_DESC (hystart_detect , "hyrbrid slow start detection mechanisms"
75+ " 1: packet-train 2: delay 3: both packet-train and delay" );
76+ module_param (hystart_low_window , int , 0644 );
77+ MODULE_PARM_DESC (hystart_low_window , "lower bound cwnd for hybrid slow start" );
4778
4879/* BIC TCP Parameters */
4980struct bictcp {
@@ -59,7 +90,13 @@ struct bictcp {
5990 u32 ack_cnt ; /* number of acks */
6091 u32 tcp_cwnd ; /* estimated tcp cwnd */
6192#define ACK_RATIO_SHIFT 4
62- u32 delayed_ack ; /* estimate the ratio of Packets/ACKs << 4 */
93+ u16 delayed_ack ; /* estimate the ratio of Packets/ACKs << 4 */
94+ u8 sample_cnt ; /* number of samples to decide curr_rtt */
95+ u8 found ; /* the exit point is found? */
96+ u32 round_start ; /* beginning of each round */
97+ u32 end_seq ; /* end_seq of the round */
98+ u32 last_jiffies ; /* last time when the ACK spacing is close */
99+ u32 curr_rtt ; /* the minimum rtt of current round */
63100};
64101
65102static inline void bictcp_reset (struct bictcp * ca )
@@ -76,12 +113,28 @@ static inline void bictcp_reset(struct bictcp *ca)
76113 ca -> delayed_ack = 2 << ACK_RATIO_SHIFT ;
77114 ca -> ack_cnt = 0 ;
78115 ca -> tcp_cwnd = 0 ;
116+ ca -> found = 0 ;
117+ }
118+
119+ static inline void bictcp_hystart_reset (struct sock * sk )
120+ {
121+ struct tcp_sock * tp = tcp_sk (sk );
122+ struct bictcp * ca = inet_csk_ca (sk );
123+
124+ ca -> round_start = ca -> last_jiffies = jiffies ;
125+ ca -> end_seq = tp -> snd_nxt ;
126+ ca -> curr_rtt = 0 ;
127+ ca -> sample_cnt = 0 ;
79128}
80129
81130static void bictcp_init (struct sock * sk )
82131{
83132 bictcp_reset (inet_csk_ca (sk ));
84- if (initial_ssthresh )
133+
134+ if (hystart )
135+ bictcp_hystart_reset (sk );
136+
137+ if (!hystart && initial_ssthresh )
85138 tcp_sk (sk )-> snd_ssthresh = initial_ssthresh ;
86139}
87140
@@ -235,9 +288,11 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
235288 if (!tcp_is_cwnd_limited (sk , in_flight ))
236289 return ;
237290
238- if (tp -> snd_cwnd <= tp -> snd_ssthresh )
291+ if (tp -> snd_cwnd <= tp -> snd_ssthresh ) {
292+ if (hystart && after (ack , ca -> end_seq ))
293+ bictcp_hystart_reset (sk );
239294 tcp_slow_start (tp );
240- else {
295+ } else {
241296 bictcp_update (ca , tp -> snd_cwnd );
242297
243298 /* In dangerous area, increase slowly.
@@ -281,8 +336,45 @@ static u32 bictcp_undo_cwnd(struct sock *sk)
281336
282337static void bictcp_state (struct sock * sk , u8 new_state )
283338{
284- if (new_state == TCP_CA_Loss )
339+ if (new_state == TCP_CA_Loss ) {
285340 bictcp_reset (inet_csk_ca (sk ));
341+ bictcp_hystart_reset (sk );
342+ }
343+ }
344+
345+ static void hystart_update (struct sock * sk , u32 delay )
346+ {
347+ struct tcp_sock * tp = tcp_sk (sk );
348+ struct bictcp * ca = inet_csk_ca (sk );
349+
350+ if (!(ca -> found & hystart_detect )) {
351+ u32 curr_jiffies = jiffies ;
352+
353+ /* first detection parameter - ack-train detection */
354+ if (curr_jiffies - ca -> last_jiffies <= msecs_to_jiffies (2 )) {
355+ ca -> last_jiffies = curr_jiffies ;
356+ if (curr_jiffies - ca -> round_start >= ca -> delay_min >>4 )
357+ ca -> found |= HYSTART_ACK_TRAIN ;
358+ }
359+
360+ /* obtain the minimum delay of more than sampling packets */
361+ if (ca -> sample_cnt < HYSTART_MIN_SAMPLES ) {
362+ if (ca -> curr_rtt == 0 || ca -> curr_rtt > delay )
363+ ca -> curr_rtt = delay ;
364+
365+ ca -> sample_cnt ++ ;
366+ } else {
367+ if (ca -> curr_rtt > ca -> delay_min +
368+ HYSTART_DELAY_THRESH (ca -> delay_min >>4 ))
369+ ca -> found |= HYSTART_DELAY ;
370+ }
371+ /*
372+ * Either one of two conditions are met,
373+ * we exit from slow start immediately.
374+ */
375+ if (ca -> found & hystart_detect )
376+ tp -> snd_ssthresh = tp -> snd_cwnd ;
377+ }
286378}
287379
288380/* Track delayed acknowledgment ratio using sliding window
@@ -291,6 +383,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
291383static void bictcp_acked (struct sock * sk , u32 cnt , s32 rtt_us )
292384{
293385 const struct inet_connection_sock * icsk = inet_csk (sk );
386+ const struct tcp_sock * tp = tcp_sk (sk );
294387 struct bictcp * ca = inet_csk_ca (sk );
295388 u32 delay ;
296389
@@ -314,6 +407,11 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
314407 /* first time call or link delay decreases */
315408 if (ca -> delay_min == 0 || ca -> delay_min > delay )
316409 ca -> delay_min = delay ;
410+
411+ /* hystart triggers when cwnd is larger than some threshold */
412+ if (hystart && tp -> snd_cwnd <= tp -> snd_ssthresh &&
413+ tp -> snd_cwnd >= hystart_low_window )
414+ hystart_update (sk , delay );
317415}
318416
319417static struct tcp_congestion_ops cubictcp = {
@@ -372,4 +470,4 @@ module_exit(cubictcp_unregister);
372470MODULE_AUTHOR ("Sangtae Ha, Stephen Hemminger" );
373471MODULE_LICENSE ("GPL" );
374472MODULE_DESCRIPTION ("CUBIC TCP" );
375- MODULE_VERSION ("2.2 " );
473+ MODULE_VERSION ("2.3 " );
0 commit comments