
Commit bec41a1

yuchungcheng authored and davem330 committed
tcp: remove early retransmit
This patch removes support for RFC 5827 early retransmit (i.e., fast
recovery on small inflight with < 3 dupacks), because it is subsumed by
the new RACK loss detection. More specifically, when RACK receives
DUPACKs it arms a reordering timer to start fast recovery after a
quarter of the (min)RTT, so it covers early retransmit, except that RACK
does not limit itself to a specific inflight or dupack count.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
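
As a rough illustration of the reordering-timer behavior the message describes, the standalone sketch below computes a delay of a quarter of the minimum RTT with a 2 ms floor (the same floor the removed early-retransmit code used). Names are hypothetical; this is not the kernel's RACK implementation.

/* Standalone sketch, not kernel code: a hypothetical RACK-style
 * reordering-timer delay. Fast recovery would start when a timer armed
 * with this delay fires, instead of waiting for a third duplicate ACK.
 */
#include <stdint.h>
#include <stdio.h>

#define REO_DELAY_FLOOR_US 2000	/* 2 ms floor, as in the removed ER code */

/* A quarter of the minimum RTT, clamped to the floor above. */
static uint32_t reo_timer_delay_us(uint32_t min_rtt_us)
{
	uint32_t delay = min_rtt_us / 4;

	return delay > REO_DELAY_FLOOR_US ? delay : REO_DELAY_FLOOR_US;
}

int main(void)
{
	/* e.g. a 40 ms RTT path waits ~10 ms before starting fast recovery */
	printf("delay = %u us\n", reo_timer_delay_us(40000));
	return 0;
}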
1 parent 840a3cb

12 files changed: +12 −111 lines

Documentation/networking/ip-sysctl.txt

Lines changed: 5 additions & 14 deletions
@@ -246,21 +246,12 @@ tcp_dsack - BOOLEAN
 	Allows TCP to send "duplicate" SACKs.
 
 tcp_early_retrans - INTEGER
-	Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold
-	for triggering fast retransmit when the amount of outstanding data is
-	small and when no previously unsent data can be transmitted (such
-	that limited transmit could be used). Also controls the use of
-	Tail loss probe (TLP) that converts RTOs occurring due to tail
-	losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01).
+	Tail loss probe (TLP) converts RTOs occurring due to tail
+	losses into fast recovery (draft-ietf-tcpm-rack). Note that
+	TLP requires RACK to function properly (see tcp_recovery below)
 	Possible values:
-		0 disables ER
-		1 enables ER
-		2 enables ER but delays fast recovery and fast retransmit
-		  by a fourth of RTT. This mitigates connection falsely
-		  recovers when network has a small degree of reordering
-		  (less than 3 packets).
-		3 enables delayed ER and TLP.
-		4 enables TLP only.
+		0 disables TLP
+		3 or 4 enables TLP
 	Default: 3
 
 tcp_ecn - INTEGER
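
For reference, a small user-space check of the documented setting; this is an illustrative example, not part of the patch, and assumes the usual procfs path for this sysctl.

/* Illustrative only: read net.ipv4.tcp_early_retrans via procfs and report
 * whether TLP is enabled per the documentation above (3 or 4 enables TLP).
 */
#include <stdio.h>

int main(void)
{
	int val;
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_early_retrans", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%d", &val) != 1) {
		fclose(f);
		fprintf(stderr, "unexpected sysctl format\n");
		return 1;
	}
	fclose(f);
	printf("tcp_early_retrans=%d (TLP %s)\n", val,
	       (val == 3 || val == 4) ? "enabled" : "disabled");
	return 0;
}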

include/linux/tcp.h

Lines changed: 1 addition & 2 deletions
@@ -224,8 +224,7 @@ struct tcp_sock {
 		repair     : 1,
 		frto       : 1;	/* F-RTO (RFC5682) activated in CA_Loss */
 	u8	repair_queue;
-	u8	do_early_retrans:1,/* Enable RFC5827 early-retransmit */
-		syn_data:1,	/* SYN includes data */
+	u8	syn_data:1,	/* SYN includes data */
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */

include/net/tcp.h

Lines changed: 0 additions & 19 deletions
@@ -565,7 +565,6 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
 			     const struct sk_buff *next_skb);
 
 /* tcp_input.c */
-void tcp_resume_early_retransmit(struct sock *sk);
 void tcp_rearm_rto(struct sock *sk);
 void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
 void tcp_reset(struct sock *sk);
@@ -1037,24 +1036,6 @@ static inline void tcp_enable_fack(struct tcp_sock *tp)
 	tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
 }
 
-/* TCP early-retransmit (ER) is similar to but more conservative than
- * the thin-dupack feature. Enable ER only if thin-dupack is disabled.
- */
-static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
-{
-	struct net *net = sock_net((struct sock *)tp);
-
-	tp->do_early_retrans = sysctl_tcp_early_retrans &&
-		sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack &&
-		!(sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) &&
-		net->ipv4.sysctl_tcp_reordering == 3;
-}
-
-static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
-{
-	tp->do_early_retrans = 0;
-}
-
 static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
 {
 	return tp->sacked_out + tp->lost_out;

net/ipv4/inet_diag.c

Lines changed: 0 additions & 1 deletion
@@ -215,7 +215,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	}
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		r->idiag_timer = 1;

net/ipv4/tcp.c

Lines changed: 0 additions & 3 deletions
@@ -406,7 +406,6 @@ void tcp_init_sock(struct sock *sk)
 	tp->mss_cache = TCP_MSS_DEFAULT;
 
 	tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
-	tcp_enable_early_retrans(tp);
 	tcp_assign_congestion_control(sk);
 
 	tp->tsoffset = 0;
@@ -2477,8 +2476,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			err = -EINVAL;
 		else {
 			tp->thin_dupack = val;
-			if (tp->thin_dupack)
-				tcp_disable_early_retrans(tp);
 		}
 		break;

net/ipv4/tcp_input.c

Lines changed: 2 additions & 58 deletions
@@ -904,8 +904,6 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
 			tcp_disable_fack(tp);
 	}
 
-	if (metric > 0)
-		tcp_disable_early_retrans(tp);
 	tp->rack.reord = 1;
 }
 
@@ -2054,30 +2052,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
 	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 }
 
-static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned long delay;
-
-	/* Delay early retransmit and entering fast recovery for
-	 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
-	 * available, or RTO is scheduled to fire first.
-	 */
-	if (sysctl_tcp_early_retrans < 2 || sysctl_tcp_early_retrans > 3 ||
-	    (flag & FLAG_ECE) || !tp->srtt_us)
-		return false;
-
-	delay = max(usecs_to_jiffies(tp->srtt_us >> 5),
-		    msecs_to_jiffies(2));
-
-	if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
-		return false;
-
-	inet_csk_reset_xmit_timer(sk, ICSK_TIME_EARLY_RETRANS, delay,
-				  TCP_RTO_MAX);
-	return true;
-}
-
 /* Linux NewReno/SACK/FACK/ECN state machine.
  * --------------------------------------
 *
@@ -2221,16 +2195,6 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 	    tcp_is_sack(tp) && !tcp_send_head(sk))
 		return true;
 
-	/* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious
-	 * retransmissions due to small network reorderings, we implement
-	 * Mitigation A.3 in the RFC and delay the retransmission for a short
-	 * interval if appropriate.
-	 */
-	if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
-	    (tp->packets_out >= (tp->sacked_out + 1) && tp->packets_out < 4) &&
-	    !tcp_may_send_now(sk))
-		return !tcp_pause_early_retransmit(sk, flag);
-
 	return false;
 }
 
@@ -3050,8 +3014,7 @@ void tcp_rearm_rto(struct sock *sk)
 	} else {
 		u32 rto = inet_csk(sk)->icsk_rto;
 		/* Offset the time elapsed after installing regular RTO */
-		if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-		    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 			struct sk_buff *skb = tcp_write_queue_head(sk);
 			const u32 rto_time_stamp =
@@ -3068,24 +3031,6 @@ void tcp_rearm_rto(struct sock *sk)
 	}
 }
 
-/* This function is called when the delayed ER timer fires. TCP enters
- * fast recovery and performs fast-retransmit.
- */
-void tcp_resume_early_retransmit(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tcp_rearm_rto(sk);
-
-	/* Stop if ER is disabled after the delayed ER timer is scheduled */
-	if (!tp->do_early_retrans)
-		return;
-
-	tcp_enter_recovery(sk, false);
-	tcp_update_scoreboard(sk, 1);
-	tcp_xmit_retransmit_queue(sk);
-}
-
 /* If we get here, the whole TSO packet has not been acked. */
 static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
 {
@@ -3651,8 +3596,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 
 	skb_mstamp_get(&sack_state.ack_time);
 
-	if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
+	if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
 
 	if (after(ack, prior_snd_una)) {

net/ipv4/tcp_ipv4.c

Lines changed: 0 additions & 1 deletion
@@ -2229,7 +2229,6 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 	int state;
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
 	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
 	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
 		timer_active	= 1;

net/ipv4/tcp_metrics.c

Lines changed: 0 additions & 1 deletion
@@ -522,7 +522,6 @@ void tcp_init_metrics(struct sock *sk)
 	val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
 	if (val && tp->reordering != val) {
 		tcp_disable_fack(tp);
-		tcp_disable_early_retrans(tp);
 		tp->reordering = val;
 	}

net/ipv4/tcp_minisocks.c

Lines changed: 0 additions & 1 deletion
@@ -468,7 +468,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
 		newtp->sacked_out = 0;
 		newtp->fackets_out = 0;
 		newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
-		tcp_enable_early_retrans(newtp);
 		newtp->tlp_high_seq = 0;
 		newtp->lsndtime = treq->snt_synack.stamp_jiffies;
 		newsk->sk_txhash = treq->txhash;

net/ipv4/tcp_output.c

Lines changed: 4 additions & 7 deletions
@@ -76,10 +76,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
 	tp->packets_out += tcp_skb_pcount(skb);
-	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
-	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+	if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
 		tcp_rearm_rto(sk);
-	}
 
 	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
 		      tcp_skb_pcount(skb));
@@ -2289,8 +2287,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	u32 timeout, tlp_time_stamp, rto_time_stamp;
 	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
-	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS))
-		return false;
 	/* No consecutive loss probes. */
 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
 		tcp_rearm_rto(sk);
@@ -2309,8 +2305,9 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 	/* Schedule a loss probe in 2*RTT for SACK capable connections
 	 * in Open state, that are either limited by cwnd or application.
 	 */
-	if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
-	    !tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
+	if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) ||
+	    !tp->packets_out || !tcp_is_sack(tp) ||
+	    icsk->icsk_ca_state != TCP_CA_Open)
 		return false;
 
 	if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
