Skip to content

Commit f1ecd5d

Browse files
Damian Lukowski authored and davem330 committed
Revert Backoff [v3]: Revert RTO on ICMP destination unreachable
Here, an ICMP host/network unreachable message whose payload matches TCP's SND.UNA is taken as an indication that the RTO retransmission was lost not because of congestion, but because of a route failure somewhere along the path. With true congestion, a router won't trigger such a message, and the patched TCP will operate as standard TCP.

This patch reverts one RTO backoff if an ICMP host/network unreachable message whose payload matches TCP's SND.UNA arrives. Based on the new RTO, the retransmission timer is reset to reflect the remaining time, or - if the reverted RTO has already elapsed - a retransmission is sent out immediately. Backoffs are only reverted if TCP is in RTO loss recovery, i.e. if there have already been retransmissions and reversible backoffs.

Changes from v2:
1) Renaming of skb in tcp_v4_err() moved to another patch.
2) Reintroduced tcp_bound_rto() and __tcp_set_rto().
3) Fixed code comments.

Signed-off-by: Damian Lukowski <damian@tvk.rwth-aachen.de>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 4d1a2d9 commit f1ecd5d

File tree

4 files changed

+52
-4
lines changed

4 files changed

+52
-4
lines changed

include/net/tcp.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,7 @@ extern void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
469469
int nonagle);
470470
extern int tcp_may_send_now(struct sock *sk);
471471
extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
472+
extern void tcp_retransmit_timer(struct sock *sk);
472473
extern void tcp_xmit_retransmit_queue(struct sock *);
473474
extern void tcp_simple_retransmit(struct sock *);
474475
extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
@@ -521,6 +522,17 @@ extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
521522
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
522523
extern void tcp_mtup_init(struct sock *sk);
523524

525+
static inline void tcp_bound_rto(const struct sock *sk)
526+
{
527+
if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
528+
inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
529+
}
530+
531+
static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
532+
{
533+
return (tp->srtt >> 3) + tp->rttvar;
534+
}
535+
524536
static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
525537
{
526538
tp->pred_flags = htonl((tp->tcp_header_len << 26) |

net/ipv4/tcp_input.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -685,7 +685,7 @@ static inline void tcp_set_rto(struct sock *sk)
685685
* is invisible. Actually, Linux-2.4 also generates erratic
686686
* ACKs in some circumstances.
687687
*/
688-
inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
688+
inet_csk(sk)->icsk_rto = __tcp_set_rto(tp);
689689

690690
/* 2. Fixups made earlier cannot be right.
691691
* If we do not estimate RTO correctly without them,
@@ -696,8 +696,7 @@ static inline void tcp_set_rto(struct sock *sk)
696696
/* NOTE: clamping at TCP_RTO_MIN is not required, current algo
697697
* guarantees that rto is higher.
698698
*/
699-
if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
700-
inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
699+
tcp_bound_rto(sk);
701700
}
702701

703702
/* Save metrics learned by this TCP session.

net/ipv4/tcp_ipv4.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,12 +332,15 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
332332
{
333333
struct iphdr *iph = (struct iphdr *)icmp_skb->data;
334334
struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
335+
struct inet_connection_sock *icsk;
335336
struct tcp_sock *tp;
336337
struct inet_sock *inet;
337338
const int type = icmp_hdr(icmp_skb)->type;
338339
const int code = icmp_hdr(icmp_skb)->code;
339340
struct sock *sk;
341+
struct sk_buff *skb;
340342
__u32 seq;
343+
__u32 remaining;
341344
int err;
342345
struct net *net = dev_net(icmp_skb->dev);
343346

@@ -367,6 +370,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
367370
if (sk->sk_state == TCP_CLOSE)
368371
goto out;
369372

373+
icsk = inet_csk(sk);
370374
tp = tcp_sk(sk);
371375
seq = ntohl(th->seq);
372376
if (sk->sk_state != TCP_LISTEN &&
@@ -393,6 +397,39 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
393397
}
394398

395399
err = icmp_err_convert[code].errno;
400+
/* check if icmp_skb allows revert of backoff
401+
* (see draft-zimmermann-tcp-lcd) */
402+
if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
403+
break;
404+
if (seq != tp->snd_una || !icsk->icsk_retransmits ||
405+
!icsk->icsk_backoff)
406+
break;
407+
408+
icsk->icsk_backoff--;
409+
inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
410+
icsk->icsk_backoff;
411+
tcp_bound_rto(sk);
412+
413+
skb = tcp_write_queue_head(sk);
414+
BUG_ON(!skb);
415+
416+
remaining = icsk->icsk_rto - min(icsk->icsk_rto,
417+
tcp_time_stamp - TCP_SKB_CB(skb)->when);
418+
419+
if (remaining) {
420+
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
421+
remaining, TCP_RTO_MAX);
422+
} else if (sock_owned_by_user(sk)) {
423+
/* RTO revert clocked out retransmission,
424+
* but socket is locked. Will defer. */
425+
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
426+
HZ/20, TCP_RTO_MAX);
427+
} else {
428+
/* RTO revert clocked out retransmission.
429+
* Will retransmit now */
430+
tcp_retransmit_timer(sk);
431+
}
432+
396433
break;
397434
case ICMP_TIME_EXCEEDED:
398435
err = EHOSTUNREACH;

net/ipv4/tcp_timer.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ static void tcp_probe_timer(struct sock *sk)
279279
* The TCP retransmit timer.
280280
*/
281281

282-
static void tcp_retransmit_timer(struct sock *sk)
282+
void tcp_retransmit_timer(struct sock *sk)
283283
{
284284
struct tcp_sock *tp = tcp_sk(sk);
285285
struct inet_connection_sock *icsk = inet_csk(sk);

0 commit comments

Comments
 (0)