Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
287 lines (259 sloc) 10.4 KB
From e9aec3f857c215bc7393d6bd598e24d7f6a757e1 Mon Sep 17 00:00:00 2001
From: Ali Abbas <ali@alouche.net>
Date: Mon, 20 Feb 2012 11:55:40 +0100
Subject: [PATCH] rfc2988bis
---
include/linux/tcp.h | 1 +
include/net/tcp.h | 11 +++++++-
net/ipv4/syncookies.c | 1 +
net/ipv4/tcp_input.c | 57 ++++++++++++++++++++++-----------------------
net/ipv4/tcp_ipv4.c | 12 ++++++---
net/ipv4/tcp_minisocks.c | 7 +++++-
net/ipv6/syncookies.c | 1 +
net/ipv6/tcp_ipv6.c | 6 ++++-
8 files changed, 59 insertions(+), 37 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e574001..c61ef82 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -239,6 +239,7 @@ struct tcp_request_sock {
#endif
u32 rcv_isn;
u32 snt_isn;
+ u32 snt_synack; /* synack sent time */
};
static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7fddec3..f8477e3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -130,7 +130,13 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#endif
#define TCP_RTO_MAX ((unsigned)(120*HZ))
#define TCP_RTO_MIN ((unsigned)(HZ/5))
-#define TCP_TIMEOUT_INIT ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value */
+#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC2988bis initial RTO value */
+#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now
+ * used as a fallback RTO for the
+ * initial data transmission if no
+ * valid RTT sample has been acquired,
+ * most likely due to retrans in 3WHS.
+ */
#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
* for local resources.
@@ -287,7 +293,7 @@ static inline void tcp_synq_overflow(struct sock *sk)
static inline int tcp_synq_no_recent_overflow(const struct sock *sk)
{
unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
- return time_after(jiffies, last_overflow + TCP_TIMEOUT_INIT);
+ return time_after(jiffies, last_overflow + TCP_TIMEOUT_FALLBACK);
}
extern struct proto tcp_prot;
@@ -532,6 +538,7 @@ extern void tcp_initialize_rcv_mss(struct sock *sk);
extern int tcp_mtu_to_mss(struct sock *sk, int pmtu);
extern int tcp_mss_to_mtu(struct sock *sk, int mss);
extern void tcp_mtup_init(struct sock *sk);
+extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt);
static inline void tcp_bound_rto(const struct sock *sk)
{
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a6e0e07..ed901c4 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -304,6 +304,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+ treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e6bec56..2f2a81d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -868,17 +868,18 @@ static void tcp_init_metrics(struct sock *sk)
tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
tp->snd_ssthresh = tp->snd_cwnd_clamp;
+ } else {
+ /* ssthresh may have been reduced unnecessarily during.
+ * 3WHS. Restore it back to its initial default.
+ */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
}
if (dst_metric(dst, RTAX_REORDERING) &&
tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
tcp_disable_fack(tp);
tp->reordering = dst_metric(dst, RTAX_REORDERING);
}
-
- if (dst_metric(dst, RTAX_RTT) == 0)
- goto reset;
-
- if (!tp->srtt && dst_metric_rtt(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
+ if (dst_metric(dst, RTAX_RTT) == 0 || tp->srtt == 0)
goto reset;
/* Initial rtt is determined from SYN,SYN-ACK.
@@ -904,25 +905,28 @@ static void tcp_init_metrics(struct sock *sk)
tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
}
tcp_set_rto(sk);
- if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
- goto reset;
-
-cwnd:
- tp->snd_cwnd = tcp_init_cwnd(tp, dst);
- tp->snd_cwnd_stamp = tcp_time_stamp;
- return;
reset:
- /* Play conservative. If timestamps are not
- * supported, TCP will fail to recalculate correct
- * rtt, if initial rto is too small. FORGET ALL AND RESET!
- */
- if (!tp->rx_opt.saw_tstamp && tp->srtt) {
- tp->srtt = 0;
- tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
- inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
- }
- goto cwnd;
+ if (tp->srtt == 0) {
+ /* RFC2988bis: We've failed to get a valid RTT sample from
+ * 3WHS. This is most likely due to retransmission,
+ * including spurious one. Reset the RTO back to 3secs
+ * from the more aggressive 1sec to avoid more spurious
+ * retransmission.
+ */
+ tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_FALLBACK;
+ inet_csk(sk)->icsk_rto = TCP_TIMEOUT_FALLBACK;
+ }
+ /* Cut cwnd down to 1 per RFC5681 if SYN or SYN-ACK has been
+ * retransmitted. In light of RFC2988bis' more aggressive 1sec
+ * initRTO, we only reset cwnd when more than 1 SYN/SYN-ACK
+ * retransmission has occurred.
+ */
+ if (tp->total_retrans > 1)
+ tp->snd_cwnd = 1;
+ else
+ tp->snd_cwnd = tcp_init_cwnd(tp, dst);
+ tp->snd_cwnd_stamp = tcp_time_stamp;
}
static void tcp_update_reordering(struct sock *sk, const int metric,
@@ -3064,12 +3068,13 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
tcp_xmit_retransmit_queue(sk);
}
-static void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
+void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt)
{
tcp_rtt_estimator(sk, seq_rtt);
tcp_set_rto(sk);
inet_csk(sk)->icsk_backoff = 0;
}
+EXPORT_SYMBOL(tcp_valid_rtt_meas);
/* Read draft-ietf-tcplw-high-performance before mucking
* with this code. (Supersedes RFC1323)
@@ -5696,12 +5701,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
- /* tcp_ack considers this ACK as duplicate
- * and does not calculate rtt.
- * Force it here.
- */
- tcp_ack_update_rtt(sk, 0, 0);
-
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 363136b..0f25256 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -407,8 +407,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
break;
icsk->icsk_backoff--;
- inet_csk(sk)->icsk_rto = __tcp_set_rto(tp) <<
- icsk->icsk_backoff;
+ inet_csk(sk)->icsk_rto = (tp->srtt ? __tcp_set_rto(tp) :
+ TCP_TIMEOUT_INIT) << icsk->icsk_backoff;
tcp_bound_rto(sk);
skb = tcp_write_queue_head(sk);
@@ -1333,6 +1333,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
isn = tcp_v4_init_sequence(skb);
}
tcp_rsk(req)->snt_isn = isn;
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
if (__tcp_v4_send_synack(sk, req, dst) || want_cookie)
goto drop_and_free;
@@ -1401,6 +1402,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
tcp_initialize_rcv_mss(newsk);
+ if (tcp_rsk(req)->snt_synack)
+ tcp_valid_rtt_meas(newsk,
+ tcp_time_stamp - tcp_rsk(req)->snt_synack);
+ newtp->total_retrans = req->retrans;
#ifdef CONFIG_TCP_MD5SIG
/* Copy over the MD5 key from the original socket */
@@ -1812,8 +1817,7 @@ static int tcp_v4_init_sock(struct sock *sk)
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- tp->snd_cwnd = 2;
-
+ tp->snd_cwnd = TCP_INIT_CWND;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
*/
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ab7ac46..90da62a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -417,7 +417,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
* algorithms that we must have the following bandaid to talk
* efficiently to them. -DaveM
*/
- newtp->snd_cwnd = 2;
+ newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0;
newtp->bytes_acked = 0;
@@ -647,6 +647,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
inet_rsk(req)->acked = 1;
return NULL;
}
+ if (tmp_opt.saw_tstamp && tmp_opt.rcv_tsecr)
+ tcp_rsk(req)->snt_synack = tmp_opt.rcv_tsecr;
+ else if (req->retrans) /* don't take RTT sample if retrans && ~TS */
+ tcp_rsk(req)->snt_synack = 0;
+
/* OK, ACK is valid, create big socket and
* feed this segment to it. It will repeat all
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 6b6ae91..9333737 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -230,6 +230,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
+ treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8babc93..482dd69 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1239,7 +1239,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
}
tcp_rsk(req)->snt_isn = isn;
-
+ tcp_rsk(req)->snt_synack = tcp_time_stamp;
security_inet_conn_request(sk, skb, req);
if (tcp_v6_send_synack(sk, req))
@@ -1431,6 +1431,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
tcp_initialize_rcv_mss(newsk);
+ if (tcp_rsk(req)->snt_synack)
+ tcp_valid_rtt_meas(newsk,
+ tcp_time_stamp - tcp_rsk(req)->snt_synack);
+ newtp->total_retrans = req->retrans;
newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
--
1.7.5.4