diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt b/release/src-rt-6.x.4708/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt
index f350c69b2bb..c3df56550c5 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt
@@ -158,6 +158,14 @@ tcp_base_mss - INTEGER
         Path MTU discovery (MTU probing). If MTU probing is enabled,
         this is the initial MSS used by the connection.
 
+tcp_min_snd_mss - INTEGER
+        TCP SYN and SYNACK messages usually advertise an ADVMSS option,
+        as described in RFC 1122 and RFC 6691.
+        If this ADVMSS option is smaller than tcp_min_snd_mss,
+        it is silently capped to tcp_min_snd_mss.
+
+        Default : 48 (at least 8 bytes of payload per segment)
+
 tcp_congestion_control - STRING
         Set the congestion control algorithm to be used for new
         connections. The algorithm "reno" is always available, but
diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/include/linux/snmp.h b/release/src-rt-6.x.4708/linux/linux-2.6.36/include/linux/snmp.h
index ebb0c80ffd6..b8c0e371036 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/include/linux/snmp.h
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/include/linux/snmp.h
@@ -230,6 +230,7 @@ enum
         LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
         LINUX_MIB_TCPDEFERACCEPTDROP,
         LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
+        LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */
         __LINUX_MIB_MAX
 };
 
diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/include/linux/tcp.h b/release/src-rt-6.x.4708/linux/linux-2.6.36/include/linux/tcp.h
index a778ee02459..fb6eb49f67f 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/include/linux/tcp.h
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/include/linux/tcp.h
@@ -488,6 +488,9 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
         return (struct tcp_timewait_sock *)sk;
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
+                  int shiftlen);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_TCP_H */
diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/include/net/tcp.h b/release/src-rt-6.x.4708/linux/linux-2.6.36/include/net/tcp.h
index e3ff66f9309..56ad44502bd 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/include/net/tcp.h
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/include/net/tcp.h
@@ -53,6 +53,8 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER (128 + MAX_HEADER)
 #define MAX_TCP_OPTION_SPACE 40
+#define TCP_MIN_SND_MSS 48
+#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
 
 /*
  * Never offer a window over 32767 without using window scaling. Some
@@ -240,6 +242,7 @@ extern int sysctl_tcp_tso_win_divisor;
 extern int sysctl_tcp_abc;
 extern int sysctl_tcp_mtu_probing;
 extern int sysctl_tcp_base_mss;
+extern int sysctl_tcp_min_snd_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/proc.c b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/proc.c
index 22a73c772e4..023133ed6ba 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/proc.c
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/proc.c
@@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = {
         SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
         SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
         SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
+        SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
         SNMP_MIB_SENTINEL
 };
 
diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17..e1bd3f190bf 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,8 @@ static int zero;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
+static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+static int tcp_min_snd_mss_max = 65535;
 
 /* Update system visible IP port range */
 static void set_local_port_range(int range[2])
@@ -504,6 +506,15 @@ static struct ctl_table ipv4_table[] = {
                 .mode = 0644,
                 .proc_handler = proc_dointvec,
         },
+        {
+                .procname = "tcp_min_snd_mss",
+                .data = &sysctl_tcp_min_snd_mss,
+                .maxlen = sizeof(int),
+                .mode = 0644,
+                .proc_handler = proc_dointvec_minmax,
+                .extra1 = &tcp_min_snd_mss_min,
+                .extra2 = &tcp_min_snd_mss_max,
+        },
         {
                 .procname = "tcp_workaround_signed_windows",
                 .data = &sysctl_tcp_workaround_signed_windows,
diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp.c b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp.c
index 020da8233ac..42d06cf4a1f 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp.c
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp.c
@@ -3263,6 +3263,7 @@ void __init tcp_init(void)
         int i, max_share, cnt;
         unsigned long jiffy = jiffies;
 
+        BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
         BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
         percpu_counter_init(&tcp_sockets_allocated, 0);
diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_input.c b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_input.c
index 49b2ee83557..70541c9bbe3 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_input.c
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_input.c
@@ -1376,13 +1376,13 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
         return sacked;
 }
 
-static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+                           struct sk_buff *skb,
                            struct tcp_sacktag_state *state,
                            unsigned int pcount, int shifted, int mss,
                            int dup_sack)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
 
         BUG_ON(!pcount);
 
@@ -1396,6 +1396,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
         skb_shinfo(prev)->gso_segs += pcount;
         BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
+        WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
         skb_shinfo(skb)->gso_segs -= pcount;
 
         /* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1463,6 +1464,21 @@ static int skb_can_shift(struct sk_buff *skb)
         return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+                  int pcount, int shiftlen)
+{
+        /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+         * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+         * to make sure not storing more than 65535 * 8 bytes per skb,
+         * even if current MSS is bigger.
+         */
+        if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+                return 0;
+        if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+                return 0;
+        return skb_shift(to, from, shiftlen);
+}
+
 /* Try collapsing SACK blocks spanning across multiple skbs to a single
  * skb.
  */
@@ -1474,6 +1490,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *prev;
         int mss;
+        int next_pcount;
         int pcount = 0;
         int len;
         int in_sack;
@@ -1564,9 +1581,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                 }
         }
 
-        if (!skb_shift(prev, skb, len))
+        if (!tcp_skb_shift(prev, skb, pcount, len))
                 goto fallback;
-        if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+        if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
                 goto out;
 
         /* Hole filled allows collapsing with the next as well, this is very
@@ -1583,9 +1600,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                 goto out;
 
         len = skb->len;
-        if (skb_shift(prev, skb, len)) {
-                pcount += tcp_skb_pcount(skb);
-                tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+        next_pcount = tcp_skb_pcount(skb);
+        if (tcp_skb_shift(prev, skb, next_pcount, len)) {
+                pcount += next_pcount;
+                tcp_shifted_skb(sk, prev, skb, state, next_pcount, len, mss, 0);
         }
 
 out:
diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_output.c b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_output.c
index fd709f71579..44a75e631ed 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_output.c
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_output.c
@@ -59,6 +59,7 @@ int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
 int sysctl_tcp_mtu_probing __read_mostly = 0;
 int sysctl_tcp_base_mss __read_mostly = 512;
+int sysctl_tcp_min_snd_mss __read_mostly = TCP_MIN_SND_MSS;
 
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
@@ -1006,6 +1007,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
         if (nsize < 0)
                 nsize = 0;
 
+        if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+                return -ENOMEM;
+        }
+
         if (skb_cloned(skb) &&
             skb_is_nonlinear(skb) &&
             pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -1159,8 +1165,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
                 mss_now -= icsk->icsk_ext_hdr_len;
 
         /* Then reserve room for full set of TCP options and 8 bytes of data */
-        if (mss_now < 48)
-                mss_now = 48;
+        mss_now = max(mss_now, sysctl_tcp_min_snd_mss);
 
         /* Now subtract TCP options size, not including SACKs */
         mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
diff --git a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_timer.c b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_timer.c
index 2b69168c413..92912b2b8a2 100644
--- a/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_timer.c
+++ b/release/src-rt-6.x.4708/linux/linux-2.6.36/net/ipv4/tcp_timer.c
@@ -127,6 +127,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
                 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
                 mss = min(sysctl_tcp_base_mss, mss);
                 mss = max(mss, 68 - tp->tcp_header_len);
+                mss = max(mss, sysctl_tcp_min_snd_mss);
                 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
                 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
         }
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt
index f350c69b2bb..c3df56550c5 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt
@@ -158,6 +158,14 @@ tcp_base_mss - INTEGER
         Path MTU discovery (MTU probing). If MTU probing is enabled,
         this is the initial MSS used by the connection.
 
+tcp_min_snd_mss - INTEGER
+        TCP SYN and SYNACK messages usually advertise an ADVMSS option,
+        as described in RFC 1122 and RFC 6691.
+        If this ADVMSS option is smaller than tcp_min_snd_mss,
+        it is silently capped to tcp_min_snd_mss.
+
+        Default : 48 (at least 8 bytes of payload per segment)
+
 tcp_congestion_control - STRING
         Set the congestion control algorithm to be used for new
         connections. The algorithm "reno" is always available, but
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/linux/snmp.h b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/linux/snmp.h
index ebb0c80ffd6..b8c0e371036 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/linux/snmp.h
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/linux/snmp.h
@@ -230,6 +230,7 @@ enum
         LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
         LINUX_MIB_TCPDEFERACCEPTDROP,
         LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
+        LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */
         __LINUX_MIB_MAX
 };
 
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/linux/tcp.h b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/linux/tcp.h
index a778ee02459..fb6eb49f67f 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/linux/tcp.h
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/linux/tcp.h
@@ -488,6 +488,9 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
         return (struct tcp_timewait_sock *)sk;
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
+                  int shiftlen);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_TCP_H */
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/net/tcp.h b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/net/tcp.h
index e3ff66f9309..56ad44502bd 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/net/tcp.h
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/include/net/tcp.h
@@ -53,6 +53,8 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER (128 + MAX_HEADER)
 #define MAX_TCP_OPTION_SPACE 40
+#define TCP_MIN_SND_MSS 48
+#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
 
 /*
  * Never offer a window over 32767 without using window scaling. Some
@@ -240,6 +242,7 @@ extern int sysctl_tcp_tso_win_divisor;
 extern int sysctl_tcp_abc;
 extern int sysctl_tcp_mtu_probing;
 extern int sysctl_tcp_base_mss;
+extern int sysctl_tcp_min_snd_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/proc.c b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/proc.c
index 22a73c772e4..023133ed6ba 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/proc.c
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/proc.c
@@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = {
         SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
         SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
         SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
+        SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
         SNMP_MIB_SENTINEL
 };
 
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17..e1bd3f190bf 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,8 @@ static int zero;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
+static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+static int tcp_min_snd_mss_max = 65535;
 
 /* Update system visible IP port range */
 static void set_local_port_range(int range[2])
@@ -504,6 +506,15 @@ static struct ctl_table ipv4_table[] = {
                 .mode = 0644,
                 .proc_handler = proc_dointvec,
         },
+        {
+                .procname = "tcp_min_snd_mss",
+                .data = &sysctl_tcp_min_snd_mss,
+                .maxlen = sizeof(int),
+                .mode = 0644,
+                .proc_handler = proc_dointvec_minmax,
+                .extra1 = &tcp_min_snd_mss_min,
+                .extra2 = &tcp_min_snd_mss_max,
+        },
         {
                 .procname = "tcp_workaround_signed_windows",
                 .data = &sysctl_tcp_workaround_signed_windows,
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp.c b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp.c
index 37b1f8a8686..9b12ee60b32 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp.c
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp.c
@@ -3265,6 +3265,7 @@ void __init tcp_init(void)
         int i, max_share, cnt;
         unsigned long jiffy = jiffies;
 
+        BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
         BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
         percpu_counter_init(&tcp_sockets_allocated, 0);
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_input.c b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_input.c
index 49b2ee83557..70541c9bbe3 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_input.c
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_input.c
@@ -1376,13 +1376,13 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
         return sacked;
 }
 
-static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+                           struct sk_buff *skb,
                            struct tcp_sacktag_state *state,
                            unsigned int pcount, int shifted, int mss,
                            int dup_sack)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
 
         BUG_ON(!pcount);
 
@@ -1396,6 +1396,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
         skb_shinfo(prev)->gso_segs += pcount;
         BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
+        WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
         skb_shinfo(skb)->gso_segs -= pcount;
 
         /* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1463,6 +1464,21 @@ static int skb_can_shift(struct sk_buff *skb)
         return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+                  int pcount, int shiftlen)
+{
+        /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+         * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+         * to make sure not storing more than 65535 * 8 bytes per skb,
+         * even if current MSS is bigger.
+         */
+        if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+                return 0;
+        if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+                return 0;
+        return skb_shift(to, from, shiftlen);
+}
+
 /* Try collapsing SACK blocks spanning across multiple skbs to a single
  * skb.
  */
@@ -1474,6 +1490,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *prev;
         int mss;
+        int next_pcount;
         int pcount = 0;
         int len;
         int in_sack;
@@ -1564,9 +1581,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                 }
         }
 
-        if (!skb_shift(prev, skb, len))
+        if (!tcp_skb_shift(prev, skb, pcount, len))
                 goto fallback;
-        if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+        if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
                 goto out;
 
         /* Hole filled allows collapsing with the next as well, this is very
@@ -1583,9 +1600,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                 goto out;
 
         len = skb->len;
-        if (skb_shift(prev, skb, len)) {
-                pcount += tcp_skb_pcount(skb);
-                tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+        next_pcount = tcp_skb_pcount(skb);
+        if (tcp_skb_shift(prev, skb, next_pcount, len)) {
+                pcount += next_pcount;
+                tcp_shifted_skb(sk, prev, skb, state, next_pcount, len, mss, 0);
         }
 
 out:
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_output.c b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_output.c
index fd709f71579..44a75e631ed 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_output.c
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_output.c
@@ -59,6 +59,7 @@ int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
 int sysctl_tcp_mtu_probing __read_mostly = 0;
 int sysctl_tcp_base_mss __read_mostly = 512;
+int sysctl_tcp_min_snd_mss __read_mostly = TCP_MIN_SND_MSS;
 
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
@@ -1006,6 +1007,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
         if (nsize < 0)
                 nsize = 0;
 
+        if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+                return -ENOMEM;
+        }
+
         if (skb_cloned(skb) &&
             skb_is_nonlinear(skb) &&
             pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -1159,8 +1165,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
                 mss_now -= icsk->icsk_ext_hdr_len;
 
         /* Then reserve room for full set of TCP options and 8 bytes of data */
-        if (mss_now < 48)
-                mss_now = 48;
+        mss_now = max(mss_now, sysctl_tcp_min_snd_mss);
 
         /* Now subtract TCP options size, not including SACKs */
         mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
diff --git a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_timer.c b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_timer.c
index 2b69168c413..92912b2b8a2 100644
--- a/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_timer.c
+++ b/release/src-rt-7.14.114.x/src/linux/linux-2.6.36/net/ipv4/tcp_timer.c
@@ -127,6 +127,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
                 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
                 mss = min(sysctl_tcp_base_mss, mss);
                 mss = max(mss, 68 - tp->tcp_header_len);
+                mss = max(mss, sysctl_tcp_min_snd_mss);
                 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
                 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
         }
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt b/release/src-rt-7.x.main/src/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt
index f350c69b2bb..c3df56550c5 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/Documentation/networking/ip-sysctl.txt
@@ -158,6 +158,14 @@ tcp_base_mss - INTEGER
         Path MTU discovery (MTU probing). If MTU probing is enabled,
         this is the initial MSS used by the connection.
 
+tcp_min_snd_mss - INTEGER
+        TCP SYN and SYNACK messages usually advertise an ADVMSS option,
+        as described in RFC 1122 and RFC 6691.
+        If this ADVMSS option is smaller than tcp_min_snd_mss,
+        it is silently capped to tcp_min_snd_mss.
+
+        Default : 48 (at least 8 bytes of payload per segment)
+
 tcp_congestion_control - STRING
         Set the congestion control algorithm to be used for new
         connections. The algorithm "reno" is always available, but
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/linux/snmp.h b/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/linux/snmp.h
index ebb0c80ffd6..b8c0e371036 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/linux/snmp.h
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/linux/snmp.h
@@ -230,6 +230,7 @@ enum
         LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
         LINUX_MIB_TCPDEFERACCEPTDROP,
         LINUX_MIB_IPRPFILTER, /* IP Reverse Path Filter (rp_filter) */
+        LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */
         __LINUX_MIB_MAX
 };
 
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/linux/tcp.h b/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/linux/tcp.h
index a778ee02459..fb6eb49f67f 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/linux/tcp.h
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/linux/tcp.h
@@ -488,6 +488,9 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
         return (struct tcp_timewait_sock *)sk;
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
+                  int shiftlen);
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_TCP_H */
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/net/tcp.h b/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/net/tcp.h
index e3ff66f9309..56ad44502bd 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/net/tcp.h
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/include/net/tcp.h
@@ -53,6 +53,8 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 
 #define MAX_TCP_HEADER (128 + MAX_HEADER)
 #define MAX_TCP_OPTION_SPACE 40
+#define TCP_MIN_SND_MSS 48
+#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
 
 /*
  * Never offer a window over 32767 without using window scaling. Some
@@ -240,6 +242,7 @@ extern int sysctl_tcp_tso_win_divisor;
 extern int sysctl_tcp_abc;
 extern int sysctl_tcp_mtu_probing;
 extern int sysctl_tcp_base_mss;
+extern int sysctl_tcp_min_snd_mss;
 extern int sysctl_tcp_workaround_signed_windows;
 extern int sysctl_tcp_slow_start_after_idle;
 extern int sysctl_tcp_max_ssthresh;
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/proc.c b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/proc.c
index 22a73c772e4..023133ed6ba 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/proc.c
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/proc.c
@@ -253,6 +253,7 @@ static const struct snmp_mib snmp4_net_list[] = {
         SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
         SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
         SNMP_MIB_ITEM("IPReversePathFilter", LINUX_MIB_IPRPFILTER),
+        SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
         SNMP_MIB_SENTINEL
 };
 
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c
index d96c1da4b17..e1bd3f190bf 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/sysctl_net_ipv4.c
@@ -26,6 +26,8 @@ static int zero;
 static int tcp_retr1_max = 255;
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
+static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
+static int tcp_min_snd_mss_max = 65535;
 
 /* Update system visible IP port range */
 static void set_local_port_range(int range[2])
@@ -504,6 +506,15 @@ static struct ctl_table ipv4_table[] = {
                 .mode = 0644,
                 .proc_handler = proc_dointvec,
         },
+        {
+                .procname = "tcp_min_snd_mss",
+                .data = &sysctl_tcp_min_snd_mss,
+                .maxlen = sizeof(int),
+                .mode = 0644,
+                .proc_handler = proc_dointvec_minmax,
+                .extra1 = &tcp_min_snd_mss_min,
+                .extra2 = &tcp_min_snd_mss_max,
+        },
         {
                 .procname = "tcp_workaround_signed_windows",
                 .data = &sysctl_tcp_workaround_signed_windows,
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp.c b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp.c
index 020da8233ac..42d06cf4a1f 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp.c
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp.c
@@ -3263,6 +3263,7 @@ void __init tcp_init(void)
         int i, max_share, cnt;
         unsigned long jiffy = jiffies;
 
+        BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
         BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb));
 
         percpu_counter_init(&tcp_sockets_allocated, 0);
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_input.c b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_input.c
index 49b2ee83557..70541c9bbe3 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_input.c
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_input.c
@@ -1376,13 +1376,13 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
         return sacked;
 }
 
-static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static int tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+                           struct sk_buff *skb,
                            struct tcp_sacktag_state *state,
                            unsigned int pcount, int shifted, int mss,
                            int dup_sack)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
 
         BUG_ON(!pcount);
 
@@ -1396,6 +1396,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
 
         skb_shinfo(prev)->gso_segs += pcount;
         BUG_ON(skb_shinfo(skb)->gso_segs < pcount);
+        WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
         skb_shinfo(skb)->gso_segs -= pcount;
 
         /* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1463,6 +1464,21 @@ static int skb_can_shift(struct sk_buff *skb)
         return !skb_headlen(skb) && skb_is_nonlinear(skb);
 }
 
+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+                  int pcount, int shiftlen)
+{
+        /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+         * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+         * to make sure not storing more than 65535 * 8 bytes per skb,
+         * even if current MSS is bigger.
+         */
+        if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+                return 0;
+        if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+                return 0;
+        return skb_shift(to, from, shiftlen);
+}
+
 /* Try collapsing SACK blocks spanning across multiple skbs to a single
  * skb.
  */
@@ -1474,6 +1490,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
         struct tcp_sock *tp = tcp_sk(sk);
         struct sk_buff *prev;
         int mss;
+        int next_pcount;
         int pcount = 0;
         int len;
         int in_sack;
@@ -1564,9 +1581,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                 }
         }
 
-        if (!skb_shift(prev, skb, len))
+        if (!tcp_skb_shift(prev, skb, pcount, len))
                 goto fallback;
-        if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+        if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
                 goto out;
 
         /* Hole filled allows collapsing with the next as well, this is very
@@ -1583,9 +1600,10 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
                 goto out;
 
         len = skb->len;
-        if (skb_shift(prev, skb, len)) {
-                pcount += tcp_skb_pcount(skb);
-                tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+        next_pcount = tcp_skb_pcount(skb);
+        if (tcp_skb_shift(prev, skb, next_pcount, len)) {
+                pcount += next_pcount;
+                tcp_shifted_skb(sk, prev, skb, state, next_pcount, len, mss, 0);
         }
 
 out:
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_output.c b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_output.c
index fd709f71579..44a75e631ed 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_output.c
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_output.c
@@ -59,6 +59,7 @@ int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
 int sysctl_tcp_mtu_probing __read_mostly = 0;
 int sysctl_tcp_base_mss __read_mostly = 512;
+int sysctl_tcp_min_snd_mss __read_mostly = TCP_MIN_SND_MSS;
 
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
@@ -1006,6 +1007,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
         if (nsize < 0)
                 nsize = 0;
 
+        if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+                return -ENOMEM;
+        }
+
         if (skb_cloned(skb) &&
             skb_is_nonlinear(skb) &&
             pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -1159,8 +1165,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
                 mss_now -= icsk->icsk_ext_hdr_len;
 
         /* Then reserve room for full set of TCP options and 8 bytes of data */
-        if (mss_now < 48)
-                mss_now = 48;
+        mss_now = max(mss_now, sysctl_tcp_min_snd_mss);
 
         /* Now subtract TCP options size, not including SACKs */
         mss_now -= tp->tcp_header_len - sizeof(struct tcphdr);
diff --git a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_timer.c b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_timer.c
index 2b69168c413..92912b2b8a2 100644
--- a/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_timer.c
+++ b/release/src-rt-7.x.main/src/linux/linux-2.6.36/net/ipv4/tcp_timer.c
@@ -127,6 +127,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
                 mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
                 mss = min(sysctl_tcp_base_mss, mss);
                 mss = max(mss, 68 - tp->tcp_header_len);
+                mss = max(mss, sysctl_tcp_min_snd_mss);
                 icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
                 tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
         }
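
For reference, a small stand-alone C sketch (not kernel code; the helper names are illustrative only) of the arithmetic this backport enforces: TCP_MIN_SND_MSS (48) minus MAX_TCP_OPTION_SPACE (40) guarantees at least 8 bytes of payload per segment, and tcp_skb_shift() refuses to grow an skb past 65535 segments or 65535 * TCP_MIN_GSO_SIZE bytes so the 16-bit gso_segs count cannot overflow.

/* Illustrative user-space sketch; mirrors the clamps added by the patch. */
#include <stdio.h>

#define MAX_TCP_OPTION_SPACE 40
#define TCP_MIN_SND_MSS      48
#define TCP_MIN_GSO_SIZE     (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)

/* Lower bound applied to the sender MSS, as in the patched tcp_mtu_to_mss(). */
static int clamp_snd_mss(int mss_now, int tcp_min_snd_mss)
{
        return mss_now < tcp_min_snd_mss ? tcp_min_snd_mss : mss_now;
}

/* Overflow guard applied before collapsing SACKed skbs, as in tcp_skb_shift():
 * each segment carries at least TCP_MIN_GSO_SIZE bytes, so one skb may never
 * hold more than 65535 segments or 65535 * TCP_MIN_GSO_SIZE bytes.
 */
static int shift_allowed(unsigned int to_len, unsigned int shiftlen,
                         unsigned int to_pcount, unsigned int pcount)
{
        if (to_len + shiftlen >= 65535u * TCP_MIN_GSO_SIZE)
                return 0;
        if (to_pcount + pcount > 65535u)
                return 0;
        return 1;
}

int main(void)
{
        /* A peer advertising MSS 1 still gets segments with at least
         * 48 - 40 = 8 payload bytes, keeping retransmit queues bounded.
         */
        printf("clamped MSS for advertised MSS 1: %d\n",
               clamp_snd_mss(1, TCP_MIN_SND_MSS));
        printf("max bytes per skb: %u\n", 65535u * TCP_MIN_GSO_SIZE);
        printf("shift allowed near the limit: %d\n",
               shift_allowed(65535u * TCP_MIN_GSO_SIZE - 16, 32, 100, 1));
        return 0;
}

With the default value the clamp matches the old hard-coded 48; the new ctl_table entry lets an administrator raise net.ipv4.tcp_min_snd_mss anywhere within [48, 65535].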