Skip to content

Commit 66b13d9

Browse files
Eric Dumazet and davem330
authored and committed
ipv4: tcp: fix TOS value in ACK messages sent from TIME_WAIT
There is a long-standing bug in the Linux TCP stack concerning ACK messages sent on behalf of TIME_WAIT sockets.

In the IP header of the ACK message, we choose to reflect the TOS field of the incoming message, and this might break some setups.

Examples of things that were broken:
- Routing using TOS as a selector
- Firewalls
- Traffic classification / shaping

We now remember the inet tos field in the timewait structure and use it in ACK generation and route lookup.

Notes:
- We still reflect the incoming TOS in RST messages.
- We could extend MuraliRaja Muniraju's patch to report the TOS value in netlink messages for TIME_WAIT sockets.
- A patch is needed for IPv6.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent 318cf7a commit 66b13d9

File tree

5 files changed

+15
-9
lines changed

5 files changed

+15
-9
lines changed

include/net/inet_timewait_sock.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ struct inet_timewait_sock {
126126
/* And these are ours. */
127127
unsigned int tw_ipv6only : 1,
128128
tw_transparent : 1,
129-
tw_pad : 14, /* 14 bits hole */
129+
tw_pad : 6, /* 6 bits hole */
130+
tw_tos : 8,
130131
tw_ipv6_offset : 16;
131132
kmemcheck_bitfield_end(flags);
132133
unsigned long tw_ttd;

include/net/ip.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ struct ip_reply_arg {
165165
int csumoffset; /* u16 offset of csum in iov[0].iov_base */
166166
/* -1 if not needed */
167167
int bound_dev_if;
168+
u8 tos;
168169
};
169170

170171
#define IP_REPLY_ARG_NOSRCCHECK 1
@@ -175,7 +176,7 @@ static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg)
175176
}
176177

177178
void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
178-
struct ip_reply_arg *arg, unsigned int len);
179+
const struct ip_reply_arg *arg, unsigned int len);
179180

180181
struct ipv4_config {
181182
int log_martians;

net/ipv4/inet_timewait_sock.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
183183
tw->tw_daddr = inet->inet_daddr;
184184
tw->tw_rcv_saddr = inet->inet_rcv_saddr;
185185
tw->tw_bound_dev_if = sk->sk_bound_dev_if;
186+
tw->tw_tos = inet->tos;
186187
tw->tw_num = inet->inet_num;
187188
tw->tw_state = TCP_TIME_WAIT;
188189
tw->tw_substate = state;

net/ipv4/ip_output.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,7 +1466,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
14661466
* structure to pass arguments.
14671467
*/
14681468
void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
1469-
struct ip_reply_arg *arg, unsigned int len)
1469+
const struct ip_reply_arg *arg, unsigned int len)
14701470
{
14711471
struct inet_sock *inet = inet_sk(sk);
14721472
struct ip_options_data replyopts;
@@ -1489,7 +1489,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
14891489
}
14901490

14911491
flowi4_init_output(&fl4, arg->bound_dev_if, 0,
1492-
RT_TOS(ip_hdr(skb)->tos),
1492+
RT_TOS(arg->tos),
14931493
RT_SCOPE_UNIVERSE, sk->sk_protocol,
14941494
ip_reply_arg_flowi_flags(arg),
14951495
daddr, rt->rt_spec_dst,
@@ -1506,7 +1506,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, __be32 daddr,
15061506
with locally disabled BH and that sk cannot be already spinlocked.
15071507
*/
15081508
bh_lock_sock(sk);
1509-
inet->tos = ip_hdr(skb)->tos;
1509+
inet->tos = arg->tos;
15101510
sk->sk_priority = skb->priority;
15111511
sk->sk_protocol = ip_hdr(skb)->protocol;
15121512
sk->sk_bound_dev_if = arg->bound_dev_if;

net/ipv4/tcp_ipv4.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
652652
arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0;
653653

654654
net = dev_net(skb_dst(skb)->dev);
655+
arg.tos = ip_hdr(skb)->tos;
655656
ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
656657
&arg, arg.iov[0].iov_len);
657658

@@ -666,7 +667,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
666667
static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
667668
u32 win, u32 ts, int oif,
668669
struct tcp_md5sig_key *key,
669-
int reply_flags)
670+
int reply_flags, u8 tos)
670671
{
671672
const struct tcphdr *th = tcp_hdr(skb);
672673
struct {
@@ -726,7 +727,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
726727
arg.csumoffset = offsetof(struct tcphdr, check) / 2;
727728
if (oif)
728729
arg.bound_dev_if = oif;
729-
730+
arg.tos = tos;
730731
ip_send_reply(net->ipv4.tcp_sock, skb, ip_hdr(skb)->saddr,
731732
&arg, arg.iov[0].iov_len);
732733

@@ -743,7 +744,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
743744
tcptw->tw_ts_recent,
744745
tw->tw_bound_dev_if,
745746
tcp_twsk_md5_key(tcptw),
746-
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0
747+
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
748+
tw->tw_tos
747749
);
748750

749751
inet_twsk_put(tw);
@@ -757,7 +759,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
757759
req->ts_recent,
758760
0,
759761
tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr),
760-
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0);
762+
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
763+
ip_hdr(skb)->tos);
761764
}
762765

763766
/*

0 commit comments

Comments
 (0)