Skip to content

Commit

Permalink
ipv4: add a sock pointer to dst->output() path.
Browse files Browse the repository at this point in the history
In the dst->output() path for ipv4, the code assumes the skb it has to
transmit is attached to an inet socket, specifically via
ip_mc_output() : The sk_mc_loop() test triggers a WARN_ON() when the
provider of the packet is an AF_PACKET socket.

The dst->output() method gets an additional 'struct sock *sk'
parameter. This needs a cascade of changes so that this parameter can
be propagated from vxlan to final consumer.

Fixes: 8f646c9 ("vxlan: keep original skb ownership")
Reported-by: lucien xin <lucien.xin@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eric Dumazet authored and davem330 committed Apr 15, 2014
1 parent b0270e9 commit aad8872
Show file tree
Hide file tree
Showing 19 changed files with 74 additions and 46 deletions.
4 changes: 2 additions & 2 deletions drivers/net/vxlan.c
Expand Up @@ -1755,8 +1755,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
if (err)
return err;

return iptunnel_xmit(rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df,
false);
return iptunnel_xmit(vs->sock->sk, rt, skb, src, dst, IPPROTO_UDP,
tos, ttl, df, false);
}
EXPORT_SYMBOL_GPL(vxlan_xmit_skb);

Expand Down
14 changes: 11 additions & 3 deletions include/net/dst.h
Expand Up @@ -45,7 +45,7 @@ struct dst_entry {
void *__pad1;
#endif
int (*input)(struct sk_buff *);
int (*output)(struct sk_buff *);
int (*output)(struct sock *sk, struct sk_buff *skb);

unsigned short flags;
#define DST_HOST 0x0001
Expand Down Expand Up @@ -367,7 +367,11 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb)
return child;
}

int dst_discard(struct sk_buff *skb);
int dst_discard_sk(struct sock *sk, struct sk_buff *skb);
static inline int dst_discard(struct sk_buff *skb)
{
return dst_discard_sk(skb->sk, skb);
}
void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref,
int initial_obsolete, unsigned short flags);
void __dst_free(struct dst_entry *dst);
Expand Down Expand Up @@ -449,9 +453,13 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout)
}

/* Output packet to network from transport. */
static inline int dst_output_sk(struct sock *sk, struct sk_buff *skb)
{
return skb_dst(skb)->output(sk, skb);
}
static inline int dst_output(struct sk_buff *skb)
{
return skb_dst(skb)->output(skb);
return dst_output_sk(skb->sk, skb);
}

/* Input packet from network to transport. */
Expand Down
11 changes: 8 additions & 3 deletions include/net/ip.h
Expand Up @@ -104,13 +104,18 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
struct net_device *orig_dev);
int ip_local_deliver(struct sk_buff *skb);
int ip_mr_input(struct sk_buff *skb);
int ip_output(struct sk_buff *skb);
int ip_mc_output(struct sk_buff *skb);
int ip_output(struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct sock *sk, struct sk_buff *skb);
int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
int ip_do_nat(struct sk_buff *skb);
void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct sk_buff *skb);
int ip_local_out(struct sk_buff *skb);
int ip_local_out_sk(struct sock *sk, struct sk_buff *skb);
static inline int ip_local_out(struct sk_buff *skb)
{
return ip_local_out_sk(skb->sk, skb);
}

int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
void ip_init(void);
int ip_append_data(struct sock *sk, struct flowi4 *fl4,
Expand Down
2 changes: 1 addition & 1 deletion include/net/ip_tunnels.h
Expand Up @@ -153,7 +153,7 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
}

int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto);
int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet);

Expand Down
2 changes: 1 addition & 1 deletion include/net/ipv6.h
Expand Up @@ -731,7 +731,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net,
* skb processing functions
*/

int ip6_output(struct sk_buff *skb);
int ip6_output(struct sock *sk, struct sk_buff *skb);
int ip6_forward(struct sk_buff *skb);
int ip6_input(struct sk_buff *skb);
int ip6_mc_input(struct sk_buff *skb);
Expand Down
6 changes: 3 additions & 3 deletions include/net/xfrm.h
Expand Up @@ -333,7 +333,7 @@ struct xfrm_state_afinfo {
const xfrm_address_t *saddr);
int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n);
int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n);
int (*output)(struct sk_buff *skb);
int (*output)(struct sock *sk, struct sk_buff *skb);
int (*output_finish)(struct sk_buff *skb);
int (*extract_input)(struct xfrm_state *x,
struct sk_buff *skb);
Expand Down Expand Up @@ -1540,7 +1540,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)

int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_output(struct sk_buff *skb);
int xfrm4_output(struct sock *sk, struct sk_buff *skb);
int xfrm4_output_finish(struct sk_buff *skb);
int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err);
int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
Expand All @@ -1565,7 +1565,7 @@ __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
__be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_output(struct sk_buff *skb);
int xfrm6_output(struct sock *sk, struct sk_buff *skb);
int xfrm6_output_finish(struct sk_buff *skb);
int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
u8 **prevhdr);
Expand Down
15 changes: 9 additions & 6 deletions net/core/dst.c
Expand Up @@ -142,12 +142,12 @@ static void dst_gc_task(struct work_struct *work)
mutex_unlock(&dst_gc_mutex);
}

int dst_discard(struct sk_buff *skb)
int dst_discard_sk(struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
EXPORT_SYMBOL(dst_discard);
EXPORT_SYMBOL(dst_discard_sk);

const u32 dst_default_metrics[RTAX_MAX + 1] = {
/* This initializer is needed to force linker to place this variable
Expand Down Expand Up @@ -184,7 +184,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst->xfrm = NULL;
#endif
dst->input = dst_discard;
dst->output = dst_discard;
dst->output = dst_discard_sk;
dst->error = 0;
dst->obsolete = initial_obsolete;
dst->header_len = 0;
Expand All @@ -209,8 +209,10 @@ static void ___dst_free(struct dst_entry *dst)
/* The first case (dev==NULL) is required, when
protocol module is unloaded.
*/
if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
dst->input = dst->output = dst_discard;
if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
dst->input = dst_discard;
dst->output = dst_discard_sk;
}
dst->obsolete = DST_OBSOLETE_DEAD;
}

Expand Down Expand Up @@ -361,7 +363,8 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
return;

if (!unregister) {
dst->input = dst->output = dst_discard;
dst->input = dst_discard;
dst->output = dst_discard_sk;
} else {
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
Expand Down
16 changes: 14 additions & 2 deletions net/decnet/dn_route.c
Expand Up @@ -752,7 +752,7 @@ static int dn_to_neigh_output(struct sk_buff *skb)
return n->output(n, skb);
}

static int dn_output(struct sk_buff *skb)
static int dn_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
Expand Down Expand Up @@ -838,6 +838,18 @@ static int dn_forward(struct sk_buff *skb)
* Used to catch bugs. This should never normally get
* called.
*/
static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);

net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
le16_to_cpu(cb->src), le16_to_cpu(cb->dst));

kfree_skb(skb);

return NET_RX_DROP;
}

static int dn_rt_bug(struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
Expand Down Expand Up @@ -1463,7 +1475,7 @@ static int dn_route_input_slow(struct sk_buff *skb)

rt->n = neigh;
rt->dst.lastuse = jiffies;
rt->dst.output = dn_rt_bug;
rt->dst.output = dn_rt_bug_sk;
switch (res.type) {
case RTN_UNICAST:
rt->dst.input = dn_forward;
Expand Down
11 changes: 5 additions & 6 deletions net/ipv4/ip_output.c
Expand Up @@ -101,17 +101,17 @@ int __ip_local_out(struct sk_buff *skb)
skb_dst(skb)->dev, dst_output);
}

int ip_local_out(struct sk_buff *skb)
int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
{
int err;

err = __ip_local_out(skb);
if (likely(err == 1))
err = dst_output(skb);
err = dst_output_sk(sk, skb);

return err;
}
EXPORT_SYMBOL_GPL(ip_local_out);
EXPORT_SYMBOL_GPL(ip_local_out_sk);

static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
Expand Down Expand Up @@ -226,9 +226,8 @@ static int ip_finish_output(struct sk_buff *skb)
return ip_finish_output2(skb);
}

int ip_mc_output(struct sk_buff *skb)
int ip_mc_output(struct sock *sk, struct sk_buff *skb)
{
struct sock *sk = skb->sk;
struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->dst.dev;

Expand Down Expand Up @@ -287,7 +286,7 @@ int ip_mc_output(struct sk_buff *skb)
!(IPCB(skb)->flags & IPSKB_REROUTED));
}

int ip_output(struct sk_buff *skb)
int ip_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;

Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/ip_tunnel.c
Expand Up @@ -670,7 +670,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
return;
}

err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol,
err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

Expand Down
4 changes: 2 additions & 2 deletions net/ipv4/ip_tunnel_core.c
Expand Up @@ -46,7 +46,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>

int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto,
__u8 tos, __u8 ttl, __be16 df, bool xnet)
{
Expand Down Expand Up @@ -76,7 +76,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
iph->ttl = ttl;
__ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);

err = ip_local_out(skb);
err = ip_local_out_sk(sk, skb);
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
return pkt_len;
Expand Down
4 changes: 2 additions & 2 deletions net/ipv4/route.c
Expand Up @@ -1129,7 +1129,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
dst_set_expires(&rt->dst, 0);
}

static int ip_rt_bug(struct sk_buff *skb)
static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
{
pr_debug("%s: %pI4 -> %pI4, %s\n",
__func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
Expand Down Expand Up @@ -2218,7 +2218,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or

new->__use = 1;
new->input = dst_discard;
new->output = dst_discard;
new->output = dst_discard_sk;

new->dev = ort->dst.dev;
if (new->dev)
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/xfrm4_output.c
Expand Up @@ -86,7 +86,7 @@ int xfrm4_output_finish(struct sk_buff *skb)
return xfrm_output(skb);
}

int xfrm4_output(struct sk_buff *skb)
int xfrm4_output(struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
Expand Down
2 changes: 1 addition & 1 deletion net/ipv6/ip6_output.c
Expand Up @@ -132,7 +132,7 @@ static int ip6_finish_output(struct sk_buff *skb)
return ip6_finish_output2(skb);
}

int ip6_output(struct sk_buff *skb)
int ip6_output(struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
Expand Down
14 changes: 7 additions & 7 deletions net/ipv6/route.c
Expand Up @@ -84,9 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *,
static int ip6_dst_gc(struct dst_ops *ops);

static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
Expand Down Expand Up @@ -290,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
.output = dst_discard,
.output = dst_discard_sk,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
Expand Down Expand Up @@ -1058,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori

new->__use = 1;
new->input = dst_discard;
new->output = dst_discard;
new->output = dst_discard_sk;

if (dst_metrics_read_only(&ort->dst))
new->_metrics = ort->dst._metrics;
Expand Down Expand Up @@ -1577,7 +1577,7 @@ int ip6_route_add(struct fib6_config *cfg)
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
rt->dst.output = dst_discard;
rt->dst.output = dst_discard_sk;
rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
Expand Down Expand Up @@ -2129,7 +2129,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_discard_out(struct sk_buff *skb)
static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
Expand All @@ -2140,7 +2140,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}

static int ip6_pkt_prohibit_out(struct sk_buff *skb)
static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
Expand Down
5 changes: 3 additions & 2 deletions net/ipv6/sit.c
Expand Up @@ -974,8 +974,9 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto out;
}

err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos,
ttl, df, !net_eq(tunnel->net, dev_net(dev)));
err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
IPPROTO_IPV6, tos, ttl, df,
!net_eq(tunnel->net, dev_net(dev)));
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
return NETDEV_TX_OK;

Expand Down
2 changes: 1 addition & 1 deletion net/ipv6/xfrm6_output.c
Expand Up @@ -163,7 +163,7 @@ static int __xfrm6_output(struct sk_buff *skb)
return x->outer_mode->afinfo->output_finish(skb);
}

int xfrm6_output(struct sk_buff *skb)
int xfrm6_output(struct sock *sk, struct sk_buff *skb)
{
return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
skb_dst(skb)->dev, __xfrm6_output);
Expand Down
2 changes: 1 addition & 1 deletion net/openvswitch/vport-gre.c
Expand Up @@ -174,7 +174,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)

skb->local_df = 1;

return iptunnel_xmit(rt, skb, fl.saddr,
return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
OVS_CB(skb)->tun_key->ipv4_dst, IPPROTO_GRE,
OVS_CB(skb)->tun_key->ipv4_tos,
OVS_CB(skb)->tun_key->ipv4_ttl, df, false);
Expand Down
2 changes: 1 addition & 1 deletion net/xfrm/xfrm_policy.c
Expand Up @@ -1842,7 +1842,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
xfrm_pol_put(pol);
}

static int xdst_queue_output(struct sk_buff *skb)
static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
{
unsigned long sched_next;
struct dst_entry *dst = skb_dst(skb);
Expand Down

0 comments on commit aad8872

Please sign in to comment.