Skip to content

Commit

Permalink
Existence of PCB route caching doesn't allow us to use the new fast route
Browse files Browse the repository at this point in the history
lookup KPI in ip_output() like it is already used in ip_forward().
However, when there is no PCB provided we can use the fast KPI, gaining
a performance advantage.

A typical case where ip_output() is called without a PCB pointer is a
sendto(2) on an unconnected UDP socket. In practice DNS servers do
this.

Reviewed by:	melifaro
Differential Revision:	https://reviews.freebsd.org/D19804
  • Loading branch information
glebius committed May 8, 2019
1 parent 37294b7 commit c150a0f
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 84 deletions.
1 change: 1 addition & 0 deletions sys/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ struct rtentry {
#define NHF_DEFAULT 0x0080 /* Default route */
#define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */
#define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */
#define NHF_HOST 0x0400 /* RTF_HOST */

/* Nexthop request flags */
#define NHR_IFAIF 0x01 /* Return ifa_ifp interface */
Expand Down
1 change: 1 addition & 0 deletions sys/net/route_var.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ fib_rte_to_nh_flags(int rt_flags)
uint16_t res;

res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0;
res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0;
res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0;
res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0;
res |= (rt_flags & RTF_BROADCAST) ? NHF_BROADCAST : 0;
Expand Down
7 changes: 2 additions & 5 deletions sys/netinet/in_fib.c
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
uint32_t flags, struct nhop4_extended *pnh4)
{
struct sockaddr_in *gw;
struct in_ifaddr *ia;

if ((flags & NHR_IFAIF) != 0)
pnh4->nh_ifp = rte->rt_ifa->ifa_ifp;
Expand All @@ -113,10 +112,8 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in_addr dst,
gw = (struct sockaddr_in *)rt_key(rte);
if (gw->sin_addr.s_addr == 0)
pnh4->nh_flags |= NHF_DEFAULT;
/* XXX: Set RTF_BROADCAST if GW address is broadcast */

ia = ifatoia(rte->rt_ifa);
pnh4->nh_src = IA_SIN(ia)->sin_addr;
pnh4->nh_ia = ifatoia(rte->rt_ifa);
pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr;
}

/*
Expand Down
3 changes: 2 additions & 1 deletion sys/netinet/in_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,13 @@ struct nhop4_basic {
/* Extended nexthop info used for control protocols */
struct nhop4_extended {
struct ifnet *nh_ifp; /* Logical egress interface */
struct in_ifaddr *nh_ia; /* Associated address */
uint16_t nh_mtu; /* nexthop mtu */
uint16_t nh_flags; /* nhop flags */
uint8_t spare[4];
struct in_addr nh_addr; /* GW/DST IPv4 address */
struct in_addr nh_src; /* default source IPv4 address */
uint64_t spare2[2];
uint64_t spare2;
};

int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags,
Expand Down
174 changes: 96 additions & 78 deletions sys/netinet/ip_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$");
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_fib.h>
#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
Expand Down Expand Up @@ -227,13 +228,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
int hlen = sizeof (struct ip);
int mtu;
int error = 0;
struct sockaddr_in *dst;
struct sockaddr_in *dst, sin;
const struct sockaddr_in *gw;
struct in_ifaddr *ia;
struct in_addr src;
int isbroadcast;
uint16_t ip_len, ip_off;
struct route iproute;
struct rtentry *rte; /* cache for ro->ro_rt */
uint32_t fibnum;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
int no_route_but_check_spd = 0;
Expand All @@ -252,11 +252,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
#endif
}

if (ro == NULL) {
ro = &iproute;
bzero(ro, sizeof (*ro));
}

if (opt) {
int len = 0;
m = ip_insertoptions(m, opt, &len);
Expand All @@ -281,26 +276,28 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
/*
* dst/gw handling:
*
* dst can be rewritten but always points to &ro->ro_dst.
* gw is readonly but can point either to dst OR rt_gateway,
* therefore we need restore gw if we're redoing lookup.
*/
gw = dst = (struct sockaddr_in *)&ro->ro_dst;
fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
rte = ro->ro_rt;
if (rte == NULL) {
if (ro != NULL)
dst = (struct sockaddr_in *)&ro->ro_dst;
else
dst = &sin;
if (ro == NULL || ro->ro_rt == NULL) {
bzero(dst, sizeof(*dst));
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = ip->ip_dst;
}
gw = dst;
NET_EPOCH_ENTER(et);
again:
/*
* Validate route against routing table additions;
* a better/more specific route might have been added.
*/
if (inp)
if (inp != NULL && ro != NULL && ro->ro_rt != NULL)
RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
/*
* If there is a cached route,
Expand All @@ -310,15 +307,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* cache with IPv6.
* Also check whether routing cache needs invalidation.
*/
rte = ro->ro_rt;
if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
rte->rt_ifp == NULL ||
!RT_LINK_IS_UP(rte->rt_ifp) ||
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
if (ro != NULL && ro->ro_rt != NULL &&
((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) ||
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr))
RO_INVALIDATE_CACHE(ro);
rte = NULL;
}
ia = NULL;
/*
* If routing to interface only, short circuit routing lookup.
Expand All @@ -338,8 +332,10 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
ip->ip_dst.s_addr = INADDR_BROADCAST;
dst->sin_addr = ip->ip_dst;
ifp = ia->ia_ifp;
mtu = ifp->if_mtu;
ip->ip_ttl = 1;
isbroadcast = 1;
src = IA_SIN(ia)->sin_addr;
} else if (flags & IP_ROUTETOIF) {
if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst),
M_GETFIB(m)))) == NULL &&
Expand All @@ -350,38 +346,77 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
goto bad;
}
ifp = ia->ia_ifp;
mtu = ifp->if_mtu;
ip->ip_ttl = 1;
isbroadcast = ifp->if_flags & IFF_BROADCAST ?
in_ifaddr_broadcast(dst->sin_addr, ia) : 0;
src = IA_SIN(ia)->sin_addr;
} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
imo != NULL && imo->imo_multicast_ifp != NULL) {
/*
* Bypass the normal routing lookup for multicast
* packets if the interface is specified.
*/
ifp = imo->imo_multicast_ifp;
mtu = ifp->if_mtu;
IFP_TO_IA(ifp, ia, &in_ifa_tracker);
isbroadcast = 0; /* fool gcc */
} else {
/*
* We want to do any cloning requested by the link layer,
* as this is probably required in all cases for correct
* operation (as it is for ARP).
*/
if (rte == NULL) {
src = IA_SIN(ia)->sin_addr;
} else if (ro != NULL) {
if (ro->ro_rt == NULL) {
/*
* We want to do any cloning requested by the link
* layer, as this is probably required in all cases
* for correct operation (as it is for ARP).
*/
#ifdef RADIX_MPATH
rtalloc_mpath_fib(ro,
ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
fibnum);
#else
in_rtalloc_ign(ro, 0, fibnum);
#endif
rte = ro->ro_rt;
if (ro->ro_rt == NULL ||
(ro->ro_rt->rt_flags & RTF_UP) == 0 ||
ro->ro_rt->rt_ifp == NULL ||
!RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) {
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
/*
* There is no route for this packet, but it is
* possible that a matching SPD entry exists.
*/
no_route_but_check_spd = 1;
mtu = 0; /* Silence GCC warning. */
goto sendit;
#endif
IPSTAT_INC(ips_noroute);
error = EHOSTUNREACH;
goto bad;
}
}
if (rte == NULL ||
(rte->rt_flags & RTF_UP) == 0 ||
rte->rt_ifp == NULL ||
!RT_LINK_IS_UP(rte->rt_ifp)) {
ia = ifatoia(ro->ro_rt->rt_ifa);
ifp = ro->ro_rt->rt_ifp;
counter_u64_add(ro->ro_rt->rt_pksent, 1);
rt_update_ro_flags(ro);
if (ro->ro_rt->rt_flags & RTF_GATEWAY)
gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
if (ro->ro_rt->rt_flags & RTF_HOST)
isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
else if (ifp->if_flags & IFF_BROADCAST)
isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
else
isbroadcast = 0;
if (ro->ro_rt->rt_flags & RTF_HOST)
mtu = ro->ro_rt->rt_mtu;
else
mtu = ifp->if_mtu;
src = IA_SIN(ia)->sin_addr;
} else {
struct nhop4_extended nh;

bzero(&nh, sizeof(nh));
if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh) !=
0) {
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
/*
* There is no route for this packet, but it is
Expand All @@ -395,31 +430,29 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
error = EHOSTUNREACH;
goto bad;
}
ia = ifatoia(rte->rt_ifa);
ifp = rte->rt_ifp;
counter_u64_add(rte->rt_pksent, 1);
rt_update_ro_flags(ro);
if (rte->rt_flags & RTF_GATEWAY)
gw = (struct sockaddr_in *)rte->rt_gateway;
if (rte->rt_flags & RTF_HOST)
isbroadcast = (rte->rt_flags & RTF_BROADCAST);
else if (ifp->if_flags & IFF_BROADCAST)
isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
else
isbroadcast = 0;
ifp = nh.nh_ifp;
mtu = nh.nh_mtu;
/*
* We are rewriting here dst to be gw actually, contradicting
* comment at the beginning of the function. However, in this
* case we are always dealing with on stack dst.
* In case if pfil(9) sends us back to beginning of the
* function, the dst would be rewritten by ip_output_pfil().
*/
MPASS(dst == &sin);
dst->sin_addr = nh.nh_addr;
ia = nh.nh_ia;
src = nh.nh_src;
isbroadcast = (((nh.nh_flags & (NHF_HOST | NHF_BROADCAST)) ==
(NHF_HOST | NHF_BROADCAST)) ||
((ifp->if_flags & IFF_BROADCAST) &&
in_ifaddr_broadcast(dst->sin_addr, ia)));
}

/*
* Calculate MTU. If we have a route that is up, use that,
* otherwise use the interface's MTU.
*/
if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST)))
mtu = rte->rt_mtu;
else
mtu = ifp->if_mtu;
/* Catch a possible divide by zero later. */
KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p",
__func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp));
KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p",
__func__, mtu, ro,
(ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp));

if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
m->m_flags |= M_MCAST;
Expand Down Expand Up @@ -455,11 +488,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* If source address not specified yet, use address
* of outgoing interface.
*/
if (ip->ip_src.s_addr == INADDR_ANY) {
/* Interface may have no addresses. */
if (ia != NULL)
ip->ip_src = IA_SIN(ia)->sin_addr;
}
if (ip->ip_src.s_addr == INADDR_ANY)
ip->ip_src = src;

if ((imo == NULL && in_mcast_loop) ||
(imo && imo->imo_multicast_loop)) {
Expand Down Expand Up @@ -522,12 +552,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* If the source address is not specified yet, use the address
* of the outgoing interface.
*/
if (ip->ip_src.s_addr == INADDR_ANY) {
/* Interface may have no addresses. */
if (ia != NULL) {
ip->ip_src = IA_SIN(ia)->sin_addr;
}
}
if (ip->ip_src.s_addr == INADDR_ANY)
ip->ip_src = src;

/*
* Look for broadcast address and
Expand Down Expand Up @@ -587,9 +613,10 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,

case -1: /* Need to try again */
/* Reset everything for a new round */
RO_RTFREE(ro);
ro->ro_prepend = NULL;
rte = NULL;
if (ro != NULL) {
RO_RTFREE(ro);
ro->ro_prepend = NULL;
}
gw = dst;
ip = mtod(m, struct ip *);
goto again;
Expand Down Expand Up @@ -733,15 +760,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
IPSTAT_INC(ips_fragmented);

done:
if (ro == &iproute)
RO_RTFREE(ro);
else if (rte == NULL)
/*
* If the caller supplied a route but somehow the reference
* to it has been released need to prevent the caller
* calling RTFREE on it again.
*/
ro->ro_rt = NULL;
NET_EPOCH_EXIT(et);
return (error);
bad:
Expand Down

0 comments on commit c150a0f

Please sign in to comment.