Skip to content

Commit

Permalink
Implement interface link header precomputation API.
Browse files Browse the repository at this point in the history
Add if_requestencap() interface method which is capable of calculating
  various link headers for given interface. Right now there is support
  for INET/INET6/ARP llheader calculation (IFENCAP_LL type request).
  Other types are planned to support more complex calculation
  (L2 multipath lagg nexthops, tunnel encap nexthops, etc..).

Reshape 'struct route' to be able to pass additional data (with its length)
  to prepend to mbuf.

These two changes permit routing code to pass pre-calculated nexthop data
  (like L2 header for route w/gateway) down to the stack eliminating the
  need for other lookups. It also brings us closer to more complex scenarios
  like transparently handling MPLS nexthops and tunnel interfaces.
  Last, but not least, it removes layering violation introduced by flowtable
  code (ro_lle) and simplifies handling of existing if_output consumers.

ARP/ND changes:
Make arp/ndp stack pre-calculate link header upon installing/updating lle
  record. Interface link address changes are handled by re-calculating
  headers for all lles based on if_lladdr event. After these changes,
  arpresolve()/nd6_resolve() returns full pre-calculated header for
  supported interfaces thus simplifying if_output().
Move these lookups to a separate ether_resolve_addr() function which either
  returns an error or a fully-prepared link header. Add <arp|nd6_>resolve_addr()
  compat versions to return link addresses instead of pre-calculated data.

BPF changes:
Raw bpf writes occupied _two_ cases: AF_UNSPEC and pseudo_AF_HDRCMPLT.
Despite the naming, both of them have their header "complete". The only
  difference is that interface source mac has to be filled by OS for
  AF_UNSPEC (controlled via BIOCGHDRCMPLT). This logic has to stay inside
  BPF and not pollute if_output() routines. Convert BPF to pass prepend data
  via new 'struct route' mechanism. Note that it does not change
  non-optimized if_output(): ro_prepend handling is purely optional.
Side note: hackish pseudo_AF_HDRCMPLT is supported for ethernet and FDDI.
  It is not needed for ethernet anymore. The only remaining FDDI user is
  dev/pdq mostly untouched since 2007. FDDI support was eliminated from
  OpenBSD in 2013 (sys/net/if_fddisubr.c rev 1.65).

Flowtable changes:
  Flowtable violates layering by saving (and not correctly managing)
  rtes/lles. Instead of passing lle pointer, pass pointer to pre-calculated
  header data from that lle.

Differential Revision:	https://reviews.freebsd.org/D4102
  • Loading branch information
AlexanderChernikov committed Dec 31, 2015
1 parent 2bfd3df commit 4fb3a82
Show file tree
Hide file tree
Showing 22 changed files with 672 additions and 213 deletions.
2 changes: 1 addition & 1 deletion sys/dev/cxgb/ulp/tom/cxgb_l2t.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e)
struct tom_data *td = sc->tom_softc;
struct toedev *tod = &td->tod;
struct sockaddr_in sin = {0};
uint8_t dmac[ETHER_ADDR_LEN];
uint8_t dmac[ETHER_HDR_LEN];
uint16_t vtag = EVL_VLID_MASK;
int rc;

Expand Down
2 changes: 1 addition & 1 deletion sys/dev/cxgbe/tom/t4_tom_l2t.c
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ resolve_entry(struct adapter *sc, struct l2t_entry *e)
struct sockaddr_in sin = {0};
struct sockaddr_in6 sin6 = {0};
struct sockaddr *sa;
uint8_t dmac[ETHER_ADDR_LEN];
uint8_t dmac[ETHER_HDR_LEN];
uint16_t vtag = VLAN_NONE;
int rc;

Expand Down
24 changes: 19 additions & 5 deletions sys/net/bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,15 @@ __FBSDID("$FreeBSD$");

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/bpf.h>
#include <net/bpf_buffer.h>
#ifdef BPF_JITTER
#include <net/bpf_jitter.h>
#endif
#include <net/bpf_zerocopy.h>
#include <net/bpfdesc.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
Expand Down Expand Up @@ -164,7 +166,7 @@ static void bpf_detachd(struct bpf_d *);
static void bpf_detachd_locked(struct bpf_d *);
static void bpf_freed(struct bpf_d *);
static int bpf_movein(struct uio *, int, struct ifnet *, struct mbuf **,
struct sockaddr *, int *, struct bpf_insn *);
struct sockaddr *, int *, struct bpf_d *);
static int bpf_setif(struct bpf_d *, struct ifreq *);
static void bpf_timed_out(void *);
static __inline void
Expand Down Expand Up @@ -454,7 +456,7 @@ bpf_ioctl_setzbuf(struct thread *td, struct bpf_d *d, struct bpf_zbuf *bz)
*/
static int
bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
struct sockaddr *sockp, int *hdrlen, struct bpf_insn *wfilter)
struct sockaddr *sockp, int *hdrlen, struct bpf_d *d)
{
const struct ieee80211_bpf_params *p;
struct ether_header *eh;
Expand Down Expand Up @@ -549,7 +551,7 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
if (error)
goto bad;

slen = bpf_filter(wfilter, mtod(m, u_char *), len, len);
slen = bpf_filter(d->bd_wfilter, mtod(m, u_char *), len, len);
if (slen == 0) {
error = EPERM;
goto bad;
Expand All @@ -566,6 +568,10 @@ bpf_movein(struct uio *uio, int linktype, struct ifnet *ifp, struct mbuf **mp,
else
m->m_flags |= M_MCAST;
}
if (d->bd_hdrcmplt == 0) {
memcpy(eh->ether_shost, IF_LLADDR(ifp),
sizeof(eh->ether_shost));
}
break;
}

Expand Down Expand Up @@ -1088,6 +1094,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
struct ifnet *ifp;
struct mbuf *m, *mc;
struct sockaddr dst;
struct route ro;
int error, hlen;

error = devfs_get_cdevpriv((void **)&d);
Expand Down Expand Up @@ -1119,7 +1126,7 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
hlen = 0;
/* XXX: bpf_movein() can sleep */
error = bpf_movein(uio, (int)d->bd_bif->bif_dlt, ifp,
&m, &dst, &hlen, d->bd_wfilter);
&m, &dst, &hlen, d);
if (error) {
d->bd_wdcount++;
return (error);
Expand Down Expand Up @@ -1151,7 +1158,14 @@ bpfwrite(struct cdev *dev, struct uio *uio, int ioflag)
BPFD_UNLOCK(d);
#endif

error = (*ifp->if_output)(ifp, m, &dst, NULL);
bzero(&ro, sizeof(ro));
if (hlen != 0) {
ro.ro_prepend = (u_char *)&dst.sa_data;
ro.ro_plen = hlen;
ro.ro_flags = RT_HAS_HEADER;
}

error = (*ifp->if_output)(ifp, m, &dst, &ro);
if (error)
d->bd_wdcount++;

Expand Down
10 changes: 9 additions & 1 deletion sys/net/flowtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -665,6 +665,7 @@ int
flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro)
{
struct flentry *fle;
struct llentry *lle;

if (V_flowtable_enable == 0)
return (ENXIO);
Expand Down Expand Up @@ -693,8 +694,15 @@ flowtable_lookup(sa_family_t sa, struct mbuf *m, struct route *ro)
}

ro->ro_rt = fle->f_rt;
ro->ro_lle = fle->f_lle;
ro->ro_flags |= RT_NORTREF;
lle = fle->f_lle;
if (lle != NULL && (lle->la_flags & LLE_VALID)) {
ro->ro_prepend = lle->r_linkdata;
ro->ro_plen = lle->r_hdrlen;
ro->ro_flags |= RT_MAY_LOOP;
if (lle->la_flags & LLE_IFADDR)
ro->ro_flags |= RT_L2_ME;
}

return (0);
}
Expand Down
41 changes: 41 additions & 0 deletions sys/net/if.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ static int ifconf(u_long, caddr_t);
static void if_freemulti(struct ifmultiaddr *);
static void if_grow(void);
static void if_input_default(struct ifnet *, struct mbuf *);
static int if_requestencap_default(struct ifnet *, struct if_encap_req *);
static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
Expand Down Expand Up @@ -673,6 +674,9 @@ if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc)
if (ifp->if_input == NULL)
ifp->if_input = if_input_default;

if (ifp->if_requestencap == NULL)
ifp->if_requestencap = if_requestencap_default;

if (!vmove) {
#ifdef MAC
mac_ifnet_create(ifp);
Expand Down Expand Up @@ -3397,6 +3401,43 @@ if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
return (0);
}

/*
 * Fallback if_requestencap() handler for interfaces that do not
 * provide their own.
 *
 * Stacks that have not been converted (FDDI, IB, ..) keep the
 * traditional output model: ARP and similar L2 protocols are dealt
 * with inside the output routine, and arpresolve()/nd6_resolve()
 * hand back a MAC address rather than a complete prepend.
 *
 * For IFENCAP_LL requests in the AF_INET/AF_INET6 families the
 * "calculated header" is therefore just the link-layer address,
 * copied verbatim to the start of req->buf.  On success req->bufsize
 * is set to the number of bytes written and req->lladdr_off to 0.
 *
 * Returns:
 *   0            - address copied into req->buf
 *   EOPNOTSUPP   - request type is not IFENCAP_LL
 *   ENOMEM       - req->bufsize is smaller than req->lladdr_len
 *   EAFNOSUPPORT - any other address family (handled in ARP code)
 *
 * The ifp argument is not consulted.
 */
static int
if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req)
{

	/* Only plain link-layer header requests are understood here. */
	if (req->rtype != IFENCAP_LL)
		return (EOPNOTSUPP);

	/* The supplied buffer has to fit the whole link-layer address. */
	if (req->lladdr_len > req->bufsize)
		return (ENOMEM);

	if (req->family != AF_INET && req->family != AF_INET6)
		return (EAFNOSUPPORT);

	/* The header is the bare address, placed at offset zero. */
	memmove(req->buf, req->lladdr, req->lladdr_len);
	req->lladdr_off = 0;
	req->bufsize = req->lladdr_len;

	return (0);
}

/*
* The name argument must be a pointer to storage which will last as
* long as the interface does. For physical devices, the result of
Expand Down
Loading

0 comments on commit 4fb3a82

Please sign in to comment.