201 changes: 17 additions & 184 deletions sys/dev/netmap/if_lem_netmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,22 @@ lem_netmap_reg(struct netmap_adapter *na, int onoff)
}


/*
 * Switch the adapter's interrupts according to onoff: a non-zero value
 * calls lem_enable_intr(), zero calls lem_disable_intr(). The call is
 * made between EM_CORE_LOCK() and EM_CORE_UNLOCK() on the adapter.
 */
static void
lem_netmap_intr(struct netmap_adapter *na, int onoff)
{
	struct adapter *sc = na->ifp->if_softc;

	EM_CORE_LOCK(sc);
	if (!onoff) {
		lem_disable_intr(sc);
	} else {
		lem_enable_intr(sc);
	}
	EM_CORE_UNLOCK(sc);
}


/*
* Reconcile kernel and user view of the transmit ring.
*/
Expand All @@ -99,10 +115,6 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)

/* device-specific */
struct adapter *adapter = ifp->if_softc;
#ifdef NIC_PARAVIRT
struct paravirt_csb *csb = adapter->csb;
uint64_t *csbd = (uint64_t *)(csb + 1);
#endif /* NIC_PARAVIRT */

bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_POSTREAD);
Expand All @@ -113,19 +125,6 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)

nm_i = kring->nr_hwcur;
if (nm_i != head) { /* we have new packets to send */
#ifdef NIC_PARAVIRT
int do_kick = 0;
uint64_t t = 0; // timestamp
int n = head - nm_i;
if (n < 0)
n += lim + 1;
if (csb) {
t = rdtsc(); /* last timestamp */
csbd[16] += t - csbd[0]; /* total Wg */
csbd[17] += n; /* Wg count */
csbd[0] = t;
}
#endif /* NIC_PARAVIRT */
nic_i = netmap_idx_k2n(kring, nm_i);
while (nm_i != head) {
struct netmap_slot *slot = &ring->slot[nm_i];
Expand Down Expand Up @@ -166,38 +165,8 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)
bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

#ifdef NIC_PARAVIRT
/* set unconditionally, then also kick if needed */
if (csb) {
t = rdtsc();
if (csb->host_need_txkick == 2) {
/* can compute an update of delta */
int64_t delta = t - csbd[3];
if (delta < 0)
delta = -delta;
if (csbd[8] == 0 || delta < csbd[8]) {
csbd[8] = delta;
csbd[9]++;
}
csbd[10]++;
}
csb->guest_tdt = nic_i;
csbd[18] += t - csbd[0]; // total wp
csbd[19] += n;
}
if (!csb || !csb->guest_csb_on || (csb->host_need_txkick & 1))
do_kick = 1;
if (do_kick)
#endif /* NIC_PARAVIRT */
/* (re)start the tx unit up to slot nic_i (excluded) */
E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), nic_i);
#ifdef NIC_PARAVIRT
if (do_kick) {
uint64_t t1 = rdtsc();
csbd[20] += t1 - t; // total Np
csbd[21]++;
}
#endif /* NIC_PARAVIRT */
}

/*
Expand All @@ -206,93 +175,6 @@ lem_netmap_txsync(struct netmap_kring *kring, int flags)
if (ticks != kring->last_reclaim || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
kring->last_reclaim = ticks;
/* record completed transmissions using TDH */
#ifdef NIC_PARAVIRT
/* host updates tdh unconditionally, and we have
* no side effects on reads, so we can read from there
* instead of exiting.
*/
if (csb) {
static int drain = 0, nodrain=0, good = 0, bad = 0, fail = 0;
u_int x = adapter->next_tx_to_clean;
csbd[19]++; // XXX count reclaims
nic_i = csb->host_tdh;
if (csb->guest_csb_on) {
if (nic_i == x) {
bad++;
csbd[24]++; // failed reclaims
/* no progress, request kick and retry */
csb->guest_need_txkick = 1;
mb(); // XXX barrier
nic_i = csb->host_tdh;
} else {
good++;
}
if (nic_i != x) {
csb->guest_need_txkick = 2;
if (nic_i == csb->guest_tdt)
drain++;
else
nodrain++;
#if 1
if (netmap_adaptive_io) {
/* new mechanism: last half ring (or so)
* released one slot at a time.
* This effectively makes the system spin.
*
* Take next_to_clean + 1 as a reference.
* tdh must be ahead or equal
* On entry, the logical order is
* x < tdh = nic_i
* We first push tdh up to avoid wraps.
* The limit is tdh-ll (half ring).
* if tdh-256 < x we report x;
* else we report tdh-256
*/
u_int tdh = nic_i;
u_int ll = csbd[15];
u_int delta = lim/8;
if (netmap_adaptive_io == 2 || ll > delta)
csbd[15] = ll = delta;
else if (netmap_adaptive_io == 1 && ll > 1) {
csbd[15]--;
}

if (nic_i >= kring->nkr_num_slots) {
RD(5, "bad nic_i %d on input", nic_i);
}
x = nm_next(x, lim);
if (tdh < x)
tdh += lim + 1;
if (tdh <= x + ll) {
nic_i = x;
csbd[25]++; //report n + 1;
} else {
tdh = nic_i;
if (tdh < ll)
tdh += lim + 1;
nic_i = tdh - ll;
csbd[26]++; // report tdh - ll
}
}
#endif
} else {
/* we stop, count whether we are idle or not */
int bh_active = csb->host_need_txkick & 2 ? 4 : 0;
csbd[27+ csb->host_need_txkick]++;
if (netmap_adaptive_io == 1) {
if (bh_active && csbd[15] > 1)
csbd[15]--;
else if (!bh_active && csbd[15] < lim/2)
csbd[15]++;
}
bad--;
fail++;
}
}
RD(1, "drain %d nodrain %d good %d retry %d fail %d",
drain, nodrain, good, bad, fail);
} else
#endif /* !NIC_PARAVIRT */
nic_i = E1000_READ_REG(&adapter->hw, E1000_TDH(0));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
Expand Down Expand Up @@ -324,21 +206,10 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)

/* device-specific */
struct adapter *adapter = ifp->if_softc;
#ifdef NIC_PARAVIRT
struct paravirt_csb *csb = adapter->csb;
uint32_t csb_mode = csb && csb->guest_csb_on;
uint32_t do_host_rxkick = 0;
#endif /* NIC_PARAVIRT */

if (head > lim)
return netmap_ring_reinit(kring);

#ifdef NIC_PARAVIRT
if (csb_mode) {
force_update = 1;
csb->guest_need_rxkick = 0;
}
#endif /* NIC_PARAVIRT */
/* XXX check sync modes */
bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
Expand All @@ -357,23 +228,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
uint32_t staterr = le32toh(curr->status);
int len;

#ifdef NIC_PARAVIRT
if (csb_mode) {
if ((staterr & E1000_RXD_STAT_DD) == 0) {
/* don't bother to retry if more than 1 pkt */
if (n > 1)
break;
csb->guest_need_rxkick = 1;
wmb();
staterr = le32toh(curr->status);
if ((staterr & E1000_RXD_STAT_DD) == 0) {
break;
} else { /* we are good */
csb->guest_need_rxkick = 0;
}
}
} else
#endif /* NIC_PARAVIRT */
if ((staterr & E1000_RXD_STAT_DD) == 0)
break;
len = le16toh(curr->length) - 4; // CRC
Expand All @@ -390,18 +244,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
nic_i = nm_next(nic_i, lim);
}
if (n) { /* update the state variables */
#ifdef NIC_PARAVIRT
if (csb_mode) {
if (n > 1) {
/* leave one spare buffer so we avoid rxkicks */
nm_i = nm_prev(nm_i, lim);
nic_i = nm_prev(nic_i, lim);
n--;
} else {
csb->guest_need_rxkick = 1;
}
}
#endif /* NIC_PARAVIRT */
ND("%d new packets at nic %d nm %d tail %d",
n,
adapter->next_rx_desc_to_check,
Expand Down Expand Up @@ -440,10 +282,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
curr->status = 0;
bus_dmamap_sync(adapter->rxtag, rxbuf->map,
BUS_DMASYNC_PREREAD);
#ifdef NIC_PARAVIRT
if (csb_mode && csb->host_rxkick_at == nic_i)
do_host_rxkick = 1;
#endif /* NIC_PARAVIRT */
nm_i = nm_next(nm_i, lim);
nic_i = nm_next(nic_i, lim);
}
Expand All @@ -455,12 +293,6 @@ lem_netmap_rxsync(struct netmap_kring *kring, int flags)
* so move nic_i back by one unit
*/
nic_i = nm_prev(nic_i, lim);
#ifdef NIC_PARAVIRT
/* set unconditionally, then also kick if needed */
if (csb)
csb->guest_rdt = nic_i;
if (!csb_mode || do_host_rxkick)
#endif /* NIC_PARAVIRT */
E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), nic_i);
}

Expand All @@ -486,6 +318,7 @@ lem_netmap_attach(struct adapter *adapter)
na.nm_rxsync = lem_netmap_rxsync;
na.nm_register = lem_netmap_reg;
na.num_tx_rings = na.num_rx_rings = 1;
na.nm_intr = lem_netmap_intr;
netmap_attach(&na);
}

Expand Down
21 changes: 18 additions & 3 deletions sys/dev/netmap/ixgbe_netmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void ixgbe_netmap_attach(struct adapter *adapter);
/*
* device-specific sysctl variables:
*
* ix_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
* ix_crcstrip: 0: NIC keeps CRC in rx frames (default), 1: NIC strips it.
* During regular operations the CRC is stripped, but on some
* hardware reception of frames not multiple of 64 is slower,
* so using crcstrip=0 helps in benchmarks.
Expand All @@ -65,7 +65,7 @@ SYSCTL_DECL(_dev_netmap);
static int ix_rx_miss, ix_rx_miss_bufs;
int ix_crcstrip;
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_crcstrip,
CTLFLAG_RW, &ix_crcstrip, 0, "strip CRC on rx frames");
CTLFLAG_RW, &ix_crcstrip, 0, "NIC strips CRC on rx frames");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss,
CTLFLAG_RW, &ix_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, ix_rx_miss_bufs,
Expand Down Expand Up @@ -109,6 +109,20 @@ set_crcstrip(struct ixgbe_hw *hw, int onoff)
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rxc);
}

/*
 * Switch the adapter's interrupts according to onoff: a non-zero value
 * calls ixgbe_enable_intr(), zero calls ixgbe_disable_intr(). The call
 * is made between IXGBE_CORE_LOCK() and IXGBE_CORE_UNLOCK().
 * XXX maybe ixgbe_stop should be used in either branch ?
 */
static void
ixgbe_netmap_intr(struct netmap_adapter *na, int onoff)
{
	struct adapter *sc = na->ifp->if_softc;

	IXGBE_CORE_LOCK(sc);
	if (!onoff) {
		ixgbe_disable_intr(sc);
	} else {
		ixgbe_enable_intr(sc);
	}
	IXGBE_CORE_UNLOCK(sc);
}

/*
* Register/unregister. We are already under netmap lock.
Expand Down Expand Up @@ -311,7 +325,7 @@ ixgbe_netmap_txsync(struct netmap_kring *kring, int flags)
* good way.
*/
nic_i = IXGBE_READ_REG(&adapter->hw, IXGBE_IS_VF(adapter) ?
IXGBE_VFTDH(kring->ring_id) : IXGBE_TDH(kring->ring_id));
IXGBE_VFTDH(kring->ring_id) : IXGBE_TDH(kring->ring_id));
if (nic_i >= kring->nkr_num_slots) { /* XXX can it happen ? */
D("TDH wrap %d", nic_i);
nic_i -= kring->nkr_num_slots;
Expand Down Expand Up @@ -486,6 +500,7 @@ ixgbe_netmap_attach(struct adapter *adapter)
na.nm_rxsync = ixgbe_netmap_rxsync;
na.nm_register = ixgbe_netmap_reg;
na.num_tx_rings = na.num_rx_rings = adapter->num_queues;
na.nm_intr = ixgbe_netmap_intr;
netmap_attach(&na);
}

Expand Down
1,252 changes: 709 additions & 543 deletions sys/dev/netmap/netmap.c

Large diffs are not rendered by default.

762 changes: 701 additions & 61 deletions sys/dev/netmap/netmap_freebsd.c

Large diffs are not rendered by default.

936 changes: 669 additions & 267 deletions sys/dev/netmap/netmap_generic.c

Large diffs are not rendered by default.

658 changes: 536 additions & 122 deletions sys/dev/netmap/netmap_kern.h

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions sys/dev/netmap/netmap_mbq.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
* Copyright (C) 2013-2014 Vincenzo Maffione
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -30,6 +31,8 @@

#ifdef linux
#include "bsd_glue.h"
#elif defined (_WIN32)
#include "win_glue.h"
#else /* __FreeBSD__ */
#include <sys/param.h>
#include <sys/lock.h>
Expand Down Expand Up @@ -152,12 +155,12 @@ void mbq_safe_purge(struct mbq *q)
}


void mbq_safe_destroy(struct mbq *q)
void mbq_safe_fini(struct mbq *q)
{
mtx_destroy(&q->lock);
}


void mbq_destroy(struct mbq *q)
void mbq_fini(struct mbq *q)
{
}
18 changes: 13 additions & 5 deletions sys/dev/netmap/netmap_mbq.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2013-2014 Vincenzo Maffione. All rights reserved.
* Copyright (C) 2013-2014 Vincenzo Maffione
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -40,6 +41,8 @@
/* XXX probably rely on a previous definition of SPINLOCK_T */
#ifdef linux
#define SPINLOCK_T safe_spinlock_t
#elif defined (_WIN32)
#define SPINLOCK_T win_spinlock_t
#else
#define SPINLOCK_T struct mtx
#endif
Expand All @@ -52,16 +55,21 @@ struct mbq {
SPINLOCK_T lock;
};

/* XXX "destroy" does not match "init" as a name.
* We should also clarify whether init can be used while
/* We should clarify whether init can be used while
* holding a lock, and whether mbq_safe_fini() is a NOP.
*/
void mbq_init(struct mbq *q);
void mbq_destroy(struct mbq *q);
void mbq_fini(struct mbq *q);
void mbq_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_dequeue(struct mbq *q);
void mbq_purge(struct mbq *q);

/*
 * Return the mbuf currently at the head of the queue without touching
 * the queue itself; the result is NULL when the head pointer is NULL.
 */
static inline struct mbuf *
mbq_peek(struct mbq *q)
{
	return q->head;
}

static inline void
mbq_lock(struct mbq *q)
{
Expand All @@ -76,7 +84,7 @@ mbq_unlock(struct mbq *q)


void mbq_safe_init(struct mbq *q);
void mbq_safe_destroy(struct mbq *q);
void mbq_safe_fini(struct mbq *q);
void mbq_safe_enqueue(struct mbq *q, struct mbuf *m);
struct mbuf *mbq_safe_dequeue(struct mbq *q);
void mbq_safe_purge(struct mbq *q);
Expand Down
932 changes: 876 additions & 56 deletions sys/dev/netmap/netmap_mem2.c

Large diffs are not rendered by default.

20 changes: 18 additions & 2 deletions sys/dev/netmap/netmap_mem2.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
/*
* Copyright (C) 2012-2014 Matteo Landi, Luigi Rizzo, Giuseppe Lettieri. All rights reserved.
* Copyright (C) 2012-2014 Matteo Landi
* Copyright (C) 2012-2016 Luigi Rizzo
* Copyright (C) 2012-2016 Giuseppe Lettieri
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -117,8 +120,11 @@

extern struct netmap_mem_d nm_mem;

void netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *);
int netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *);
vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
#ifdef _WIN32
PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd);
#endif
int netmap_mem_finalize(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem_init(void);
void netmap_mem_fini(void);
Expand All @@ -127,6 +133,7 @@ void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
int netmap_mem_rings_create(struct netmap_adapter *);
void netmap_mem_rings_delete(struct netmap_adapter *);
void netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *);
int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
struct netmap_mem_d* netmap_mem_private_new(const char *name,
Expand Down Expand Up @@ -157,6 +164,15 @@ void netmap_mem_put(struct netmap_mem_d *);

#endif /* !NM_DEBUG_PUTGET */

#ifdef WITH_PTNETMAP_GUEST
struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *,
unsigned int nifp_offset,
nm_pt_guest_ptctl_t);
struct ptnetmap_memdev;
struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16_t);
int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *);
#endif /* WITH_PTNETMAP_GUEST */

#define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
#define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */

Expand Down
112 changes: 74 additions & 38 deletions sys/dev/netmap/netmap_monitor.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
* Copyright (C) 2014-2016 Giuseppe Lettieri
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -101,6 +102,8 @@
#warning OSX support is only partial
#include "osx_glue.h"

#elif defined(_WIN32)
#include "win_glue.h"
#else

#error Unsupported platform
Expand Down Expand Up @@ -151,13 +154,17 @@ netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
}

/* nm_krings_create callbacks for monitors.
* We could use the default netmap_hw_krings_zmon, but
* we don't need the mbq.
*/
static int
netmap_monitor_krings_create(struct netmap_adapter *na)
{
return netmap_krings_create(na, 0);
int error = netmap_krings_create(na, 0);
if (error)
return error;
/* override the host rings callbacks */
na->tx_rings[na->num_tx_rings].nm_sync = netmap_monitor_txsync;
na->rx_rings[na->num_rx_rings].nm_sync = netmap_monitor_rxsync;
return 0;
}

/* nm_krings_delete callback for monitors */
Expand Down Expand Up @@ -186,7 +193,11 @@ nm_monitor_alloc(struct netmap_kring *kring, u_int n)
return 0;

len = sizeof(struct netmap_kring *) * n;
#ifndef _WIN32
nm = realloc(kring->monitors, len, M_DEVBUF, M_NOWAIT | M_ZERO);
#else
nm = realloc(kring->monitors, len, sizeof(struct netmap_kring *)*kring->max_monitors);
#endif
if (nm == NULL)
return ENOMEM;

Expand Down Expand Up @@ -229,10 +240,10 @@ static int netmap_monitor_parent_notify(struct netmap_kring *, int);
static int
netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zcopy)
{
int error = 0;
int error = NM_IRQ_COMPLETED;

/* synchronize with concurrently running nm_sync()s */
nm_kr_get(kring);
nm_kr_stop(kring, NM_KR_LOCKED);
/* make sure the monitor array exists and is big enough */
error = nm_monitor_alloc(kring, kring->n_monitors + 1);
if (error)
Expand All @@ -242,7 +253,7 @@ netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int
kring->n_monitors++;
if (kring->n_monitors == 1) {
/* this is the first monitor, intercept callbacks */
D("%s: intercept callbacks on %s", mkring->name, kring->name);
ND("%s: intercept callbacks on %s", mkring->name, kring->name);
kring->mon_sync = kring->nm_sync;
/* zcopy monitors do not override nm_notify(), but
* we save the original one regardless, so that
Expand All @@ -265,7 +276,7 @@ netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int
}

out:
nm_kr_put(kring);
nm_kr_start(kring);
return error;
}

Expand All @@ -277,7 +288,7 @@ static void
netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
{
/* synchronize with concurrently running nm_sync()s */
nm_kr_get(kring);
nm_kr_stop(kring, NM_KR_LOCKED);
kring->n_monitors--;
if (mkring->mon_pos != kring->n_monitors) {
kring->monitors[mkring->mon_pos] = kring->monitors[kring->n_monitors];
Expand All @@ -286,18 +297,18 @@ netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
kring->monitors[kring->n_monitors] = NULL;
if (kring->n_monitors == 0) {
/* this was the last monitor, restore callbacks and delete monitor array */
D("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
ND("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
kring->nm_sync = kring->mon_sync;
kring->mon_sync = NULL;
if (kring->tx == NR_RX) {
D("%s: restoring notify on %s: %p",
ND("%s: restoring notify on %s: %p",
mkring->name, kring->name, kring->mon_notify);
kring->nm_notify = kring->mon_notify;
kring->mon_notify = NULL;
}
nm_monitor_dealloc(kring);
}
nm_kr_put(kring);
nm_kr_start(kring);
}


Expand All @@ -316,7 +327,7 @@ netmap_monitor_stop(struct netmap_adapter *na)
for_rx_tx(t) {
u_int i;

for (i = 0; i < nma_get_nrings(na, t); i++) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
struct netmap_kring *kring = &NMR(na, t)[i];
u_int j;

Expand Down Expand Up @@ -360,23 +371,32 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
kring = &NMR(pna, t)[i];
mkring = &na->rx_rings[i];
netmap_monitor_add(mkring, kring, zmon);
if (nm_kring_pending_on(mkring)) {
netmap_monitor_add(mkring, kring, zmon);
mkring->nr_mode = NKR_NETMAP_ON;
}
}
}
}
na->na_flags |= NAF_NETMAP_ON;
} else {
if (pna == NULL) {
D("%s: parent left netmap mode, nothing to restore", na->name);
return 0;
}
na->na_flags &= ~NAF_NETMAP_ON;
if (na->active_fds == 0)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
if (mna->flags & nm_txrx2flag(t)) {
for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
kring = &NMR(pna, t)[i];
mkring = &na->rx_rings[i];
netmap_monitor_del(mkring, kring);
if (nm_kring_pending_off(mkring)) {
mkring->nr_mode = NKR_NETMAP_OFF;
/* we cannot access the parent krings if the parent
* has left netmap mode. This is signaled by a NULL
* pna pointer
*/
if (pna) {
kring = &NMR(pna, t)[i];
netmap_monitor_del(mkring, kring);
}
}
}
}
}
Expand Down Expand Up @@ -652,17 +672,27 @@ netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
static int
netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
{
int (*notify)(struct netmap_kring*, int);
ND(5, "%s %x", kring->name, flags);
/* ?xsync callbacks have tryget called by their callers
* (NIOCREGIF and poll()), but here we have to call it
* by ourselves
*/
if (nm_kr_tryget(kring))
goto out;
netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
if (nm_kr_tryget(kring, 0, NULL)) {
/* in all cases, just skip the sync */
return NM_IRQ_COMPLETED;
}
if (kring->n_monitors > 0) {
netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
notify = kring->mon_notify;
} else {
/* we are no longer monitoring this ring, so both
* mon_sync and mon_notify are NULL
*/
notify = kring->nm_notify;
}
nm_kr_put(kring);
out:
return kring->mon_notify(kring, flags);
return notify(kring, flags);
}


Expand Down Expand Up @@ -691,18 +721,25 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
struct nmreq pnmr;
struct netmap_adapter *pna; /* parent adapter */
struct netmap_monitor_adapter *mna;
struct ifnet *ifp = NULL;
int i, error;
enum txrx t;
int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
char monsuff[10] = "";

if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
if (nmr->nr_flags & NR_ZCOPY_MON) {
/* the flag makes no sense unless you are
* creating a monitor
*/
return EINVAL;
}
ND("not a monitor");
return 0;
}
/* this is a request for a monitor adapter */

D("flags %x", nmr->nr_flags);
ND("flags %x", nmr->nr_flags);

mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
if (mna == NULL) {
Expand All @@ -716,13 +753,14 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
* except other monitors.
*/
memcpy(&pnmr, nmr, sizeof(pnmr));
pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX);
error = netmap_get_na(&pnmr, &pna, create);
pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
error = netmap_get_na(&pnmr, &pna, &ifp, create);
if (error) {
D("parent lookup failed: %d", error);
free(mna, M_DEVBUF);
return error;
}
D("found parent: %s", pna->name);
ND("found parent: %s", pna->name);

if (!nm_netmap_on(pna)) {
/* parent not in netmap mode */
Expand Down Expand Up @@ -829,19 +867,17 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
*na = &mna->up;
netmap_adapter_get(*na);

/* write the configuration back */
nmr->nr_tx_rings = mna->up.num_tx_rings;
nmr->nr_rx_rings = mna->up.num_rx_rings;
nmr->nr_tx_slots = mna->up.num_tx_desc;
nmr->nr_rx_slots = mna->up.num_rx_desc;

/* keep the reference to the parent */
D("monitor ok");
ND("monitor ok");

/* drop the reference to the ifp, if any */
if (ifp)
if_rele(ifp);

return 0;

put_out:
netmap_adapter_put(pna);
netmap_unget_na(pna, ifp);
free(mna, M_DEVBUF);
return error;
}
Expand Down
260 changes: 174 additions & 86 deletions sys/dev/netmap/netmap_offloadings.c

Large diffs are not rendered by default.

156 changes: 83 additions & 73 deletions sys/dev/netmap/netmap_pipe.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2014 Giuseppe Lettieri. All rights reserved.
* Copyright (C) 2014-2016 Giuseppe Lettieri
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -54,6 +55,9 @@
#warning OSX support is only partial
#include "osx_glue.h"

#elif defined(_WIN32)
#include "win_glue.h"

#else

#error Unsupported platform
Expand All @@ -72,9 +76,11 @@

#define NM_PIPE_MAXSLOTS 4096

int netmap_default_pipes = 0; /* ignored, kept for compatibility */
static int netmap_default_pipes = 0; /* ignored, kept for compatibility */
SYSBEGIN(vars_pipes);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, default_pipes, CTLFLAG_RW, &netmap_default_pipes, 0 , "");
SYSEND;

/* allocate the pipe array in the parent adapter */
static int
Expand All @@ -91,7 +97,11 @@ nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
return EINVAL;

len = sizeof(struct netmap_pipe_adapter *) * npipes;
#ifndef _WIN32
npa = realloc(na->na_pipes, len, M_DEVBUF, M_NOWAIT | M_ZERO);
#else
npa = realloc(na->na_pipes, len, sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes);
#endif
if (npa == NULL)
return ENOMEM;

Expand Down Expand Up @@ -199,7 +209,7 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
}

while (limit-- > 0) {
struct netmap_slot *rs = &rxkring->save_ring->slot[j];
struct netmap_slot *rs = &rxkring->ring->slot[j];
struct netmap_slot *ts = &txkring->ring->slot[k];
struct netmap_slot tmp;

Expand Down Expand Up @@ -295,7 +305,7 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
* usr1 --> e1 --> e2
*
* and we are e2. e1 is certainly registered and our
* krings already exist, but they may be hidden.
* krings already exist. Nothing to do.
*/
static int
netmap_pipe_krings_create(struct netmap_adapter *na)
Expand All @@ -310,65 +320,28 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
int i;

/* case 1) above */
ND("%p: case 1, create everything", na);
D("%p: case 1, create both ends", na);
error = netmap_krings_create(na, 0);
if (error)
goto err;

/* we also create all the rings, since we need to
* update the save_ring pointers.
* netmap_mem_rings_create (called by our caller)
* will not create the rings again
*/

error = netmap_mem_rings_create(na);
if (error)
goto del_krings1;

/* update our hidden ring pointers */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
NMR(na, t)[i].save_ring = NMR(na, t)[i].ring;
}

/* now, create krings and rings of the other end */
/* create the krings of the other end */
error = netmap_krings_create(ona, 0);
if (error)
goto del_rings1;

error = netmap_mem_rings_create(ona);
if (error)
goto del_krings2;

for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
NMR(ona, t)[i].save_ring = NMR(ona, t)[i].ring;
}
goto del_krings1;

/* cross link the krings */
for_rx_tx(t) {
enum txrx r= nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(na, t); i++) {
NMR(na, t)[i].pipe = NMR(&pna->peer->up, r) + i;
NMR(&pna->peer->up, r)[i].pipe = NMR(na, t) + i;
}
}
} else {
int i;
/* case 2) above */
/* recover the hidden rings */
ND("%p: case 2, hidden rings", na);
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
NMR(na, t)[i].ring = NMR(na, t)[i].save_ring;
}

}
return 0;

del_krings2:
netmap_krings_delete(ona);
del_rings1:
netmap_mem_rings_delete(na);
del_krings1:
netmap_krings_delete(na);
err:
Expand All @@ -383,7 +356,8 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
*
* usr1 --> e1 --> e2
*
* and we are e1. Nothing special to do.
* and we are e1. Create the needed rings of the
* other end.
*
* 1.b) state is
*
Expand Down Expand Up @@ -412,14 +386,65 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
{
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
struct netmap_adapter *ona = &pna->peer->up;
int i, error = 0;
enum txrx t;

ND("%p: onoff %d", na, onoff);
if (onoff) {
na->na_flags |= NAF_NETMAP_ON;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
struct netmap_kring *kring = &NMR(na, t)[i];

if (nm_kring_pending_on(kring)) {
/* mark the partner ring as needed */
kring->pipe->nr_kflags |= NKR_NEEDRING;
}
}
}

/* create all missing needed rings on the other end */
error = netmap_mem_rings_create(ona);
if (error)
return error;

/* In case of no error we put our rings in netmap mode */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
struct netmap_kring *kring = &NMR(na, t)[i];

if (nm_kring_pending_on(kring)) {
kring->nr_mode = NKR_NETMAP_ON;
}
}
}
if (na->active_fds == 0)
na->na_flags |= NAF_NETMAP_ON;
} else {
na->na_flags &= ~NAF_NETMAP_ON;
if (na->active_fds == 0)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
struct netmap_kring *kring = &NMR(na, t)[i];

if (nm_kring_pending_off(kring)) {
kring->nr_mode = NKR_NETMAP_OFF;
/* mark the peer ring as no longer needed by us
* (it may still be kept if somebody else is using it)
*/
kring->pipe->nr_kflags &= ~NKR_NEEDRING;
}
}
}
/* delete all the peer rings that are no longer needed */
netmap_mem_rings_delete(ona);
}

if (na->active_fds) {
D("active_fds %d", na->active_fds);
return 0;
}

if (pna->peer_ref) {
ND("%p: case 1.a or 2.a, nothing to do", na);
return 0;
Expand All @@ -429,18 +454,11 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
pna->peer->peer_ref = 0;
netmap_adapter_put(na);
} else {
int i;
ND("%p: case 2.b, grab peer", na);
netmap_adapter_get(na);
pna->peer->peer_ref = 1;
/* hide our rings from netmap_mem_rings_delete */
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
NMR(na, t)[i].ring = NULL;
}
}
}
return 0;
return error;
}

/* netmap_pipe_krings_delete.
Expand Down Expand Up @@ -470,8 +488,6 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
struct netmap_pipe_adapter *pna =
(struct netmap_pipe_adapter *)na;
struct netmap_adapter *ona; /* na of the other end */
int i;
enum txrx t;

if (!pna->peer_ref) {
ND("%p: case 2, kept alive by peer", na);
Expand All @@ -480,18 +496,12 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
/* case 1) above */
ND("%p: case 1, deleting everyhing", na);
netmap_krings_delete(na); /* also zeroes tx_rings etc. */
/* restore the ring to be deleted on the peer */
ona = &pna->peer->up;
if (ona->tx_rings == NULL) {
/* already deleted, we must be on an
* cleanup-after-error path */
return;
}
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(ona, t) + 1; i++)
NMR(ona, t)[i].ring = NMR(ona, t)[i].save_ring;
}
netmap_mem_rings_delete(ona);
netmap_krings_delete(ona);
}

Expand Down Expand Up @@ -519,6 +529,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
struct nmreq pnmr;
struct netmap_adapter *pna; /* parent adapter */
struct netmap_pipe_adapter *mna, *sna, *req;
struct ifnet *ifp = NULL;
u_int pipe_id;
int role = nmr->nr_flags & NR_REG_MASK;
int error;
Expand All @@ -536,7 +547,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
/* pass to parent the requested number of pipes */
pnmr.nr_arg1 = nmr->nr_arg1;
error = netmap_get_na(&pnmr, &pna, create);
error = netmap_get_na(&pnmr, &pna, &ifp, create);
if (error) {
ND("parent lookup failed: %d", error);
return error;
Expand Down Expand Up @@ -652,16 +663,15 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
*na = &req->up;
netmap_adapter_get(*na);

/* write the configuration back */
nmr->nr_tx_rings = req->up.num_tx_rings;
nmr->nr_rx_rings = req->up.num_rx_rings;
nmr->nr_tx_slots = req->up.num_tx_desc;
nmr->nr_rx_slots = req->up.num_rx_desc;

/* keep the reference to the parent.
* It will be released by the req destructor
*/

/* drop the ifp reference, if any */
if (ifp) {
if_rele(ifp);
}

return 0;

free_sna:
Expand All @@ -671,7 +681,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
free_mna:
free(mna, M_DEVBUF);
put_out:
netmap_adapter_put(pna);
netmap_unget_na(pna, ifp);
return error;
}

Expand Down
665 changes: 508 additions & 157 deletions sys/dev/netmap/netmap_vale.c

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions sys/modules/netmap/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
# Compile netmap as a module, useful if you want a netmap bridge
# or loadable drivers.

.include <bsd.own.mk> # FreeBSD 10 and earlier
# .include "${SYSDIR}/conf/kern.opts.mk"

.PATH: ${.CURDIR}/../../dev/netmap
.PATH.h: ${.CURDIR}/../../net
CFLAGS += -I${.CURDIR}/../../
CFLAGS += -I${.CURDIR}/../../ -D INET
KMOD = netmap
SRCS = device_if.h bus_if.h opt_netmap.h
SRCS = device_if.h bus_if.h pci_if.h opt_netmap.h
SRCS += netmap.c netmap.h netmap_kern.h
SRCS += netmap_mem2.c netmap_mem2.h
SRCS += netmap_generic.c
Expand All @@ -17,5 +20,8 @@ SRCS += netmap_freebsd.c
SRCS += netmap_offloadings.c
SRCS += netmap_pipe.c
SRCS += netmap_monitor.c
SRCS += netmap_pt.c
SRCS += if_ptnet.c
SRCS += opt_inet.h opt_inet6.h

.include <bsd.kmod.mk>
109 changes: 108 additions & 1 deletion sys/net/netmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,26 @@
* netmap:foo-k the k-th NIC ring pair
* netmap:foo{k PIPE ring pair k, master side
* netmap:foo}k PIPE ring pair k, slave side
*
* Some notes about host rings:
*
* + The RX host ring is used to store those packets that the host network
* stack is trying to transmit through a NIC queue, but only if that queue
* is currently in netmap mode. Netmap will not intercept host stack mbufs
* designated to NIC queues that are not in netmap mode. As a consequence,
* registering a netmap port with netmap:foo^ is not enough to intercept
* mbufs in the RX host ring; the netmap port should be registered with
* netmap:foo*, or another registration should be done to open at least a
* NIC TX queue in netmap mode.
*
* + Netmap is not currently able to deal with intercepted transmit mbufs which
* require offloadings like TSO, UFO, checksumming offloadings, etc. It is
* the responsibility of the user to disable those offloadings (e.g. using
* ifconfig on FreeBSD or ethtool -K on Linux) for an interface that is being
* used in netmap mode. If the offloadings are not disabled, GSO and/or
* unchecksummed packets may be dropped immediately or end up in the host RX
* ring, and will be dropped as soon as the packet reaches another netmap
* adapter.
*/

/*
Expand Down Expand Up @@ -277,7 +297,11 @@ struct netmap_ring {
struct timeval ts; /* (k) time of last *sync() */

/* opaque room for a mutex or similar object */
uint8_t sem[128] __attribute__((__aligned__(NM_CACHE_ALIGN)));
#if !defined(_WIN32) || defined(__CYGWIN__)
uint8_t __attribute__((__aligned__(NM_CACHE_ALIGN))) sem[128];
#else
uint8_t __declspec(align(NM_CACHE_ALIGN)) sem[128];
#endif

/* the slots follow. This struct has variable size */
struct netmap_slot slot[0]; /* array of slots. */
Expand Down Expand Up @@ -496,6 +520,11 @@ struct nmreq {
#define NETMAP_BDG_OFFSET NETMAP_BDG_VNET_HDR /* deprecated alias */
#define NETMAP_BDG_NEWIF 6 /* create a virtual port */
#define NETMAP_BDG_DELIF 7 /* destroy a virtual port */
#define NETMAP_PT_HOST_CREATE 8 /* create ptnetmap kthreads */
#define NETMAP_PT_HOST_DELETE 9 /* delete ptnetmap kthreads */
#define NETMAP_BDG_POLLING_ON 10 /* create polling kthread */
#define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */
#define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */
uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */
#define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */

Expand All @@ -521,7 +550,61 @@ enum { NR_REG_DEFAULT = 0, /* backward compat, should not be used. */
#define NR_ZCOPY_MON 0x400
/* request exclusive access to the selected rings */
#define NR_EXCLUSIVE 0x800
/* request ptnetmap host support */
#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */
#define NR_PTNETMAP_HOST 0x1000
#define NR_RX_RINGS_ONLY 0x2000
#define NR_TX_RINGS_ONLY 0x4000
/* Applications set this flag if they are able to deal with virtio-net headers,
* that is send/receive frames that start with a virtio-net header.
* If not set, NIOCREGIF will fail with netmap ports that require applications
* to use those headers. If the flag is set, the application can use the
* NETMAP_VNET_HDR_GET command to figure out the header length. */
#define NR_ACCEPT_VNET_HDR 0x8000

#define NM_BDG_NAME "vale" /* prefix for bridge port name */

/*
* Windows does not have _IOWR(). _IO(), _IOW() and _IOR() are defined
* in ws2def.h but not sure if they are in the form we need.
* XXX so we redefine them
* in a convenient way to use for DeviceIoControl signatures
*/
#ifdef _WIN32
#undef _IO // ws2def.h
#define _WIN_NM_IOCTL_TYPE 40000
#define _IO(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \
METHOD_BUFFERED, FILE_ANY_ACCESS )
#define _IO_direct(_c, _n) CTL_CODE(_WIN_NM_IOCTL_TYPE, ((_n) + 0x800) , \
METHOD_OUT_DIRECT, FILE_ANY_ACCESS )

#define _IOWR(_c, _n, _s) _IO(_c, _n)

/* We have some internal sysctl in addition to the externally visible ones */
#define NETMAP_MMAP _IO_direct('i', 160) // note METHOD_OUT_DIRECT
#define NETMAP_POLL _IO('i', 162)

/* and also two setsockopt for sysctl emulation */
#define NETMAP_SETSOCKOPT _IO('i', 140)
#define NETMAP_GETSOCKOPT _IO('i', 141)


//These linknames are for the Netmap Core Driver
#define NETMAP_NT_DEVICE_NAME L"\\Device\\NETMAP"
#define NETMAP_DOS_DEVICE_NAME L"\\DosDevices\\netmap"

// Definition of a structure used to pass a virtual address within an IOCTL.
// NOTE(review): presumably the output of the NETMAP_MMAP request, i.e. the
// user-mode address at which the netmap memory was mapped -- confirm against
// the Windows driver.
typedef struct _MEMORY_ENTRY {
PVOID pUsermodeVirtualAddress;
} MEMORY_ENTRY, *PMEMORY_ENTRY;

// Three-integer record named after the poll() arguments.
// NOTE(review): presumably the payload of the NETMAP_POLL request, with
// events/revents holding POLL* bit masks and timeout a wait time -- the
// units of timeout are not visible here, confirm against the driver.
typedef struct _POLL_REQUEST_DATA {
int events;
int timeout;
int revents;
} POLL_REQUEST_DATA;

#endif /* _WIN32 */

/*
* FreeBSD uses the size value embedded in the _IOWR to determine
Expand Down Expand Up @@ -561,4 +644,28 @@ struct nm_ifreq {
char data[NM_IFRDATA_LEN];
};

/*
* netmap kernel thread configuration
*/
/*
 * bhyve/vmm.ko MSIX parameters for IOCTL.
 * NOTE(review): msg and addr presumably carry the MSI-X message data and
 * message address of the interrupt to be raised -- confirm against vmm.ko.
 */
struct ptn_vmm_ioctl_msix {
uint64_t msg;
uint64_t addr;
};

/*
 * IOCTL parameters: a command value plus its command-specific payload.
 */
struct nm_kth_ioctl {
/* NOTE(review): presumably the ioctl request code to issue -- confirm */
u_long com;
/*
 * Payload. The original note here read "TODO: use union"; the member is
 * already a union, with the MSI-X parameters as its only variant so far.
 */
union {
struct ptn_vmm_ioctl_msix msix;
} data;
};

/*
 * Configuration of a ptnetmap ring: the two notification handles of the
 * ring plus the ioctl description used to send interrupts on bhyve/FreeBSD.
 * The meaning of each handle depends on the host OS, as noted per field.
 */
struct ptnet_ring_cfg {
uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */
uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */
struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */
};
#endif /* _NET_NETMAP_H_ */
422 changes: 381 additions & 41 deletions sys/net/netmap_user.h

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions tools/tools/netmap/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
#
# For multiple programs using a single source file each,
# we can just define 'progs' and create custom targets.
PROGS = pkt-gen bridge vale-ctl
PROGS = pkt-gen nmreplay bridge vale-ctl

CLEANFILES = $(PROGS) *.o
MAN=
CFLAGS += -Werror -Wall # -nostdinc -I/usr/include -I../../../sys
CFLAGS += -Werror -Wall
CFLAGS += -nostdinc -I ../../../sys -I/usr/include
CFLAGS += -Wextra

LDFLAGS += -lpthread
Expand All @@ -16,6 +17,7 @@ CFLAGS += -DNO_PCAP
.else
LDFLAGS += -lpcap
.endif
LDFLAGS += -lm # used by nmreplay

.include <bsd.prog.mk>
.include <bsd.lib.mk>
Expand All @@ -28,5 +30,8 @@ pkt-gen: pkt-gen.o
bridge: bridge.o
$(CC) $(CFLAGS) -o bridge bridge.o

nmreplay: nmreplay.o
$(CC) $(CFLAGS) -o nmreplay nmreplay.o $(LDFLAGS)

vale-ctl: vale-ctl.o
$(CC) $(CFLAGS) -o vale-ctl vale-ctl.o
30 changes: 24 additions & 6 deletions tools/tools/netmap/bridge.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ static void
usage(void)
{
fprintf(stderr,
"usage: bridge [-v] [-i ifa] [-i ifb] [-b burst] [-w wait_time] [iface]\n");
"usage: bridge [-v] [-i ifa] [-i ifb] [-b burst] [-w wait_time] [ifa [ifb [burst]]]\n");
exit(1);
}

Expand Down Expand Up @@ -201,12 +201,12 @@ main(int argc, char **argv)
argc -= optind;
argv += optind;

if (argc > 0)
ifa = argv[0];
if (argc > 1)
ifa = argv[1];
ifb = argv[1];
if (argc > 2)
ifb = argv[2];
if (argc > 3)
burst = atoi(argv[3]);
burst = atoi(argv[2]);
if (!ifb)
ifb = ifa;
if (!ifa) {
Expand All @@ -233,7 +233,7 @@ main(int argc, char **argv)
D("cannot open %s", ifa);
return (1);
}
// XXX use a single mmap ?
/* try to reuse the mmap() of the first interface, if possible */
pb = nm_open(ifb, NULL, NM_OPEN_NO_MMAP, pa);
if (pb == NULL) {
D("cannot open %s", ifb);
Expand Down Expand Up @@ -262,6 +262,23 @@ main(int argc, char **argv)
pollfd[0].revents = pollfd[1].revents = 0;
n0 = pkt_queued(pa, 0);
n1 = pkt_queued(pb, 0);
#if defined(_WIN32) || defined(BUSYWAIT)
if (n0){
ioctl(pollfd[1].fd, NIOCTXSYNC, NULL);
pollfd[1].revents = POLLOUT;
}
else {
ioctl(pollfd[0].fd, NIOCRXSYNC, NULL);
}
if (n1){
ioctl(pollfd[0].fd, NIOCTXSYNC, NULL);
pollfd[0].revents = POLLOUT;
}
else {
ioctl(pollfd[1].fd, NIOCRXSYNC, NULL);
}
ret = 1;
#else
if (n0)
pollfd[1].events |= POLLOUT;
else
Expand All @@ -271,6 +288,7 @@ main(int argc, char **argv)
else
pollfd[1].events |= POLLIN;
ret = poll(pollfd, 2, 2500);
#endif //defined(_WIN32) || defined(BUSYWAIT)
if (ret <= 0 || verbose)
D("poll %s [0] ev %x %x rx %d@%d tx %d,"
" [1] ev %x %x rx %d@%d tx %d",
Expand Down
108 changes: 108 additions & 0 deletions tools/tools/netmap/ctrs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#ifndef CTRS_H_
#define CTRS_H_

/* $FreeBSD$ */

#include <sys/time.h>

/*
 * Counters used to accumulate statistics, together with a timestamp.
 * NOTE(review): the exact meaning of each counter is only suggested by its
 * name; confirm against the code that updates them.
 */
struct my_ctrs {
	uint64_t pkts;
	uint64_t bytes;
	uint64_t events;
	uint64_t drop;
	uint64_t min_space;
	struct timeval t;	/* time value stored alongside the counters */
};

/*
 * Print val into buf in normalized form: the value is divided by 1000 until
 * it drops below 1000 (or the largest prefix is reached) and is then
 * formatted together with the matching prefix ("", K, M, G, T).
 * fmt must consume a double followed by a string, in that order.
 * No bound is enforced on the output: the caller has to make sure that the
 * buffer is large enough. Returns buf.
 */
static const char *
norm2(char *buf, double val, char *fmt)
{
	static const char *const prefixes[] = { "", "K", "M", "G", "T" };
	const size_t last = sizeof(prefixes) / sizeof(prefixes[0]) - 1;
	size_t idx = 0;

	/* stop at the largest prefix even if the value is still >= 1000 */
	while (val >= 1000 && idx < last) {
		val /= 1000;
		idx++;
	}
	sprintf(buf, fmt, val, prefixes[idx]);
	return buf;
}

/*
 * Convenience wrapper around norm2(): prints val with three decimals, a
 * space and the unit prefix (format "%.3f %s"). Returns buf.
 */
static __inline const char *
norm(char *buf, double val)
{
	const char *text = norm2(buf, val, "%.3f %s");

	return text;
}

/* Return 1 when *a is later than or equal to *b, 0 otherwise. */
static __inline int
timespec_ge(const struct timespec *a, const struct timespec *b)
{

	/* the seconds decide, unless they are equal */
	if (a->tv_sec != b->tv_sec)
		return (a->tv_sec > b->tv_sec);
	return (a->tv_nsec >= b->tv_nsec);
}

/* Convert a timeval (seconds + microseconds) into a timespec. */
static __inline struct timespec
timeval2spec(const struct timeval *a)
{
	struct timespec out;

	out.tv_sec = a->tv_sec;
	out.tv_nsec = a->tv_usec * 1000;	/* us -> ns */
	return out;
}

/*
 * Convert a timespec (seconds + nanoseconds) into a timeval; the
 * sub-microsecond part is discarded by the integer division.
 */
static __inline struct timeval
timespec2val(const struct timespec *a)
{
	struct timeval out;

	out.tv_sec = a->tv_sec;
	out.tv_usec = a->tv_nsec / 1000;	/* ns -> us */
	return out;
}


/*
 * Return a + b.
 * Only a single carry from nanoseconds into seconds is propagated, so the
 * result is normalized (tv_nsec < 1e9) as long as both operands are.
 * The members are set by name: the order of the members of struct timespec
 * is not specified, so a positional initializer is not portable.
 */
static __inline struct timespec
timespec_add(struct timespec a, struct timespec b)
{
	struct timespec ret = {
		.tv_sec = a.tv_sec + b.tv_sec,
		.tv_nsec = a.tv_nsec + b.tv_nsec
	};

	if (ret.tv_nsec >= 1000000000) {
		ret.tv_sec++;
		ret.tv_nsec -= 1000000000;
	}
	return ret;
}

/*
 * Return a - b.
 * Only a single borrow from seconds into nanoseconds is applied, so the
 * result is normalized (tv_nsec >= 0) as long as both operands are.
 * The members are set by name: the order of the members of struct timespec
 * is not specified, so a positional initializer is not portable.
 */
static __inline struct timespec
timespec_sub(struct timespec a, struct timespec b)
{
	struct timespec ret = {
		.tv_sec = a.tv_sec - b.tv_sec,
		.tv_nsec = a.tv_nsec - b.tv_nsec
	};

	if (ret.tv_nsec < 0) {
		ret.tv_sec--;
		ret.tv_nsec += 1000000000;
	}
	return ret;
}

/*
 * Sleep for report_interval milliseconds, then store the current time in
 * *cur and return the number of microseconds elapsed since *prev.
 *
 * A select() interrupted by a signal (EINTR) simply ends the sleep early;
 * any other select() failure is reported with perror() and is fatal.
 */
static uint64_t
wait_for_next_report(struct timeval *prev, struct timeval *cur,
	int report_interval)
{
	struct timeval delta;
	int64_t usec;

	/* split the interval (ms) into the sec/usec pair select() expects */
	delta.tv_sec = report_interval / 1000;
	delta.tv_usec = (report_interval % 1000) * 1000;
	if (select(0, NULL, NULL, NULL, &delta) < 0 && errno != EINTR) {
		perror("select");
		abort();
	}
	gettimeofday(cur, NULL);
	/*
	 * Do the subtraction and the scaling in 64 bits: multiplying tv_sec
	 * in its own type overflows where time_t is only 32 bits wide.
	 */
	usec = ((int64_t)cur->tv_sec - (int64_t)prev->tv_sec) * 1000000 +
	    ((int64_t)cur->tv_usec - (int64_t)prev->tv_usec);
	return (uint64_t)usec;
}
#endif /* CTRS_H_ */
129 changes: 129 additions & 0 deletions tools/tools/netmap/nmreplay.8
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
.\" Copyright (c) 2016 Luigi Rizzo, Universita` di Pisa
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd February 16, 2016
.Dt NMREPLAY 1
.Os
.Sh NAME
.Nm nmreplay
.Nd playback a pcap file through a netmap interface
.Sh SYNOPSIS
.Bk -words
.Bl -tag -width "nmreplay"
.It Nm
.Op Fl f Ar pcap-file
.Op Fl i Ar netmap-interface
.Op Fl B Ar bandwidth
.Op Fl D Ar delay
.Op Fl L Ar loss
.Op Fl b Ar batch-size
.Op Fl w Ar wait-link
.Op Fl v
.Op Fl C Ar cpu-placement
.Sh DESCRIPTION
.Nm
works like
.Nm tcpreplay
to replay a pcap file through a netmap interface,
with programmable rates and possibly delays, losses
and packet alterations.
.Nm
is designed to run at high speed, so the transmit schedule
is computed ahead of time, and the thread in charge of transmission
only has to pump data through the interface.
.Nm
can connect to any type of netmap port.
.Pp
Command line options are as follows
.Bl -tag -width Ds
.It Fl f Ar pcap-file
Name of the pcap file to replay.
.It Fl i Ar interface
Name of the netmap interface to use as output.
.It Fl v
Enable verbose mode.
.It Fl b Ar batch-size
Maximum batch size to use during transmissions.
.Nm
normally transmits packets one at a time, but it may use
larger batches, up to the value specified with this option,
when running at high rates.
.It Fl B Ar bps | Cm constant, Ns Ar bps | Cm ether, Ns Ar bps | Cm real Ns Op , Ns Ar speedup
Bandwidth to be used for transmission.
.Ar bps
is a floating point number optionally followed by a character
(k, K, m, M, g, G) that multiplies the value by 10^3, 10^6 and 10^9
respectively.
.Cm constant
(can be omitted) means that the bandwidth will be computed
with reference to the actual packet size (excluding CRC and framing).
.Cm ether
indicates that the ethernet framing (160 bits) and CRC (32 bits)
will be included in the computation of the packet size.
.Cm real
means transmission will occur according to the timestamps
recorded in the trace. The optional
.Ar speedup
multiplier (defaults to 1) indicates how much faster
or slower than real time the trace should be replayed.
.It Fl D Ar dt | Cm constant, Ns Ar dt | Cm uniform, Ns Ar dmin,dmax | Cm exp, Ar dmin,davg
Adds additional delay to the packet transmission, whose distribution
can be constant, uniform or exponential.
.Ar dt, dmin, dmax, davg
are times expressed as floating point numbers optionally followed
by a character (s, m, u, n) to indicate seconds, milliseconds,
microseconds, nanoseconds.
The delay is added to the transmit time and adjusted so that there is
never packet reordering.
.It Fl L Ar x | Cm plr, Ns Ar x | Cm ber, Ns Ar x
Simulates packet or bit errors, causing offending packets to be dropped.
.Ar x
is a floating point number indicating the packet or bit error rate.
.It Fl w Ar wait-link
indicates the number of seconds to wait before transmitting.
It defaults to 2, and may be useful when talking to physical
ports to let link negotiation complete before starting transmission.
.El
.Sh OPERATION
.Nm
creates an in-memory schedule with all packets to be transmitted,
and then launches a separate thread to take care of transmissions
while the main thread reports statistics every second.
.Sh SEE ALSO
.Pa http://info.iet.unipi.it/~luigi/netmap/
.Pp
Luigi Rizzo, Revisiting network I/O APIs: the netmap framework,
Communications of the ACM, 55 (3), pp.45-51, March 2012
.Pp
Luigi Rizzo, Giuseppe Lettieri,
VALE, a switched ethernet for virtual machines,
ACM CoNEXT'12, December 2012, Nice
.Sh AUTHORS
.An -nosplit
.Nm
has been written by
.An Luigi Rizzo, Andrea Beconcini, Francesco Mola and Lorenzo Biagini
at the Universita` di Pisa, Italy.
1,820 changes: 1,820 additions & 0 deletions tools/tools/netmap/nmreplay.c

Large diffs are not rendered by default.

1,114 changes: 854 additions & 260 deletions tools/tools/netmap/pkt-gen.c

Large diffs are not rendered by default.

73 changes: 57 additions & 16 deletions tools/tools/netmap/vale-ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@

/* $FreeBSD$ */

#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>
#include <net/netmap.h>

#include <errno.h>
#include <stdio.h>
#include <inttypes.h> /* PRI* macros */
Expand All @@ -35,17 +39,9 @@
#include <sys/param.h>
#include <sys/socket.h> /* apple needs sockaddr */
#include <net/if.h> /* ifreq */
#include <net/netmap.h>
#include <net/netmap_user.h>
#include <libgen.h> /* basename */
#include <stdlib.h> /* atoi, free */

/* debug support */
#define ND(format, ...) do {} while(0)
#define D(format, ...) \
fprintf(stderr, "%s [%d] " format "\n", \
__FUNCTION__, __LINE__, ##__VA_ARGS__)

/* XXX cut and paste from pkt-gen.c because I'm not sure whether this
* program may include nm_util.h
*/
Expand Down Expand Up @@ -117,8 +113,11 @@ bdg_ctl(const char *name, int nr_cmd, int nr_arg, char *nmr_config)
break;
case NETMAP_BDG_ATTACH:
case NETMAP_BDG_DETACH:
if (nr_arg && nr_arg != NETMAP_BDG_HOST)
nmr.nr_flags = NR_REG_ALL_NIC;
if (nr_arg && nr_arg != NETMAP_BDG_HOST) {
nmr.nr_flags = NR_REG_NIC_SW;
nr_arg = 0;
}
nmr.nr_arg1 = nr_arg;
error = ioctl(fd, NIOCREGIF, &nmr);
if (error == -1) {
Expand Down Expand Up @@ -152,6 +151,36 @@ bdg_ctl(const char *name, int nr_cmd, int nr_arg, char *nmr_config)

break;

case NETMAP_BDG_POLLING_ON:
case NETMAP_BDG_POLLING_OFF:
/* We reuse nmreq fields as follows:
* nr_tx_slots: 0 and non-zero indicate REG_ALL_NIC and
* REG_ONE_NIC, respectively.
* nr_rx_slots: CPU core index. This also indicates the
* first queue in the case of REG_ONE_NIC
* nr_tx_rings: (REG_ONE_NIC only) indicates the
* number of CPU cores or the last queue
*/
nmr.nr_flags |= nmr.nr_tx_slots ?
NR_REG_ONE_NIC : NR_REG_ALL_NIC;
nmr.nr_ringid = nmr.nr_rx_slots;
/* number of cores/rings */
if (nmr.nr_flags == NR_REG_ALL_NIC)
nmr.nr_arg1 = 1;
else
nmr.nr_arg1 = nmr.nr_tx_rings;

error = ioctl(fd, NIOCREGIF, &nmr);
if (!error)
D("polling on %s %s", nmr.nr_name,
nr_cmd == NETMAP_BDG_POLLING_ON ?
"started" : "stopped");
else
D("polling on %s %s (err %d)", nmr.nr_name,
nr_cmd == NETMAP_BDG_POLLING_ON ?
"couldn't start" : "couldn't stop", error);
break;

default: /* GINFO */
nmr.nr_cmd = nmr.nr_arg1 = nmr.nr_arg2 = 0;
error = ioctl(fd, NIOCGINFO, &nmr);
Expand All @@ -173,7 +202,7 @@ main(int argc, char *argv[])
const char *command = basename(argv[0]);
char *name = NULL, *nmr_config = NULL;

if (argc > 3) {
if (argc > 5) {
usage:
fprintf(stderr,
"Usage:\n"
Expand All @@ -186,12 +215,18 @@ main(int argc, char *argv[])
"\t-r interface interface name to be deleted\n"
"\t-l list all or specified bridge's interfaces (default)\n"
"\t-C string ring/slot setting of an interface creating by -n\n"
"\t-p interface start polling. Additional -C x,y,z configures\n"
"\t\t x: 0 (REG_ALL_NIC) or 1 (REG_ONE_NIC),\n"
"\t\t y: CPU core id for ALL_NIC and core/ring for ONE_NIC\n"
"\t\t z: (ONE_NIC only) num of total cores/rings\n"
"\t-P interface stop polling\n"
"", command);
return 0;
}

while ((ch = getopt(argc, argv, "d:a:h:g:l:n:r:C:")) != -1) {
name = optarg; /* default */
while ((ch = getopt(argc, argv, "d:a:h:g:l:n:r:C:p:P:")) != -1) {
if (ch != 'C')
name = optarg; /* default */
switch (ch) {
default:
fprintf(stderr, "bad option %c %s", ch, optarg);
Expand Down Expand Up @@ -223,11 +258,17 @@ main(int argc, char *argv[])
case 'C':
nmr_config = strdup(optarg);
break;
case 'p':
nr_cmd = NETMAP_BDG_POLLING_ON;
break;
case 'P':
nr_cmd = NETMAP_BDG_POLLING_OFF;
break;
}
if (optind != argc) {
// fprintf(stderr, "optind %d argc %d\n", optind, argc);
goto usage;
}
}
if (optind != argc) {
// fprintf(stderr, "optind %d argc %d\n", optind, argc);
goto usage;
}
if (argc == 1)
nr_cmd = NETMAP_BDG_LIST;
Expand Down