@@ -32,35 +32,30 @@
#ifndef _ENA_ETH_IO_H_
#define _ENA_ETH_IO_H_
/* Layer 3 protocol index */
enum ena_eth_io_l3_proto_index {
ENA_ETH_IO_L3_PROTO_UNKNOWN = 0,
ENA_ETH_IO_L3_PROTO_IPV4 = 8,
ENA_ETH_IO_L3_PROTO_IPV6 = 11,
ENA_ETH_IO_L3_PROTO_FCOE = 21,
ENA_ETH_IO_L3_PROTO_ROCE = 22,
};
/* Layer 4 protocol index */
enum ena_eth_io_l4_proto_index {
ENA_ETH_IO_L4_PROTO_UNKNOWN = 0,
ENA_ETH_IO_L4_PROTO_TCP = 12,
ENA_ETH_IO_L4_PROTO_UDP = 13,
ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23,
};
/* ENA IO Queue Tx descriptor */
struct ena_eth_io_tx_desc {
/* word 0 : */
/* 15:0 : length - Buffer length in bytes, must
* include any packet trailers that the ENA is supposed
* to update like End-to-End CRC, Authentication GMAC
* etc. This length must not include the
@@ -83,9 +78,7 @@ struct ena_eth_io_tx_desc {
*/
u32 len_ctrl;
/* word 1 : */
/* 3:0 : l3_proto_idx - L3 protocol. This field is
* required when l3_csum_en, l3_csum or tso_en are set.
* 4 : DF - IPv4 DF, must be 0 if packet is IPv4 and
* DF flags of the IPv4 header is 0. Otherwise must
@@ -117,10 +110,8 @@ struct ena_eth_io_tx_desc {
*/
u32 meta_ctrl;
/* word 2 : Buffer address bits[31:0] */
u32 buff_addr_lo;
/* word 3 : */
/* address high and header size
* 15:0 : addr_hi - Buffer Pointer[47:32]
* 23:16 : reserved16_w2
@@ -139,11 +130,8 @@ struct ena_eth_io_tx_desc {
u32 buff_addr_hi_hdr_sz;
};
/* ENA IO Queue Tx Meta descriptor */
struct ena_eth_io_tx_meta_desc {
/* word 0 : */
/* 9:0 : req_id_lo - Request ID[9:0]
* 11:10 : reserved10 - MBZ
* 12 : reserved12 - MBZ
* 13 : reserved13 - MBZ
@@ -172,19 +160,13 @@ struct ena_eth_io_tx_meta_desc {
*/
u32 len_ctrl;
/* 5:0 : req_id_hi
* 31:6 : reserved6 - MBZ
*/
u32 word1;
/* word 2
* 7:0 : l3_hdr_len - the length of the L3 (IP) header.
* 15:8 : l3_hdr_off - the offset of the first byte
* in the L3 header from the beginning of the to-be
* transmitted packet.
* 21:16 : l4_hdr_len_in_words - counts the L4 header
* length in words. there is an explicit assumption
* that L4 header appears right after L3 header and
@@ -193,13 +175,10 @@ struct ena_eth_io_tx_meta_desc {
*/
u32 word2;
/* word 3 : */
u32 reserved;
};
/* ENA IO Queue Tx completions descriptor */
struct ena_eth_io_tx_cdesc {
/* word 0 : */
/* Request ID[15:0] */
u16 req_id;
@@ -211,24 +190,19 @@ struct ena_eth_io_tx_cdesc {
*/
u8 flags;
/* word 1 : */
u16 sub_qid;
/* indicates location of submission queue head */
u16 sq_head_idx;
};
/* ENA IO Queue Rx descriptor */
struct ena_eth_io_rx_desc {
/* word 0 : */
/* In bytes. 0 means 64KB */
u16 length;
/* MBZ */
u8 reserved2;
/* 0 : phase
* 1 : reserved1 - MBZ
* 2 : first - Indicates first descriptor in
* transaction
@@ -239,32 +213,27 @@ struct ena_eth_io_rx_desc {
*/
u8 ctrl;
/* word 1 : */
u16 req_id;
/* MBZ */
u16 reserved6;
/* word 2 : Buffer address bits[31:0] */
u32 buff_addr_lo;
/* word 3 : */
/* Buffer Address bits[47:16] */
u16 buff_addr_hi;
/* MBZ */
u16 reserved16_w3;
};
/* ENA IO Queue Rx Completion Base Descriptor (4-word format). Note: all
* ethernet parsing information is valid only when last=1
*/
struct ena_eth_io_rx_cdesc_base {
/* word 0 : */
/* 4:0 : l3_proto_idx - L3 protocol index
* 6:5 : src_vlan_cnt - Source VLAN count
* 7 : reserved7 - MBZ
* 12:8 : l4_proto_idx - L4 protocol index
* 13 : l3_csum_err - when set, either the L3
* checksum error detected, or, the controller didn't
* validate the checksum. This bit is valid only when
@@ -289,56 +258,43 @@ struct ena_eth_io_rx_cdesc_base {
*/
u32 status;
/* word 1 : */
u16 length;
u16 req_id;
/* word 2 : 32-bit hash result */
u32 hash;
/* word 3 : */
/* submission queue number */
u16 sub_qid;
u16 reserved;
};
/* ENA IO Queue Rx Completion Descriptor (8-word format) */
struct ena_eth_io_rx_cdesc_ext {
/* words 0:3 : Rx Completion Extended */
struct ena_eth_io_rx_cdesc_base base;
/* word 4 : Completed Buffer address bits[31:0] */
u32 buff_addr_lo;
/* word 5 : */
/* the buffer address used bits[47:32] */
u16 buff_addr_hi;
u16 reserved16;
/* word 6 : Reserved */
u32 reserved_w6;
/* word 7 : Reserved */
u32 reserved_w7;
};
/* ENA Interrupt Unmask Register */
struct ena_eth_io_intr_reg {
/* word 0 : */
/* 14:0 : rx_intr_delay - rx interrupt delay value
* 29:15 : tx_intr_delay - tx interrupt delay value
* 30 : intr_unmask - if set, unmasks interrupt
* 31 : reserved
*/
u32 intr_control;
};
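/* A minimal illustrative sketch (not part of this header): composing the
 * intr_control word from the bit layout documented above. The helper name
 * is an assumption used for illustration only.
 */
static inline u32 ena_eth_io_intr_reg_build(u32 rx_delay, u32 tx_delay,
                                            bool unmask)
{
        u32 val = rx_delay & 0x7fff;            /* 14:0 : rx_intr_delay */

        val |= (tx_delay & 0x7fff) << 15;       /* 29:15 : tx_intr_delay */
        if (unmask)
                val |= 1U << 30;                /* 30 : intr_unmask */
        return val;
}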
/* ENA NUMA Node configuration register */
struct ena_eth_io_numa_node_cfg_reg {
/* word 0 : */
/* 7:0 : numa
* 30:8 : reserved
* 31 : enabled
@@ -78,6 +78,8 @@
#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e
#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8
#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00
#define ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT 16
#define ENA_REGS_CAPS_ADMIN_CMD_TO_MASK 0xf0000
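/* A minimal illustrative sketch (an assumption, not part of the register
 * definitions): extracting the admin command timeout field from a value
 * read from the caps register, using the mask/shift pair defined above.
 */
static inline u32 ena_regs_get_admin_cmd_to(u32 caps)
{
        return (caps & ENA_REGS_CAPS_ADMIN_CMD_TO_MASK) >>
                ENA_REGS_CAPS_ADMIN_CMD_TO_SHIFT;
}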
/* aq_caps register */
#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff
@@ -3,28 +3,28 @@ Linux kernel driver for Elastic Network Adapter (ENA) family:
Overview:
=========
ENA is a networking interface designed to make good use of modern CPU
features and system architectures.
The ENA device exposes a lightweight management interface with a
minimal set of memory mapped registers and extendable command set
through an Admin Queue.
The driver supports a range of ENA devices, is link-speed independent
(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
a negotiated and extendable feature set.
Some ENA devices support SR-IOV. This driver is used for both the
SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
ENA devices enable high speed and low overhead network traffic
processing by providing multiple Tx/Rx queue pairs (the maximum number
is advertised by the device via the Admin Queue), a dedicated MSI-X
interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation,
and CPU cacheline optimized data placement.
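For example, on kernels where the ethtool channels interface is
available, ethtool -l eth0 (assuming an interface named eth0) reports
the number of Tx/Rx queue pairs currently in use and the maximum the
device advertises.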
The ENA driver supports industry standard TCP/IP offload features such
as checksum offload and TCP transmit segmentation offload (TSO).
Receive-side scaling (RSS) is supported for multi-core scaling.
The ENA driver and its corresponding devices implement health
@@ -34,6 +34,7 @@ debug logs.
Some of the ENA devices support a working mode called Low-latency
Queue (LLQ), which saves several more microseconds.
#ifndef MAINLINE
Driver compilation:
===================
@@ -74,6 +75,7 @@ insert "ena" to the file
copy the ena.ko to /lib/modules/(uname -r)/
sudo depmod
restart the OS (sudo reboot and reconnect)
#endif
Supported PCI vendor ID/device IDs:
===================================
@@ -217,64 +219,44 @@ Interrupt Moderation:
=====================
ENA driver and device can operate in conventional or adaptive interrupt
moderation mode.
In conventional mode the driver instructs the device to postpone interrupt
posting according to a static interrupt delay value. The interrupt delay
value can be configured through ethtool(8). The following ethtool
parameters are supported by the driver: tx-usecs, rx-usecs
In adaptive interrupt moderation mode the interrupt delay value is updated
by the driver dynamically and adjusted every NAPI cycle according to the
traffic nature.
By default the ENA driver applies adaptive coalescing on Rx traffic and
conventional coalescing on Tx traffic.
Adaptive coalescing can be switched on/off through the ethtool(8)
adaptive_rx on|off parameter.
The driver chooses the interrupt delay value according to the number of
bytes and packets received between interrupt unmasking and interrupt
posting. The driver uses an interrupt delay table that subdivides the
range of received bytes/packets into 5 levels and assigns an interrupt
delay value to each level.
The user can enable/disable adaptive moderation, modify the interrupt
delay table and restore its default values through sysfs.
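For example, assuming an interface named eth0 (the name and values here
are illustrative only), the Tx delay can be set with:
  ethtool -C eth0 tx-usecs 64
and Rx can be moved to conventional moderation with a fixed delay using:
  ethtool -C eth0 adaptive-rx off rx-usecs 32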
Memory Allocations:
===================
DMA Coherent buffers are allocated for the following DMA rings:
- Tx submission ring (For regular mode; for LLQ mode it is allocated
using kzalloc)
- Tx completion ring
- Rx submission ring
- Rx completion ring
- Admin submission ring
- Admin completion ring
- AENQ ring
The ENA device AQ and AENQ are allocated on probe and freed on termination.
Regular allocations:
- Tx buffers info ring
- Tx free indexes ring
- Rx buffers info ring
- MSI-X table
- ENA device structure
Tx/Rx buffers and the MSI-X table are allocated on Open and freed on Close.
Rx buffer allocation:
- The driver allocates buffers using netdev_alloc_frag()
- Buffers are allocated when:
1. enabling an interface -- open()
2. Once per Rx poll for all the frames received and not copied to
the newly allocated SKB
These buffers are freed on close().
The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
#ifndef MAINLINE
and can be configured by the sysfs path
/sys/bus/pci/devices/<domain:bus:slot.function>/rx_copybreak.
#else
and can be configured by the ETHTOOL_STUNABLE command of the
SIOCETHTOOL ioctl.
#endif
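For example, assuming an interface named eth0 on PCI device
0000:00:05.0 (both illustrative), the value can be changed with a
sufficiently recent ethtool using
  ethtool --set-tunable eth0 rx-copybreak 256
on a mainline build, or by writing to
  /sys/bus/pci/devices/0000:00:05.0/rx_copybreak
otherwise.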
SKB:
The driver allocates an SKB for each frame received in Rx handling, in
NAPI context. The allocation method depends on the size of the packet.
If the frame length is larger than rx_copybreak, napi_get_frags()
is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer
content is copied (by CPU) to the SKB, and the buffer is recycled.
@@ -291,7 +273,7 @@ MTU:
The driver supports an arbitrarily large MTU with a maximum that is
negotiated with the device. The driver configures MTU using the
SetFeature command (ENA_ADMIN_MTU property). The user can change MTU
via ip(8) and similar legacy tools.
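For example, ip link set dev eth0 mtu 9001 (the interface name and MTU
value are illustrative; the accepted maximum is the one negotiated with
the device).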
Stateless Offloads:
===================
@@ -358,9 +340,9 @@ Rx:
no new packet is found.
- Then it calls the ena_clean_rx_irq() function.
- ena_eth_rx_skb() checks packet length:
* If the packet is small (len < rx_copybreak), the driver allocates
a SKB for the new packet, and copies the packet payload into the
SKB data buffer.
- In this way the original data buffer is not passed to the stack
and is reused for future Rx packets.
* Otherwise the function unmaps the Rx buffer, then allocates the
@@ -1,6 +1,57 @@
==== ENA Driver Release notes ====
---- r1.1.2 ----
New Features:
* Add ndo busy poll callback, which typically reduces network latency.
* Use napi_schedule_irqoff when possible
* Move from ena_trc_* to pr_* functions and from ENA_ASSERT to WARN
* Fix indentation and comment structure
* Add prefetch to the driver
* Add hardware hints
* Remove affinity hints in the driver, allowing the irq balancer to move
interrupts depending on the load.
Developers can still override affinity using /proc/irq/*/smp_affinity
Bug Fixes:
* Initialize last_keep_alive_jiffies.
An uninitialized value can cause a watchdog reset.
Once this watchdog-driven reset is initiated, it will not happen again
while the OS is running.
* Reorder the initialization of the workqueues and the timer service.
In the highly unlikely event of the driver failing on probe, the reset
workqueue could access a freed area.
* Remove redundant logic in the napi callback for busy poll mode.
This impacts performance on kernels >= 4.5 when CONFIG_NET_RX_BUSY_POLL is
enabled and the socket is opened with SO_BUSY_POLL.
* In RSS hash configuration add missing variable initialization.
* Fix type mismatch in structs initialization
* Fix kernel starvation when get_statistics is called from atomic context
* Fix potential memory corruption during reset and restart flow.
* Fix kernel panic when driver reset fails
Minor changes:
* Reduce the number of printouts
* Move printing of unsupported negotiated feature to _dbg instead of _notice
* Increase default admin timeout to 3 sec and Keep-Alive to 5 sec.
* Change the behavior of Tx xmit in case of an error:
drop the packet and return NETDEV_TX_OK instead of returning NETDEV_TX_BUSY.
---- r1.0.2 ----
New Features:
* Reduce the number of parameters and use context for ena_get_dev_stats
* Don't initialize variables if the driver doesn't use their value.
* Use get_link_ksettings instead of get_settings (for kernels >= 4.6)
Bug Fixes:
* Move printing of unsupported negotiated feature to _dbg instead of _notice
* Fix ethtool RSS flow configuration
* Add missing break in ena_get_rxfh
Minor changes:
* Remove ena_nway_reset since it only returns -ENODEV
* Rename the small_copy_len tunable to rx_copybreak to match the mainline Linux tree
---- r1.0.0 ----
@@ -80,7 +80,6 @@ static const struct ena_stats ena_stats_tx_strings[] = {
ENA_STAT_TX_ENTRY(tx_poll),
ENA_STAT_TX_ENTRY(doorbells),
ENA_STAT_TX_ENTRY(prepare_ctx_err),
ENA_STAT_TX_ENTRY(missing_tx_comp),
ENA_STAT_TX_ENTRY(bad_req_id),
};
@@ -93,7 +92,12 @@ static const struct ena_stats ena_stats_rx_strings[] = {
ENA_STAT_RX_ENTRY(skb_alloc_fail),
ENA_STAT_RX_ENTRY(dma_mapping_err),
ENA_STAT_RX_ENTRY(bad_desc_num),
ENA_STAT_RX_ENTRY(rx_copybreak_pkt),
#ifdef CONFIG_NET_RX_BUSY_POLL
ENA_STAT_RX_ENTRY(bp_yield),
ENA_STAT_RX_ENTRY(bp_missed),
ENA_STAT_RX_ENTRY(bp_cleaned),
#endif
};
static const struct ena_stats ena_stats_ena_com_strings[] = {
@@ -195,12 +199,13 @@ static void ena_get_ethtool_stats(struct net_device *netdev,
int ena_get_sset_count(struct net_device *netdev, int sset)
{
struct ena_adapter *adapter = netdev_priv(netdev);
if (sset != ETH_SS_STATS)
return -EOPNOTSUPP;
return adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
+ ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM;
}
static void ena_queue_strings(struct ena_adapter *adapter, u8 **data)
@@ -262,6 +267,40 @@ static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data)
ena_com_dev_strings(&data);
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
static int ena_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *link_ksettings)
{
struct ena_adapter *adapter = netdev_priv(netdev);
struct ena_com_dev *ena_dev = adapter->ena_dev;
struct ena_admin_get_feature_link_desc *link;
struct ena_admin_get_feat_resp feat_resp;
int rc;
rc = ena_com_get_link_params(ena_dev, &feat_resp);
if (rc)
return rc;
link = &feat_resp.u.link;
link_ksettings->base.speed = link->speed;
if (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) {
ethtool_link_ksettings_add_link_mode(link_ksettings,
supported, Autoneg);
}
link_ksettings->base.autoneg =
(link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) ?
AUTONEG_ENABLE : AUTONEG_DISABLE;
link_ksettings->base.duplex = DUPLEX_FULL;
return 0;
}
#else
static int ena_get_settings(struct net_device *netdev,
struct ethtool_cmd *ecmd)
{
@@ -292,6 +331,7 @@ static int ena_get_settings(struct net_device *netdev,
return 0;
}
#endif
static int ena_get_coalesce(struct net_device *net_dev,
struct ethtool_coalesce *coalesce)
{
@@ -305,10 +345,11 @@ static int ena_get_coalesce(struct net_device *net_dev,
coalesce->tx_coalesce_usecs =
ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) /
ena_dev->intr_delay_resolution;
if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) {
coalesce->rx_coalesce_usecs =
ena_com_get_nonadaptive_moderation_interval_rx(ena_dev)
/ ena_dev->intr_delay_resolution;
}
coalesce->use_adaptive_rx_coalesce =
ena_com_get_adaptive_moderation_enabled(ena_dev);
@@ -338,32 +379,33 @@ static int ena_set_coalesce(struct net_device *net_dev,
return -EOPNOTSUPP;
}
/* Note, adaptive coalescing settings are updated through sysfs */
if (coalesce->rx_max_coalesced_frames ||
coalesce->rx_coalesce_usecs_low ||
coalesce->rx_max_coalesced_frames_low ||
coalesce->rx_coalesce_usecs_high ||
coalesce->rx_max_coalesced_frames_high)
return -EINVAL;
rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev,
coalesce->tx_coalesce_usecs);
if (rc)
return rc;
ena_update_tx_rings_intr_moderation(adapter);
@@ -372,11 +414,10 @@ static int ena_set_coalesce(struct net_device *net_dev,
ena_com_disable_adaptive_moderation(ena_dev);
rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
coalesce->rx_coalesce_usecs);
if (rc)
return rc;
} else {
/* was in adaptive mode and remains in it,
* allow to update only tx_usecs, rx is managed through sysfs
*/
if (coalesce->rx_coalesce_usecs)
return -EINVAL;
@@ -387,18 +428,11 @@ static int ena_set_coalesce(struct net_device *net_dev,
} else {
rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
coalesce->rx_coalesce_usecs);
return rc;
}
}
return 0;
}
static int ena_nway_reset(struct net_device *netdev)
{
return -ENODEV;
}
static u32 ena_get_msglevel(struct net_device *netdev)
@@ -663,8 +697,10 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
switch (ena_func) {
case ENA_ADMIN_TOEPLITZ:
func = ETH_RSS_HASH_TOP;
break;
case ENA_ADMIN_CRC32:
func = ETH_RSS_HASH_XOR;
break;
default:
netif_err(adapter, drv, netdev,
"Command parameter is not supported\n");
@@ -812,12 +848,60 @@ static void ena_get_channels(struct net_device *netdev,
}
#endif /* ETHTOOL_SCHANNELS */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
static int ena_get_tunable(struct net_device *netdev,
const struct ethtool_tunable *tuna, void *data)
{
struct ena_adapter *adapter = netdev_priv(netdev);
int ret = 0;
switch (tuna->id) {
case ETHTOOL_RX_COPYBREAK:
*(u32 *)data = adapter->rx_copybreak;
break;
default:
ret = -EINVAL;
break;
}
return ret;
}
static int ena_set_tunable(struct net_device *netdev,
const struct ethtool_tunable *tuna,
const void *data)
{
struct ena_adapter *adapter = netdev_priv(netdev);
int ret = 0;
u32 len;
switch (tuna->id) {
case ETHTOOL_RX_COPYBREAK:
len = *(u32 *)data;
if (len > adapter->netdev->mtu) {
ret = -EINVAL;
break;
}
adapter->rx_copybreak = len;
break;
default:
ret = -EINVAL;
break;
}
return ret;
}
#endif /* 3.18.0 */
static const struct ethtool_ops ena_ethtool_ops = {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 6, 0)
.get_link_ksettings = ena_get_link_ksettings,
#else
.get_settings = ena_get_settings,
#endif
.get_drvinfo = ena_get_drvinfo,
.get_msglevel = ena_get_msglevel,
.set_msglevel = ena_set_msglevel,
.nway_reset = ena_nway_reset,
.get_link = ethtool_op_get_link,
.get_coalesce = ena_get_coalesce,
.set_coalesce = ena_set_coalesce,
@@ -843,6 +927,10 @@ static const struct ethtool_ops ena_ethtool_ops = {
#ifdef ETHTOOL_SCHANNELS
.get_channels = ena_get_channels,
#endif /* ETHTOOL_SCHANNELS */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 18, 0)
.get_tunable = ena_get_tunable,
.set_tunable = ena_set_tunable,
#endif
};
void ena_set_ethtool_ops(struct net_device *netdev)
@@ -45,8 +45,8 @@
#include "ena_eth_com.h"
#define DRV_MODULE_VER_MAJOR 1
#define DRV_MODULE_VER_MINOR 1
#define DRV_MODULE_VER_SUBMINOR 2
#define DRV_MODULE_NAME "ena"
#ifndef DRV_MODULE_VERSION
@@ -55,7 +55,6 @@
__stringify(DRV_MODULE_VER_MINOR) "." \
__stringify(DRV_MODULE_VER_SUBMINOR)
#endif
#define DRV_MODULE_RELDATE "22-JUNE-2016"
#define DEVICE_NAME "Elastic Network Adapter (ENA)"
@@ -69,7 +68,7 @@
#define ENA_DEFAULT_RING_SIZE (1024)
#define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2)
#define ENA_DEFAULT_RX_COPYBREAK (128 - NET_IP_ALIGN)
/* limit the buffer size to 600 bytes to handle MTU changes from very
* small to very large, in which case the number of buffers per packet
@@ -102,7 +101,7 @@
/* Number of queues to check for missing queues per timer service */
#define ENA_MONITORED_TX_QUEUES 4
/* Max timeout packets before device reset */
#define MAX_NUM_OF_TIMEOUTED_PACKETS 128
#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1))
@@ -118,9 +117,9 @@
#define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q))
/* ENA device should send keep alive msg every 1 sec.
* We wait for 6 sec just to be on the safe side.
*/
#define ENA_DEVICE_KALIVE_TIMEOUT (6 * HZ)
#define ENA_MMIO_DISABLE_REG_READ BIT(0)
@@ -137,6 +136,7 @@ struct ena_napi {
struct napi_struct napi ____cacheline_aligned;
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
atomic_t unmask_interrupt;
u32 qid;
};
@@ -148,7 +148,18 @@ struct ena_tx_buffer {
u32 tx_descs;
/* num of buffers used by this skb */
u32 num_of_bufs;
/* Used when detecting missing tx packets to limit the number of prints */
u32 print_once;
/* Save the last jiffies to detect missing tx packets
*
* Set to a non-zero value on ena_start_xmit and set to zero in
* napi and timer_service_routine.
*
* While this value is not protected by a lock,
* a given packet is not expected to be handled by ena_start_xmit
* and by napi/timer_service at the same time.
*/
unsigned long last_jiffies;
struct ena_com_buf bufs[ENA_PKT_MAX_BUFS];
} ____cacheline_aligned;
@@ -172,7 +183,6 @@ struct ena_stats_tx {
u64 napi_comp;
u64 tx_poll;
u64 doorbells;
u64 missing_tx_comp;
u64 bad_req_id;
};
@@ -185,7 +195,12 @@ struct ena_stats_rx {
u64 skb_alloc_fail;
u64 dma_mapping_err;
u64 bad_desc_num;
u64 rx_copybreak_pkt;
#ifdef CONFIG_NET_RX_BUSY_POLL
u64 bp_yield;
u64 bp_missed;
u64 bp_cleaned;
#endif
};
struct ena_ring {
@@ -208,7 +223,7 @@ struct ena_ring {
u16 next_to_use;
u16 next_to_clean;
u16 rx_copybreak;
u16 qid;
u16 mtu;
u16 sgl_size;
@@ -233,8 +248,19 @@ struct ena_ring {
struct ena_stats_tx tx_stats;
struct ena_stats_rx rx_stats;
};
#ifdef CONFIG_NET_RX_BUSY_POLL
atomic_t bp_state;
#endif
} ____cacheline_aligned;
#ifdef CONFIG_NET_RX_BUSY_POLL
enum ena_busy_poll_state_t {
ENA_BP_STATE_IDLE = 0,
ENA_BP_STATE_NAPI,
ENA_BP_STATE_POLL,
ENA_BP_STATE_DISABLE
};
#endif
struct ena_stats_dev {
u64 tx_timeout;
u64 io_suspend;
@@ -243,6 +269,7 @@ struct ena_stats_dev {
u64 interface_up;
u64 interface_down;
u64 admin_q_pause;
u64 rx_drops;
};
enum ena_flags_t {
@@ -263,14 +290,16 @@ struct ena_adapter {
/* rx packets shorter than this len will be copied to the skb
* header
*/
u32 rx_copybreak;
u32 max_mtu;
int num_queues;
struct msix_entry *msix_entries;
int msix_vecs;
u32 missing_tx_completion_threshold;
u32 tx_usecs, rx_usecs; /* interrupt moderation */
u32 tx_frames, rx_frames; /* interrupt moderation */
@@ -284,6 +313,9 @@ struct ena_adapter {
u8 mac_addr[ETH_ALEN];
unsigned long keep_alive_timeout;
unsigned long missing_tx_completion_to;
char name[ENA_NAME_MAX_LEN];
unsigned long flags;
@@ -305,6 +337,7 @@ struct ena_adapter {
struct work_struct resume_io_task;
struct timer_list timer_service;
bool wd_state;
unsigned long last_keep_alive_jiffies;
struct u64_stats_sync syncp;
@@ -322,4 +355,106 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
int ena_get_sset_count(struct net_device *netdev, int sset);
#ifdef CONFIG_NET_RX_BUSY_POLL
static inline void ena_bp_init_lock(struct ena_ring *rx_ring)
{
/* reset state to idle */
atomic_set(&rx_ring->bp_state, ENA_BP_STATE_IDLE);
}
/* called from the napi routine to get ownership of the ring */
static inline bool ena_bp_lock_napi(struct ena_ring *rx_ring)
{
int rc = atomic_cmpxchg(&rx_ring->bp_state, ENA_BP_STATE_IDLE,
ENA_BP_STATE_NAPI);
if (rc != ENA_BP_STATE_IDLE) {
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->rx_stats.bp_yield++;
u64_stats_update_end(&rx_ring->syncp);
}
return rc == ENA_BP_STATE_IDLE;
}
static inline void ena_bp_unlock_napi(struct ena_ring *rx_ring)
{
WARN_ON(atomic_read(&rx_ring->bp_state) != ENA_BP_STATE_NAPI);
/* flush any outstanding Rx frames */
if (rx_ring->napi->gro_list)
napi_gro_flush(rx_ring->napi, false);
/* reset state to idle */
atomic_set(&rx_ring->bp_state, ENA_BP_STATE_IDLE);
}
/* called from ena_ll_busy_poll() */
static inline bool ena_bp_lock_poll(struct ena_ring *rx_ring)
{
int rc = atomic_cmpxchg(&rx_ring->bp_state, ENA_BP_STATE_IDLE,
ENA_BP_STATE_POLL);
if (rc != ENA_BP_STATE_IDLE) {
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->rx_stats.bp_yield++;
u64_stats_update_end(&rx_ring->syncp);
}
return rc == ENA_BP_STATE_IDLE;
}
static inline void ena_bp_unlock_poll(struct ena_ring *rx_ring)
{
WARN_ON(atomic_read(&rx_ring->bp_state) != ENA_BP_STATE_POLL);
/* reset state to idle */
atomic_set(&rx_ring->bp_state, ENA_BP_STATE_IDLE);
}
/* true if a socket is polling, even if it did not get the lock */
static inline bool ena_bp_busy_polling(struct ena_ring *rx_ring)
{
return atomic_read(&rx_ring->bp_state) == ENA_BP_STATE_POLL;
}
static inline bool ena_bp_disable(struct ena_ring *rx_ring)
{
int rc = atomic_cmpxchg(&rx_ring->bp_state, ENA_BP_STATE_IDLE,
ENA_BP_STATE_DISABLE);
return rc == ENA_BP_STATE_IDLE;
}
#else
static inline void ena_bp_init_lock(struct ena_ring *rx_ring)
{
}
static inline bool ena_bp_lock_napi(struct ena_ring *rx_ring)
{
return true;
}
static inline void ena_bp_unlock_napi(struct ena_ring *rx_ring)
{
}
static inline bool ena_bp_lock_poll(struct ena_ring *rx_ring)
{
return false;
}
static inline void ena_bp_unlock_poll(struct ena_ring *rx_ring)
{
}
static inline bool ena_bp_busy_polling(struct ena_ring *rx_ring)
{
return false;
}
static inline bool ena_bp_disable(struct ena_ring *rx_ring)
{
return true;
}
#endif /* CONFIG_NET_RX_BUSY_POLL */
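/* A minimal illustrative sketch of how the busy-poll helpers above are meant
 * to be used from the napi poll routine. The function below is hypothetical
 * and is not part of the driver; the real handlers live in ena_netdev.c.
 */
static inline int ena_example_rx_poll(struct ena_ring *rx_ring, int budget)
{
        int work_done = 0;

        if (!ena_bp_lock_napi(rx_ring))
                return budget;  /* ring is owned by a polling socket; retry */

        /* ... clean up to budget Rx completions here ... */

        ena_bp_unlock_napi(rx_ring);
        return work_done;
}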
#endif /* !(ENA_H) */
@@ -40,54 +40,48 @@
#include "ena_sysfs.h"
#define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr)
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
static ssize_t ena_store_rx_copybreak(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
struct ena_adapter *adapter = dev_get_drvdata(dev);
unsigned long rx_copybreak;
struct ena_ring *rx_ring;
int err, i;
err = kstrtoul(buf, 10, &rx_copybreak);
if (err < 0)
return err;
if (rx_copybreak > adapter->netdev->mtu)
return -EINVAL;
rtnl_lock();
adapter->rx_copybreak = rx_copybreak;
for (i = 0; i < adapter->num_queues; i++) {
rx_ring = &adapter->rx_ring[i];
rx_ring->rx_copybreak = rx_copybreak;
}
rtnl_unlock();
return len;
}
static ssize_t ena_show_rx_copybreak(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct ena_adapter *adapter = dev_get_drvdata(dev);
return sprintf(buf, "%d\n", adapter->small_copy_len);
return sprintf(buf, "%d\n", adapter->rx_copybreak);
}
static DEVICE_ATTR(rx_copybreak, S_IRUGO | S_IWUSR, ena_show_rx_copybreak,
ena_store_rx_copybreak);
#endif /* kernel version < 3.18 */
/* adaptive interrupt moderation */
static ssize_t ena_show_intr_moderation(struct device *dev,
@@ -215,8 +209,11 @@ int ena_sysfs_init(struct device *dev)
int i, rc;
struct ena_adapter *adapter = dev_get_drvdata(dev);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
if (device_create_file(dev, &dev_attr_rx_copybreak))
dev_err(dev, "failed to create rx_copybreak sysfs entry");
#endif
if (ena_com_interrupt_moderation_supported(adapter->ena_dev)) {
if (device_create_file(dev,
@@ -251,7 +248,9 @@ void ena_sysfs_terminate(struct device *dev)
struct ena_adapter *adapter = dev_get_drvdata(dev);
int i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0)
device_remove_file(dev, &dev_attr_rx_copybreak);
#endif
if (ena_com_interrupt_moderation_supported(adapter->ena_dev)) {
for (i = 0; i < ARRAY_SIZE(dev_attr_intr_moderation); i++)
sysfs_remove_file(&dev->kobj,
@@ -71,6 +71,7 @@ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,6,0)
#include <linux/sizes.h>
#endif
#ifndef SZ_4K
#define SZ_4K 0x00001000
#endif
@@ -252,6 +253,25 @@ static inline void _kc_eth_random_addr(u8 *addr)
#endif
#endif /* < 3.6.0 */
/******************************************************************************/
#ifndef CONFIG_NET_RX_BUSY_POLL
static inline void skb_mark_napi_id(struct sk_buff *skb,
struct napi_struct *napi)
{
}
static inline void napi_hash_del(struct napi_struct *napi)
{
}
static inline void napi_hash_add(struct napi_struct *napi)
{
}
#endif /* CONFIG_NET_RX_BUSY_POLL */
/*****************************************************************************/
#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) )
/* cpu_rmap is buggy on older version and causes dead lock */
@@ -376,6 +396,13 @@ static inline void netdev_rss_key_fill(void *buffer, size_t len)
{
get_random_bytes(buffer, len);
}
static inline void napi_schedule_irqoff(struct napi_struct *n)
{
napi_schedule(n);
}
#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
#endif /* Kernel 3.19 */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) \