Skip to content

Commit

Permalink
linux/ena: update ENA linux driver to version 1.5.0
Browse files Browse the repository at this point in the history
**New Features:**
* improve driver robustness - add a mechanism for detecting and recovering
       from lost/misrouted interrupts.

**Bug Fixes:**
* don't enable interrupts until the ENA_FLAG_DEV_UP flag is set - enabling
       them earlier could cause a race resulting in ignored interrupts.
* add error handling to ena_down() sequence - errors, if not handled
       correctly, might affect subsequent ena_open() procedure.
  • Loading branch information
NetanelBelgazal committed Nov 29, 2017
1 parent f0a6e5e commit 8770d55
Show file tree
Hide file tree
Showing 10 changed files with 129 additions and 27 deletions.
12 changes: 12 additions & 0 deletions kernel/linux/common/ena_com/ena_eth_com.c
Original file line number Diff line number Diff line change
Expand Up @@ -504,3 +504,15 @@ int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id)

return 0;
}

/* Returns true when ena_com_get_next_rx_cdesc() hands back no descriptor
 * for io_cq, and false when it returns one.
 */
bool ena_com_cq_empty(struct ena_com_io_cq *io_cq)
{
	return !ena_com_get_next_rx_cdesc(io_cq);
}

2 changes: 2 additions & 0 deletions kernel/linux/common/ena_com/ena_eth_com.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,

int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id);

bool ena_com_cq_empty(struct ena_com_io_cq *io_cq);

static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq,
struct ena_eth_io_intr_reg *intr_reg)
{
Expand Down
2 changes: 2 additions & 0 deletions kernel/linux/common/ena_com/ena_regs_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ enum ena_regs_reset_reason_types {
ENA_REGS_RESET_USER_TRIGGER = 12,

ENA_REGS_RESET_GENERIC = 13,

ENA_REGS_RESET_MISS_INTERRUPT = 14,
};

/* ena_registers offsets */
Expand Down
5 changes: 4 additions & 1 deletion kernel/linux/ena/README
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,11 @@ RHEL / Open Suse / Ubuntu:
--------------------------
sudo vi /etc/modules-load.d/ena.conf
insert "ena" to the file
copy the ena.ko to /lib/modules/(uname -r)/
copy the ena.ko to /lib/modules/$(uname -r)/
sudo depmod
if the previous driver was loaded from the initramfs, the initramfs will
have to be updated as well (e.g. with dracut)

restart the OS (sudo reboot and reconnect)
#endif

Expand Down
11 changes: 11 additions & 0 deletions kernel/linux/ena/RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ The driver was verified on the following distributions:
**SUSE:**
SUSE Linux Enterprise Server 12 SP2

## r1.5.0 release notes
**New Features:**
* improve driver robustness - add a mechanism for detecting and recovering
from lost/misrouted interrupts.

**Bug Fixes:**
* don't enable interrupts until the ENA_FLAG_DEV_UP flag is set - enabling
them earlier could cause a race resulting in ignored interrupts.
* add error handling to ena_down() sequence - errors, if not handled
correctly, might affect subsequent ena_open() procedure.

## r1.4.0 release notes
**New Features:**
* refactor check_missing_com_in_queue() - improve readability.
Expand Down
107 changes: 86 additions & 21 deletions kernel/linux/ena/ena_netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ static struct workqueue_struct *ena_wq;
MODULE_DEVICE_TABLE(pci, ena_pci_tbl);

static int ena_rss_init_default(struct ena_adapter *adapter);
static void check_for_admin_com_state(struct ena_adapter *adapter);
static void ena_destroy_device(struct ena_adapter *adapter);
static int ena_restore_device(struct ena_adapter *adapter);

static void ena_tx_timeout(struct net_device *dev)
{
Expand Down Expand Up @@ -174,6 +177,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter,
ring->per_napi_packets = 0;
ring->per_napi_bytes = 0;
ring->cpu = 0;
ring->first_interrupt = false;
ring->no_interrupt_event_cnt = 0;
u64_stats_init(&ring->syncp);
}

Expand Down Expand Up @@ -1355,6 +1360,9 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
napi_schedule_irqoff(&ena_napi->napi);
#endif

ena_napi->tx_ring->first_interrupt = true;
ena_napi->rx_ring->first_interrupt = true;

return IRQ_HANDLED;
}

Expand Down Expand Up @@ -1708,7 +1716,7 @@ static int ena_rss_configure(struct ena_adapter *adapter)

static int ena_up_complete(struct ena_adapter *adapter)
{
int rc, i;
int rc;

rc = ena_rss_configure(adapter);
if (rc)
Expand All @@ -1727,17 +1735,6 @@ static int ena_up_complete(struct ena_adapter *adapter)

ena_napi_enable_all(adapter);

/* Enable completion queues interrupt */
for (i = 0; i < adapter->num_queues; i++)
ena_unmask_interrupt(&adapter->tx_ring[i],
&adapter->rx_ring[i]);

/* schedule napi in case we had pending packets
* from the last time we disable napi
*/
for (i = 0; i < adapter->num_queues; i++)
napi_schedule(&adapter->ena_napi[i].napi);

return 0;
}

Expand Down Expand Up @@ -1874,7 +1871,7 @@ static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)

static int ena_up(struct ena_adapter *adapter)
{
int rc;
int rc, i;

netdev_dbg(adapter->netdev, "%s\n", __func__);

Expand Down Expand Up @@ -1917,6 +1914,17 @@ static int ena_up(struct ena_adapter *adapter)

set_bit(ENA_FLAG_DEV_UP, &adapter->flags);

/* Enable completion queues interrupt */
for (i = 0; i < adapter->num_queues; i++)
ena_unmask_interrupt(&adapter->tx_ring[i],
&adapter->rx_ring[i]);

/* schedule napi in case we had pending packets
* from the last time we disable napi
*/
for (i = 0; i < adapter->num_queues; i++)
napi_schedule(&adapter->ena_napi[i].napi);

return rc;

err_up:
Expand Down Expand Up @@ -2027,6 +2035,17 @@ static int ena_close(struct net_device *netdev)
if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
ena_down(adapter);

/* Check for device status and issue reset if needed*/
check_for_admin_com_state(adapter);
if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
netif_err(adapter, ifdown, adapter->netdev,
"Destroy failure, restarting device\n");
ena_dump_stats_to_dmesg(adapter);
/* rtnl lock already obtained in dev_ioctl() layer */
ena_destroy_device(adapter);
ena_restore_device(adapter);
}

return 0;
}

Expand Down Expand Up @@ -2836,11 +2855,12 @@ static void ena_destroy_device(struct ena_adapter *adapter)
ena_sysfs_terminate(&adapter->pdev->dev);
ena_com_set_admin_running_state(ena_dev, false);

ena_close(netdev);
if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
ena_down(adapter);

/* Before releasing the ENA resources, a device reset is required.
* (to prevent the device from accessing them).
* In case the reset flag is set and the device is up, ena_close
* In case the reset flag is set and the device is up, ena_down()
* already perform the reset, so it can be skipped.
*/
if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
Expand Down Expand Up @@ -2947,7 +2967,30 @@ static void ena_fw_reset_device(struct work_struct *work)
rtnl_unlock();
}

static int check_missing_comp_in_queue(struct ena_adapter *adapter,
/* Look for an Rx queue whose completion queue is not empty although its
 * first_interrupt flag has never been set.
 *
 * Every call that observes this condition bumps the ring's
 * no_interrupt_event_cnt. When the counter reaches
 * ENA_MAX_NO_INTERRUPT_ITERATIONS the reset reason is recorded, the
 * ENA_FLAG_TRIGGER_RESET bit is set and -EIO is returned.
 *
 * Returns 0 in every other case.
 */
static int check_for_rx_interrupt_queue(struct ena_adapter *adapter,
					struct ena_ring *rx_ring)
{
	/* The queue has already seen an interrupt - nothing to verify */
	if (likely(rx_ring->first_interrupt))
		return 0;

	/* No pending completions, so no interrupt was expected yet */
	if (ena_com_cq_empty(rx_ring->ena_com_io_cq))
		return 0;

	/* Completions are pending without any interrupt; tolerate it until
	 * the iteration limit is hit.
	 */
	if (++rx_ring->no_interrupt_event_cnt != ENA_MAX_NO_INTERRUPT_ITERATIONS)
		return 0;

	netif_err(adapter, rx_err, adapter->netdev,
		  "Potential MSIX issue on Rx side Queue = %d. Reset the device\n",
		  rx_ring->qid);

	adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
	set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);

	return -EIO;
}

static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter,
struct ena_ring *tx_ring)
{
struct ena_tx_buffer *tx_buf;
Expand All @@ -2958,8 +3001,24 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter,
for (i = 0; i < tx_ring->ring_size; i++) {
tx_buf = &tx_ring->tx_buffer_info[i];
last_jiffies = tx_buf->last_jiffies;
if (unlikely(last_jiffies &&
time_is_before_jiffies(last_jiffies + adapter->missing_tx_completion_to))) {

if (last_jiffies == 0)
/* no pending Tx at this location */
continue;

if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies +
2 * adapter->missing_tx_completion_to))) {
/* If after graceful period interrupt is still not received, we schedule a reset*/
netif_err(adapter, tx_err, adapter->netdev,
"Potential MSIX issue on Tx side Queue = %d. Reset the device\n",
tx_ring->qid);
adapter->reset_reason = ENA_REGS_RESET_MISS_INTERRUPT;
set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
return -EIO;
}

if (unlikely(time_is_before_jiffies(last_jiffies +
adapter->missing_tx_completion_to))) {
if (!tx_buf->print_once)
netif_notice(adapter, tx_err, adapter->netdev,
"Found a Tx that wasn't completed on time, qid %d, index %d.\n",
Expand Down Expand Up @@ -2988,9 +3047,10 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter,
return rc;
}

static void check_for_missing_tx_completions(struct ena_adapter *adapter)
static void check_for_missing_completions(struct ena_adapter *adapter)
{
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
int i, budget, rc;

/* Make sure the driver doesn't turn the device in other process */
Expand All @@ -3009,8 +3069,13 @@ static void check_for_missing_tx_completions(struct ena_adapter *adapter)

for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) {
tx_ring = &adapter->tx_ring[i];
rx_ring = &adapter->rx_ring[i];

rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
if (unlikely(rc))
return;

rc = check_missing_comp_in_queue(adapter, tx_ring);
rc = check_for_rx_interrupt_queue(adapter, rx_ring);
if (unlikely(rc))
return;

Expand Down Expand Up @@ -3169,7 +3234,7 @@ static void ena_timer_service(unsigned long data)

check_for_admin_com_state(adapter);

check_for_missing_tx_completions(adapter);
check_for_missing_completions(adapter);

check_for_empty_rx_ring(adapter);

Expand Down
6 changes: 5 additions & 1 deletion kernel/linux/ena/ena_netdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
#include "ena_eth_com.h"

#define DRV_MODULE_VER_MAJOR 1
#define DRV_MODULE_VER_MINOR 4
#define DRV_MODULE_VER_MINOR 5
#define DRV_MODULE_VER_SUBMINOR 0

#define DRV_MODULE_NAME "ena"
Expand Down Expand Up @@ -124,6 +124,7 @@
* We wait for 6 sec just to be on the safe side.
*/
#define ENA_DEVICE_KALIVE_TIMEOUT (6 * HZ)
#define ENA_MAX_NO_INTERRUPT_ITERATIONS 3

#define ENA_MMIO_DISABLE_REG_READ BIT(0)

Expand Down Expand Up @@ -246,6 +247,9 @@ struct ena_ring {
/* The maximum header length the device can handle */
u8 tx_max_header_size;

bool first_interrupt;
u16 no_interrupt_event_cnt;

/* cpu for TPH */
int cpu;
/* number of tx/rx_buffer_info's entries */
Expand Down
2 changes: 1 addition & 1 deletion kernel/linux/rpm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Author: Cristian Gafton <gafton@amazon.com>

NAME = ena
VERSION = 1.4.0
VERSION = 1.5.0

TOPDIR := $(shell git rev-parse --show-toplevel)

Expand Down
2 changes: 1 addition & 1 deletion kernel/linux/rpm/README-rpm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Once the pre-requisites have been installed, you can simply issue a
"make" in this directory to build the kmod src.rpm package:

bash$ make
cd .. && git archive --format=tar --prefix=ena-1.4.0/ -o rpm/ena-1.4.0.tar ena_linux_1.4.0
cd .. && git archive --format=tar --prefix=ena-1.5.0/ -o rpm/ena-1.5.0.tar ena_linux_1.5.0
rpmbuild -bs \
--define '_topdir %(pwd)' --define '_ntopdir %(pwd)' \
--define '_builddir %{_ntopdir}' \
Expand Down
7 changes: 5 additions & 2 deletions kernel/linux/rpm/ena.spec
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
%define kmod_name ena
%define kmod_driver_version 1.4.0
%define kmod_driver_version 1.5.0
%define kmod_rpm_release 1
%define kmod_git_hash 3ac3e0bf079b2c0468f759f2213541e214a6dd77
%define kmod_kbuild_dir kernel/linux/ena
Expand All @@ -22,7 +22,7 @@ Source7: preamble

Name: %{kmod_name}
Version: %{kmod_driver_version}
Release: %{kmod_rpm_release}%{?dist}.3
Release: %{kmod_rpm_release}%{?dist}.4
Summary: %{kmod_name} kernel module

Group: System/Kernel
Expand Down Expand Up @@ -99,6 +99,9 @@ install -m 644 -D source/%{kmod_kbuild_dir}/RELEASENOTES.md $RPM_BUILD_ROOT/usr/
rm -rf $RPM_BUILD_ROOT

%changelog
* Wed Nov 29 2017 Netanel Belgazal netanel@amazon.com - 1.5.0-1.4
- Update ENA driver to version 1.5.0

* Mon Nov 13 2017 Netanel Belgazal netanel@amazon.com - 1.4.0-1.3
- Update ENA driver to version 1.4.0

Expand Down

0 comments on commit 8770d55

Please sign in to comment.