Skip to content

Commit 6c2b374

Browse files
yanminzhgregkh
authored andcommitted
PCI-Express AER implementation: AER core and aerdriver
Patch 3 implements the core part of PCI-Express AER and the aerdrv port service driver. When a root port service device is probed, the aerdrv will call request_irq to register an irq handler for the AER error interrupt. When a device sends a PCI-Express error message to the root port, the root port will trigger an interrupt, by either MSI or IO-APIC, and the kernel then runs the irq handler. The handler collects the root error status register and schedules a work item. The work item will call the core part to process the error based on its type (Correctable/non-fatal/fatal). As for Correctable errors, the patch chooses to just clear the correctable error status register of the device. As for the non-fatal error, the patch follows generic PCI error handler rules to call the error callback functions of the endpoint's driver. If the device is a bridge, the patch chooses to broadcast the error to downstream devices. As for the fatal error, the patch resets the pci-express link and follows generic PCI error handler rules to call the error callback functions of the endpoint's driver. If the device is a bridge, the patch chooses to broadcast the error to downstream devices. Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
1 parent 4840815 commit 6c2b374

File tree

11 files changed

+1598
-0
lines changed

11 files changed

+1598
-0
lines changed

drivers/pci/pcie/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,4 @@ config HOTPLUG_PCI_PCIE_POLL_EVENT_MODE
3434

3535
When in doubt, say N.
3636

37+
source "drivers/pci/pcie/aer/Kconfig"

drivers/pci/pcie/Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,6 @@
55
pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o
66

77
obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o
8+
9+
# Build PCI Express AER if needed
10+
obj-$(CONFIG_PCIEAER) += aer/

drivers/pci/pcie/aer/Kconfig

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#
2+
# PCI Express Root Port Device AER Configuration
3+
#
4+
5+
config PCIEAER
6+
boolean "Root Port Advanced Error Reporting support"
7+
depends on PCIEPORTBUS && ACPI
8+
default y
9+
help
10+
This enables PCI Express Root Port Advanced Error Reporting
11+
(AER) driver support. Error reporting messages sent to Root
12+
Port will be handled by PCI Express AER driver.

drivers/pci/pcie/aer/Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#
2+
# Makefile for PCI-Express Root Port Advanced Error Reporting Driver
3+
#
4+
5+
obj-$(CONFIG_PCIEAER) += aerdriver.o
6+
7+
aerdriver-objs := aerdrv_errprint.o aerdrv_core.o aerdrv.o aerdrv_acpi.o
8+

drivers/pci/pcie/aer/aerdrv.c

Lines changed: 346 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,346 @@
1+
/*
2+
* drivers/pci/pcie/aer/aerdrv.c
3+
*
4+
* This file is subject to the terms and conditions of the GNU General Public
5+
* License. See the file "COPYING" in the main directory of this archive
6+
* for more details.
7+
*
8+
* This file implements the AER root port service driver. The driver will
9+
* register an irq handler. When root port triggers an AER interrupt, the irq
10+
* handler will collect root port status and schedule a work.
11+
*
12+
* Copyright (C) 2006 Intel Corp.
13+
* Tom Long Nguyen (tom.l.nguyen@intel.com)
14+
* Zhang Yanmin (yanmin.zhang@intel.com)
15+
*
16+
*/
17+
18+
#include <linux/module.h>
19+
#include <linux/pci.h>
20+
#include <linux/kernel.h>
21+
#include <linux/errno.h>
22+
#include <linux/pm.h>
23+
#include <linux/init.h>
24+
#include <linux/interrupt.h>
25+
#include <linux/delay.h>
26+
#include <linux/pcieport_if.h>
27+
28+
#include "aerdrv.h"
29+
30+
/*
31+
* Version Information
32+
*/
33+
#define DRIVER_VERSION "v1.0"
34+
#define DRIVER_AUTHOR "tom.l.nguyen@intel.com"
35+
#define DRIVER_DESC "Root Port Advanced Error Reporting Driver"
36+
MODULE_AUTHOR(DRIVER_AUTHOR);
37+
MODULE_DESCRIPTION(DRIVER_DESC);
38+
MODULE_LICENSE("GPL");
39+
40+
static int __devinit aer_probe (struct pcie_device *dev,
41+
const struct pcie_port_service_id *id );
42+
static void aer_remove(struct pcie_device *dev);
43+
static int aer_suspend(struct pcie_device *dev, pm_message_t state)
44+
{return 0;}
45+
static int aer_resume(struct pcie_device *dev) {return 0;}
46+
static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
47+
enum pci_channel_state error);
48+
static void aer_error_resume(struct pci_dev *dev);
49+
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
50+
51+
/*
52+
* PCI Express bus's AER Root service driver data structure
53+
*/
54+
static struct pcie_port_service_id aer_id[] = {
55+
{
56+
.vendor = PCI_ANY_ID,
57+
.device = PCI_ANY_ID,
58+
.port_type = PCIE_RC_PORT,
59+
.service_type = PCIE_PORT_SERVICE_AER,
60+
},
61+
{ /* end: all zeroes */ }
62+
};
63+
64+
static struct pci_error_handlers aer_error_handlers = {
65+
.error_detected = aer_error_detected,
66+
.resume = aer_error_resume,
67+
};
68+
69+
static struct pcie_port_service_driver aerdrv = {
70+
.name = "aer",
71+
.id_table = &aer_id[0],
72+
73+
.probe = aer_probe,
74+
.remove = aer_remove,
75+
76+
.suspend = aer_suspend,
77+
.resume = aer_resume,
78+
79+
.err_handler = &aer_error_handlers,
80+
81+
.reset_link = aer_root_reset,
82+
};
83+
84+
/**
85+
* aer_irq - Root Port's ISR
86+
* @irq: IRQ assigned to Root Port
87+
* @context: pointer to Root Port data structure
88+
* @r: pointer struct pt_regs
89+
*
90+
* Invoked when Root Port detects AER messages.
91+
**/
92+
static irqreturn_t aer_irq(int irq, void *context, struct pt_regs * r)
93+
{
94+
unsigned int status, id;
95+
struct pcie_device *pdev = (struct pcie_device *)context;
96+
struct aer_rpc *rpc = get_service_data(pdev);
97+
int next_prod_idx;
98+
unsigned long flags;
99+
int pos;
100+
101+
pos = pci_find_aer_capability(pdev->port);
102+
/*
103+
* Must lock access to Root Error Status Reg, Root Error ID Reg,
104+
* and Root error producer/consumer index
105+
*/
106+
spin_lock_irqsave(&rpc->e_lock, flags);
107+
108+
/* Read error status */
109+
pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status);
110+
if (!(status & ROOT_ERR_STATUS_MASKS)) {
111+
spin_unlock_irqrestore(&rpc->e_lock, flags);
112+
return IRQ_NONE;
113+
}
114+
115+
/* Read error source and clear error status */
116+
pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_COR_SRC, &id);
117+
pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status);
118+
119+
/* Store error source for later DPC handler */
120+
next_prod_idx = rpc->prod_idx + 1;
121+
if (next_prod_idx == AER_ERROR_SOURCES_MAX)
122+
next_prod_idx = 0;
123+
if (next_prod_idx == rpc->cons_idx) {
124+
/*
125+
* Error Storm Condition - possibly the same error occurred.
126+
* Drop the error.
127+
*/
128+
spin_unlock_irqrestore(&rpc->e_lock, flags);
129+
return IRQ_HANDLED;
130+
}
131+
rpc->e_sources[rpc->prod_idx].status = status;
132+
rpc->e_sources[rpc->prod_idx].id = id;
133+
rpc->prod_idx = next_prod_idx;
134+
spin_unlock_irqrestore(&rpc->e_lock, flags);
135+
136+
/* Invoke DPC handler */
137+
schedule_work(&rpc->dpc_handler);
138+
139+
return IRQ_HANDLED;
140+
}
141+
142+
/**
143+
* aer_alloc_rpc - allocate Root Port data structure
144+
* @dev: pointer to the pcie_dev data structure
145+
*
146+
* Invoked when Root Port's AER service is loaded.
147+
**/
148+
static struct aer_rpc* aer_alloc_rpc(struct pcie_device *dev)
149+
{
150+
struct aer_rpc *rpc;
151+
152+
if (!(rpc = (struct aer_rpc *)kmalloc(sizeof(struct aer_rpc),
153+
GFP_KERNEL)))
154+
return NULL;
155+
156+
memset(rpc, 0, sizeof(struct aer_rpc));
157+
/*
158+
* Initialize Root lock access, e_lock, to Root Error Status Reg,
159+
* Root Error ID Reg, and Root error producer/consumer index.
160+
*/
161+
rpc->e_lock = SPIN_LOCK_UNLOCKED;
162+
163+
rpc->rpd = dev;
164+
INIT_WORK(&rpc->dpc_handler, aer_isr, (void *)dev);
165+
rpc->prod_idx = rpc->cons_idx = 0;
166+
mutex_init(&rpc->rpc_mutex);
167+
init_waitqueue_head(&rpc->wait_release);
168+
169+
/* Use PCIE bus function to store rpc into PCIE device */
170+
set_service_data(dev, rpc);
171+
172+
return rpc;
173+
}
174+
175+
/**
176+
* aer_remove - clean up resources
177+
* @dev: pointer to the pcie_dev data structure
178+
*
179+
* Invoked when PCI Express bus unloads or AER probe fails.
180+
**/
181+
static void aer_remove(struct pcie_device *dev)
182+
{
183+
struct aer_rpc *rpc = get_service_data(dev);
184+
185+
if (rpc) {
186+
/* If register interrupt service, it must be free. */
187+
if (rpc->isr)
188+
free_irq(dev->irq, dev);
189+
190+
wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx);
191+
192+
aer_delete_rootport(rpc);
193+
set_service_data(dev, NULL);
194+
}
195+
}
196+
197+
/**
198+
* aer_probe - initialize resources
199+
* @dev: pointer to the pcie_dev data structure
200+
* @id: pointer to the service id data structure
201+
*
202+
* Invoked when PCI Express bus loads AER service driver.
203+
**/
204+
static int __devinit aer_probe (struct pcie_device *dev,
205+
const struct pcie_port_service_id *id )
206+
{
207+
int status;
208+
struct aer_rpc *rpc;
209+
struct device *device = &dev->device;
210+
211+
/* Init */
212+
if ((status = aer_init(dev)))
213+
return status;
214+
215+
/* Alloc rpc data structure */
216+
if (!(rpc = aer_alloc_rpc(dev))) {
217+
printk(KERN_DEBUG "%s: Alloc rpc fails on PCIE device[%s]\n",
218+
__FUNCTION__, device->bus_id);
219+
aer_remove(dev);
220+
return -ENOMEM;
221+
}
222+
223+
/* Request IRQ ISR */
224+
if ((status = request_irq(dev->irq, aer_irq, SA_SHIRQ, "aerdrv",
225+
dev))) {
226+
printk(KERN_DEBUG "%s: Request ISR fails on PCIE device[%s]\n",
227+
__FUNCTION__, device->bus_id);
228+
aer_remove(dev);
229+
return status;
230+
}
231+
232+
rpc->isr = 1;
233+
234+
aer_enable_rootport(rpc);
235+
236+
return status;
237+
}
238+
239+
/**
240+
* aer_root_reset - reset link on Root Port
241+
* @dev: pointer to Root Port's pci_dev data structure
242+
*
243+
* Invoked by Port Bus driver when performing link reset at Root Port.
244+
**/
245+
static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
246+
{
247+
u16 p2p_ctrl;
248+
u32 status;
249+
int pos;
250+
251+
pos = pci_find_aer_capability(dev);
252+
253+
/* Disable Root's interrupt in response to error messages */
254+
pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0);
255+
256+
/* Assert Secondary Bus Reset */
257+
pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &p2p_ctrl);
258+
p2p_ctrl |= PCI_CB_BRIDGE_CTL_CB_RESET;
259+
pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl);
260+
261+
/* De-assert Secondary Bus Reset */
262+
p2p_ctrl &= ~PCI_CB_BRIDGE_CTL_CB_RESET;
263+
pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl);
264+
265+
/*
266+
* System software must wait for at least 100ms from the end
267+
* of a reset of one or more device before it is permitted
268+
* to issue Configuration Requests to those devices.
269+
*/
270+
msleep(200);
271+
printk(KERN_DEBUG "Complete link reset at Root[%s]\n", dev->dev.bus_id);
272+
273+
/* Enable Root Port's interrupt in response to error messages */
274+
pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
275+
pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status);
276+
pci_write_config_dword(dev,
277+
pos + PCI_ERR_ROOT_COMMAND,
278+
ROOT_PORT_INTR_ON_MESG_MASK);
279+
280+
return PCI_ERS_RESULT_RECOVERED;
281+
}
282+
283+
/**
284+
* aer_error_detected - update severity status
285+
* @dev: pointer to Root Port's pci_dev data structure
286+
* @error: error severity being notified by port bus
287+
*
288+
* Invoked by Port Bus driver during error recovery.
289+
**/
290+
static pci_ers_result_t aer_error_detected(struct pci_dev *dev,
291+
enum pci_channel_state error)
292+
{
293+
/* Root Port has no impact. Always recovers. */
294+
return PCI_ERS_RESULT_CAN_RECOVER;
295+
}
296+
297+
/**
298+
* aer_error_resume - clean up corresponding error status bits
299+
* @dev: pointer to Root Port's pci_dev data structure
300+
*
301+
* Invoked by Port Bus driver during nonfatal recovery.
302+
**/
303+
static void aer_error_resume(struct pci_dev *dev)
304+
{
305+
int pos;
306+
u32 status, mask;
307+
u16 reg16;
308+
309+
/* Clean up Root device status */
310+
pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
311+
pci_read_config_word(dev, pos + PCI_EXP_DEVSTA, &reg16);
312+
pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16);
313+
314+
/* Clean AER Root Error Status */
315+
pos = pci_find_aer_capability(dev);
316+
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
317+
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
318+
if (dev->error_state == pci_channel_io_normal)
319+
status &= ~mask; /* Clear corresponding nonfatal bits */
320+
else
321+
status &= mask; /* Clear corresponding fatal bits */
322+
pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
323+
}
324+
325+
/**
326+
* aer_service_init - register AER root service driver
327+
*
328+
* Invoked when AER root service driver is loaded.
329+
**/
330+
static int __init aer_service_init(void)
331+
{
332+
return pcie_port_service_register(&aerdrv);
333+
}
334+
335+
/**
336+
* aer_service_exit - unregister AER root service driver
337+
*
338+
* Invoked when AER root service driver is unloaded.
339+
**/
340+
static void __exit aer_service_exit(void)
341+
{
342+
pcie_port_service_unregister(&aerdrv);
343+
}
344+
345+
module_init(aer_service_init);
346+
module_exit(aer_service_exit);

0 commit comments

Comments
 (0)