Skip to content

Commit 212d16c

Browse files
Gavin Shan authored and
ozbenh committed
powerpc/eeh: EEH support for VFIO PCI device
The patch exports functions to be used by a new VFIO ioctl command, which will be introduced in a subsequent patch, to support EEH functionality for VFIO PCI devices. Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Acked-by: Alexander Graf <agraf@suse.de> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
1 parent 05ec424 commit 212d16c

File tree

2 files changed

+280
-0
lines changed

2 files changed

+280
-0
lines changed

arch/powerpc/include/asm/eeh.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,11 @@ enum {
172172
#define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */
173173
#define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */
174174
#define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */
175+
#define EEH_PE_STATE_NORMAL 0 /* Normal state */
176+
#define EEH_PE_STATE_RESET 1 /* PE reset asserted */
177+
#define EEH_PE_STATE_STOPPED_IO_DMA 2 /* Frozen PE */
178+
#define EEH_PE_STATE_STOPPED_DMA 4 /* Stopped DMA, Enabled IO */
179+
#define EEH_PE_STATE_UNAVAIL 5 /* Unavailable */
175180
#define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */
176181
#define EEH_RESET_HOT 1 /* Hot reset */
177182
#define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */
@@ -279,6 +284,13 @@ void eeh_add_device_late(struct pci_dev *);
279284
void eeh_add_device_tree_late(struct pci_bus *);
280285
void eeh_add_sysfs_files(struct pci_bus *);
281286
void eeh_remove_device(struct pci_dev *);
287+
int eeh_dev_open(struct pci_dev *pdev);
288+
void eeh_dev_release(struct pci_dev *pdev);
289+
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group);
290+
int eeh_pe_set_option(struct eeh_pe *pe, int option);
291+
int eeh_pe_get_state(struct eeh_pe *pe);
292+
int eeh_pe_reset(struct eeh_pe *pe, int option);
293+
int eeh_pe_configure(struct eeh_pe *pe);
282294

283295
/**
284296
* EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.

arch/powerpc/kernel/eeh.c

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include <asm/eeh.h>
4141
#include <asm/eeh_event.h>
4242
#include <asm/io.h>
43+
#include <asm/iommu.h>
4344
#include <asm/machdep.h>
4445
#include <asm/ppc-pci.h>
4546
#include <asm/rtas.h>
@@ -108,6 +109,9 @@ struct eeh_ops *eeh_ops = NULL;
108109
/* Lock to avoid races due to multiple reports of an error */
109110
DEFINE_RAW_SPINLOCK(confirm_error_lock);
110111

112+
/* Lock to protect passed flags */
113+
static DEFINE_MUTEX(eeh_dev_mutex);
114+
111115
/* Buffer for reporting pci register dumps. Its here in BSS, and
112116
* not dynamically alloced, so that it ends up in RMO where RTAS
113117
* can access it.
@@ -1108,6 +1112,270 @@ void eeh_remove_device(struct pci_dev *dev)
11081112
edev->mode &= ~EEH_DEV_SYSFS;
11091113
}
11101114

1115+
/**
1116+
* eeh_dev_open - Increase count of pass through devices for PE
1117+
* @pdev: PCI device
1118+
*
1119+
* Increase count of passed through devices for the indicated
1120+
* PE. In the result, the EEH errors detected on the PE won't be
1121+
* reported. The PE owner will be responsible for detection
1122+
* and recovery.
1123+
*/
1124+
int eeh_dev_open(struct pci_dev *pdev)
1125+
{
1126+
struct eeh_dev *edev;
1127+
1128+
mutex_lock(&eeh_dev_mutex);
1129+
1130+
/* No PCI device ? */
1131+
if (!pdev)
1132+
goto out;
1133+
1134+
/* No EEH device or PE ? */
1135+
edev = pci_dev_to_eeh_dev(pdev);
1136+
if (!edev || !edev->pe)
1137+
goto out;
1138+
1139+
/* Increase PE's pass through count */
1140+
atomic_inc(&edev->pe->pass_dev_cnt);
1141+
mutex_unlock(&eeh_dev_mutex);
1142+
1143+
return 0;
1144+
out:
1145+
mutex_unlock(&eeh_dev_mutex);
1146+
return -ENODEV;
1147+
}
1148+
EXPORT_SYMBOL_GPL(eeh_dev_open);
1149+
1150+
/**
1151+
* eeh_dev_release - Decrease count of pass through devices for PE
1152+
* @pdev: PCI device
1153+
*
1154+
* Decrease count of pass through devices for the indicated PE. If
1155+
* there is no passed through device in PE, the EEH errors detected
1156+
* on the PE will be reported and handled as usual.
1157+
*/
1158+
void eeh_dev_release(struct pci_dev *pdev)
1159+
{
1160+
struct eeh_dev *edev;
1161+
1162+
mutex_lock(&eeh_dev_mutex);
1163+
1164+
/* No PCI device ? */
1165+
if (!pdev)
1166+
goto out;
1167+
1168+
/* No EEH device ? */
1169+
edev = pci_dev_to_eeh_dev(pdev);
1170+
if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
1171+
goto out;
1172+
1173+
/* Decrease PE's pass through count */
1174+
atomic_dec(&edev->pe->pass_dev_cnt);
1175+
WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
1176+
out:
1177+
mutex_unlock(&eeh_dev_mutex);
1178+
}
1179+
EXPORT_SYMBOL(eeh_dev_release);
1180+
1181+
/**
1182+
* eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
1183+
* @group: IOMMU group
1184+
*
1185+
* The routine is called to convert IOMMU group to EEH PE.
1186+
*/
1187+
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
1188+
{
1189+
struct iommu_table *tbl;
1190+
struct pci_dev *pdev = NULL;
1191+
struct eeh_dev *edev;
1192+
bool found = false;
1193+
1194+
/* No IOMMU group ? */
1195+
if (!group)
1196+
return NULL;
1197+
1198+
/* No PCI device ? */
1199+
for_each_pci_dev(pdev) {
1200+
tbl = get_iommu_table_base(&pdev->dev);
1201+
if (tbl && tbl->it_group == group) {
1202+
found = true;
1203+
break;
1204+
}
1205+
}
1206+
if (!found)
1207+
return NULL;
1208+
1209+
/* No EEH device or PE ? */
1210+
edev = pci_dev_to_eeh_dev(pdev);
1211+
if (!edev || !edev->pe)
1212+
return NULL;
1213+
1214+
return edev->pe;
1215+
}
1216+
1217+
/**
1218+
* eeh_pe_set_option - Set options for the indicated PE
1219+
* @pe: EEH PE
1220+
* @option: requested option
1221+
*
1222+
* The routine is called to enable or disable EEH functionality
1223+
* on the indicated PE, to enable IO or DMA for the frozen PE.
1224+
*/
1225+
int eeh_pe_set_option(struct eeh_pe *pe, int option)
1226+
{
1227+
int ret = 0;
1228+
1229+
/* Invalid PE ? */
1230+
if (!pe)
1231+
return -ENODEV;
1232+
1233+
/*
1234+
* EEH functionality could possibly be disabled, just
1235+
* return error for the case. And the EEH functinality
1236+
* isn't expected to be disabled on one specific PE.
1237+
*/
1238+
switch (option) {
1239+
case EEH_OPT_ENABLE:
1240+
if (eeh_enabled())
1241+
break;
1242+
ret = -EIO;
1243+
break;
1244+
case EEH_OPT_DISABLE:
1245+
break;
1246+
case EEH_OPT_THAW_MMIO:
1247+
case EEH_OPT_THAW_DMA:
1248+
if (!eeh_ops || !eeh_ops->set_option) {
1249+
ret = -ENOENT;
1250+
break;
1251+
}
1252+
1253+
ret = eeh_ops->set_option(pe, option);
1254+
break;
1255+
default:
1256+
pr_debug("%s: Option %d out of range (%d, %d)\n",
1257+
__func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
1258+
ret = -EINVAL;
1259+
}
1260+
1261+
return ret;
1262+
}
1263+
EXPORT_SYMBOL_GPL(eeh_pe_set_option);
1264+
1265+
/**
1266+
* eeh_pe_get_state - Retrieve PE's state
1267+
* @pe: EEH PE
1268+
*
1269+
* Retrieve the PE's state, which includes 3 aspects: enabled
1270+
* DMA, enabled IO and asserted reset.
1271+
*/
1272+
int eeh_pe_get_state(struct eeh_pe *pe)
1273+
{
1274+
int result, ret = 0;
1275+
bool rst_active, dma_en, mmio_en;
1276+
1277+
/* Existing PE ? */
1278+
if (!pe)
1279+
return -ENODEV;
1280+
1281+
if (!eeh_ops || !eeh_ops->get_state)
1282+
return -ENOENT;
1283+
1284+
result = eeh_ops->get_state(pe, NULL);
1285+
rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
1286+
dma_en = !!(result & EEH_STATE_DMA_ENABLED);
1287+
mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);
1288+
1289+
if (rst_active)
1290+
ret = EEH_PE_STATE_RESET;
1291+
else if (dma_en && mmio_en)
1292+
ret = EEH_PE_STATE_NORMAL;
1293+
else if (!dma_en && !mmio_en)
1294+
ret = EEH_PE_STATE_STOPPED_IO_DMA;
1295+
else if (!dma_en && mmio_en)
1296+
ret = EEH_PE_STATE_STOPPED_DMA;
1297+
else
1298+
ret = EEH_PE_STATE_UNAVAIL;
1299+
1300+
return ret;
1301+
}
1302+
EXPORT_SYMBOL_GPL(eeh_pe_get_state);
1303+
1304+
/**
1305+
* eeh_pe_reset - Issue PE reset according to specified type
1306+
* @pe: EEH PE
1307+
* @option: reset type
1308+
*
1309+
* The routine is called to reset the specified PE with the
1310+
* indicated type, either fundamental reset or hot reset.
1311+
* PE reset is the most important part for error recovery.
1312+
*/
1313+
int eeh_pe_reset(struct eeh_pe *pe, int option)
1314+
{
1315+
int ret = 0;
1316+
1317+
/* Invalid PE ? */
1318+
if (!pe)
1319+
return -ENODEV;
1320+
1321+
if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
1322+
return -ENOENT;
1323+
1324+
switch (option) {
1325+
case EEH_RESET_DEACTIVATE:
1326+
ret = eeh_ops->reset(pe, option);
1327+
if (ret)
1328+
break;
1329+
1330+
/*
1331+
* The PE is still in frozen state and we need to clear
1332+
* that. It's good to clear frozen state after deassert
1333+
* to avoid messy IO access during reset, which might
1334+
* cause recursive frozen PE.
1335+
*/
1336+
ret = eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
1337+
if (!ret)
1338+
ret = eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
1339+
if (!ret)
1340+
eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
1341+
break;
1342+
case EEH_RESET_HOT:
1343+
case EEH_RESET_FUNDAMENTAL:
1344+
ret = eeh_ops->reset(pe, option);
1345+
break;
1346+
default:
1347+
pr_debug("%s: Unsupported option %d\n",
1348+
__func__, option);
1349+
ret = -EINVAL;
1350+
}
1351+
1352+
return ret;
1353+
}
1354+
EXPORT_SYMBOL_GPL(eeh_pe_reset);
1355+
/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * Restores the PCI config space of the devices, PCI bridges in
 * particular, that were affected by a previously issued PE reset.
 *
 * Return: 0 on success, -ENODEV for a NULL @pe.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/* Restore config space for the affected devices */
	eeh_pe_restore_bars(pe);

	return 0;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);
1378+
11111379
static int proc_eeh_show(struct seq_file *m, void *v)
11121380
{
11131381
if (!eeh_enabled()) {

0 commit comments

Comments
 (0)