Skip to content

Commit e53951f

Browse files
committed
EDAC/i10nm: Retrieve and print retry_rd_err_log registers for HBM
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2165650
Tested: tested with the EMR machine we have in the lab

commit acd4cf6
Author: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Date: Fri Jul 22 16:33:36 2022 -0700

    EDAC/i10nm: Retrieve and print retry_rd_err_log registers for HBM

    An HBM memory channel is divided into two pseudo channels. Each pseudo
    channel has its own retry_rd_err_log registers. Retrieve and print
    retry_rd_err_log registers of the HBM pseudo channel if the memory
    error is from HBM.

    Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
    Signed-off-by: Tony Luck <tony.luck@intel.com>
    Link: https://lore.kernel.org/all/20220722233338.341567-1-tony.luck@intel.com

Signed-off-by: Aristeu Rozanski <arozansk@redhat.com>
1 parent 49b12d5 commit e53951f

File tree

2 files changed

+71
-17
lines changed

2 files changed

+71
-17
lines changed

drivers/edac/i10nm_base.c

Lines changed: 67 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -77,18 +77,20 @@ static int retry_rd_err_log;
7777

7878
static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
7979
static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
80+
static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
81+
static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
8082
static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
8183
static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
84+
static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
85+
static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
8286

83-
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable)
87+
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
88+
u32 *offsets_scrub, u32 *offsets_demand)
8489
{
8590
u32 s, d;
8691

87-
if (!imc->mbase)
88-
return;
89-
90-
s = I10NM_GET_REG32(imc, chan, res_cfg->offsets_scrub[0]);
91-
d = I10NM_GET_REG32(imc, chan, res_cfg->offsets_demand[0]);
92+
s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
93+
d = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
9294

9395
if (enable) {
9496
/* Save default configurations */
@@ -115,21 +117,39 @@ static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable
115117
d &= ~RETRY_RD_ERR_LOG_EN;
116118
}
117119

118-
I10NM_SET_REG32(imc, chan, res_cfg->offsets_scrub[0], s);
119-
I10NM_SET_REG32(imc, chan, res_cfg->offsets_demand[0], d);
120+
I10NM_SET_REG32(imc, chan, offsets_scrub[0], s);
121+
I10NM_SET_REG32(imc, chan, offsets_demand[0], d);
120122
}
121123

122124
static void enable_retry_rd_err_log(bool enable)
123125
{
126+
struct skx_imc *imc;
124127
struct skx_dev *d;
125128
int i, j;
126129

127130
edac_dbg(2, "\n");
128131

129132
list_for_each_entry(d, i10nm_edac_list, list)
130-
for (i = 0; i < I10NM_NUM_IMC; i++)
131-
for (j = 0; j < I10NM_NUM_CHANNELS; j++)
132-
__enable_retry_rd_err_log(&d->imc[i], j, enable);
133+
for (i = 0; i < I10NM_NUM_IMC; i++) {
134+
imc = &d->imc[i];
135+
if (!imc->mbase)
136+
continue;
137+
138+
for (j = 0; j < I10NM_NUM_CHANNELS; j++) {
139+
if (imc->hbm_mc) {
140+
__enable_retry_rd_err_log(imc, j, enable,
141+
res_cfg->offsets_scrub_hbm0,
142+
res_cfg->offsets_demand_hbm0);
143+
__enable_retry_rd_err_log(imc, j, enable,
144+
res_cfg->offsets_scrub_hbm1,
145+
res_cfg->offsets_demand_hbm1);
146+
} else {
147+
__enable_retry_rd_err_log(imc, j, enable,
148+
res_cfg->offsets_scrub,
149+
res_cfg->offsets_demand);
150+
}
151+
}
152+
}
133153
}
134154

135155
static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
@@ -140,12 +160,24 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
140160
u32 corr0, corr1, corr2, corr3;
141161
u64 log2a, log5;
142162
u32 *offsets;
143-
int n;
163+
int n, pch;
144164

145165
if (!imc->mbase)
146166
return;
147167

148-
offsets = scrub_err ? res_cfg->offsets_scrub : res_cfg->offsets_demand;
168+
if (imc->hbm_mc) {
169+
pch = res->cs & 1;
170+
171+
if (pch)
172+
offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 :
173+
res_cfg->offsets_demand_hbm1;
174+
else
175+
offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 :
176+
res_cfg->offsets_demand_hbm0;
177+
} else {
178+
offsets = scrub_err ? res_cfg->offsets_scrub :
179+
res_cfg->offsets_demand;
180+
}
149181

150182
log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
151183
log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
@@ -163,10 +195,24 @@ static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
163195
log0, log1, log2, log3, log4, log5);
164196
}
165197

166-
corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
167-
corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
168-
corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
169-
corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
198+
if (imc->hbm_mc) {
199+
if (pch) {
200+
corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18);
201+
corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c);
202+
corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20);
203+
corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24);
204+
} else {
205+
corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818);
206+
corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c);
207+
corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820);
208+
corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824);
209+
}
210+
} else {
211+
corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
212+
corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
213+
corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
214+
corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
215+
}
170216

171217
if (len - n > 0)
172218
snprintf(msg + n, len - n,
@@ -411,7 +457,11 @@ static struct res_config spr_cfg = {
411457
.sad_all_devfn = PCI_DEVFN(10, 0),
412458
.sad_all_offset = 0x300,
413459
.offsets_scrub = offsets_scrub_spr,
460+
.offsets_scrub_hbm0 = offsets_scrub_spr_hbm0,
461+
.offsets_scrub_hbm1 = offsets_scrub_spr_hbm1,
414462
.offsets_demand = offsets_demand_spr,
463+
.offsets_demand_hbm0 = offsets_demand_spr_hbm0,
464+
.offsets_demand_hbm1 = offsets_demand_spr_hbm1,
415465
};
416466

417467
static const struct x86_cpu_id i10nm_cpuids[] = {

drivers/edac/skx_common.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,11 @@ struct res_config {
159159
int sad_all_offset;
160160
/* Offsets of retry_rd_err_log registers */
161161
u32 *offsets_scrub;
162+
u32 *offsets_scrub_hbm0;
163+
u32 *offsets_scrub_hbm1;
162164
u32 *offsets_demand;
165+
u32 *offsets_demand_hbm0;
166+
u32 *offsets_demand_hbm1;
163167
};
164168

165169
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,

0 commit comments

Comments
 (0)