Skip to content

Commit bafb12f

Browse files
committed
Merge tag 'kernel-5.14.0-427.3.1.el9_4' from 9.4
kernel-5.14.0-427.3.1.el9_4 Conflicts: - Makefile.rhelver changes dropped - redhat/rhdocs/ changes dropped Signed-off-by: Scott Weaver <scweaver@redhat.com>
2 parents a7a48e2 + dce04cf commit bafb12f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+1808
-924
lines changed

Documentation/driver-api/edac.rst

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,16 @@ will occupy those chip-select rows.
106106
This term is avoided because it is unclear when needing to distinguish
107107
between chip-select rows and socket sets.
108108

109+
* High Bandwidth Memory (HBM)
110+
111+
HBM is a new memory type with low power consumption and ultra-wide
112+
communication lanes. It uses vertically stacked memory chips (DRAM dies)
113+
interconnected by microscopic wires called "through-silicon vias," or
114+
TSVs.
115+
116+
Several stacks of HBM chips connect to the CPU or GPU through an ultra-fast
117+
interconnect called the "interposer". Therefore, HBM's characteristics
118+
are nearly indistinguishable from on-chip integrated RAM.
109119

110120
Memory Controllers
111121
------------------
@@ -176,3 +186,113 @@ nodes::
176186
the L1 and L2 directories would be "edac_device_block's"
177187

178188
.. kernel-doc:: drivers/edac/edac_device.h
189+
190+
191+
Heterogeneous system support
192+
----------------------------
193+
194+
An AMD heterogeneous system is built by connecting the data fabrics of
195+
both CPUs and GPUs via custom xGMI links. Thus, the data fabric on the
196+
GPU nodes can be accessed the same way as the data fabric on CPU nodes.
197+
198+
The MI200 accelerators are data center GPUs. They have 2 data fabrics,
199+
and each GPU data fabric contains four Unified Memory Controllers (UMC).
200+
Each UMC contains eight channels. Each UMC channel controls one 128-bit
201+
HBM2e (2GB) channel (equivalent to 8 X 2GB ranks). This creates a total
202+
of 4096 bits of DRAM data bus.
203+
204+
While the UMC is interfacing a 16GB (8-high X 2GB DRAM) HBM stack, each UMC
205+
channel is interfacing 2GB of DRAM (represented as rank).
206+
207+
Memory controllers on AMD GPU nodes can be represented in EDAC thusly:
208+
209+
GPU DF / GPU Node -> EDAC MC
210+
GPU UMC -> EDAC CSROW
211+
GPU UMC channel -> EDAC CHANNEL
212+
213+
For example: a heterogeneous system with 1 AMD CPU is connected to
214+
4 MI200 (Aldebaran) GPUs using xGMI.
215+
216+
Some more heterogeneous hardware details:
217+
218+
- The CPU UMC (Unified Memory Controller) is mostly the same as the GPU UMC.
219+
They have chip selects (csrows) and channels. However, the layouts are different
220+
for performance, physical layout, or other reasons.
221+
- CPU UMCs use 1 channel. In this case, UMC = EDAC channel. This follows the
222+
marketing speak: "CPU has X memory channels", etc.
223+
- CPU UMCs use up to 4 chip selects, so UMC chip select = EDAC CSROW.
224+
- GPU UMCs use 1 chip select, so UMC = EDAC CSROW.
225+
- GPU UMCs use 8 channels, so UMC channel = EDAC channel.
226+
227+
The EDAC subsystem provides a mechanism to handle AMD heterogeneous
228+
systems by calling system specific ops for both CPUs and GPUs.
229+
230+
AMD GPU nodes are enumerated in sequential order based on the PCI
231+
hierarchy, and the first GPU node is assumed to have a Node ID value
232+
following those of the CPU nodes after the latter are fully populated::
233+
234+
$ ls /sys/devices/system/edac/mc/
235+
mc0 - CPU MC node 0
236+
mc1 |
237+
mc2 |- GPU card[0] => node 0(mc1), node 1(mc2)
238+
mc3 |
239+
mc4 |- GPU card[1] => node 0(mc3), node 1(mc4)
240+
mc5 |
241+
mc6 |- GPU card[2] => node 0(mc5), node 1(mc6)
242+
mc7 |
243+
mc8 |- GPU card[3] => node 0(mc7), node 1(mc8)
244+
245+
For example, a heterogeneous system with one AMD CPU is connected to
246+
four MI200 (Aldebaran) GPUs using xGMI. This topology can be represented
247+
via the following sysfs entries::
248+
249+
/sys/devices/system/edac/mc/..
250+
251+
CPU # CPU node
252+
├── mc 0
253+
254+
GPU Nodes are enumerated sequentially after CPU nodes have been populated
255+
GPU card 1 # Each MI200 GPU has 2 nodes/mcs
256+
├── mc 1 # GPU node 0 == mc1, Each MC node has 4 UMCs/CSROWs
257+
│   ├── csrow 0 # UMC 0
258+
│   │   ├── channel 0 # Each UMC has 8 channels
259+
│   │   ├── channel 1 # size of each channel is 2 GB, so each UMC has 16 GB
260+
│   │   ├── channel 2
261+
│   │   ├── channel 3
262+
│   │   ├── channel 4
263+
│   │   ├── channel 5
264+
│   │   ├── channel 6
265+
│   │   ├── channel 7
266+
│   ├── csrow 1 # UMC 1
267+
│   │   ├── channel 0
268+
│   │   ├── ..
269+
│   │   ├── channel 7
270+
│   ├── .. ..
271+
│   ├── csrow 3 # UMC 3
272+
│   │   ├── channel 0
273+
│   │   ├── ..
274+
│   │   ├── channel 7
275+
│   ├── rank 0
276+
│   ├── .. ..
277+
│   ├── rank 31 # total 32 ranks/dimms from 4 UMCs
278+
279+
├── mc 2 # GPU node 1 == mc2
280+
│   ├── .. # each GPU has total 64 GB
281+
282+
GPU card 2
283+
├── mc 3
284+
│   ├── ..
285+
├── mc 4
286+
│   ├── ..
287+
288+
GPU card 3
289+
├── mc 5
290+
│   ├── ..
291+
├── mc 6
292+
│   ├── ..
293+
294+
GPU card 4
295+
├── mc 7
296+
│   ├── ..
297+
├── mc 8
298+
│   ├── ..

arch/x86/include/asm/mce.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ enum smca_bank_types {
307307
SMCA_PIE, /* Power, Interrupts, etc. */
308308
SMCA_UMC, /* Unified Memory Controller */
309309
SMCA_UMC_V2,
310+
SMCA_MA_LLC, /* Memory Attached Last Level Cache */
310311
SMCA_PB, /* Parameter Block */
311312
SMCA_PSP, /* Platform Security Processor */
312313
SMCA_PSP_V2,
@@ -322,14 +323,15 @@ enum smca_bank_types {
322323
SMCA_SHUB, /* System HUB Unit */
323324
SMCA_SATA, /* SATA Unit */
324325
SMCA_USB, /* USB Unit */
326+
SMCA_USR_DP, /* Ultra Short Reach Data Plane Controller */
327+
SMCA_USR_CP, /* Ultra Short Reach Control Plane Controller */
325328
SMCA_GMI_PCS, /* GMI PCS Unit */
326329
SMCA_XGMI_PHY, /* xGMI PHY Unit */
327330
SMCA_WAFL_PHY, /* WAFL PHY Unit */
328331
SMCA_GMI_PHY, /* GMI PHY Unit */
329332
N_SMCA_BANK_TYPES
330333
};
331334

332-
extern const char *smca_get_long_name(enum smca_bank_types t);
333335
extern bool amd_mce_is_memory_error(struct mce *m);
334336

335337
extern int mce_threshold_create_device(unsigned int cpu);

arch/x86/kernel/cpu/common.c

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1155,18 +1155,34 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
11551155
void get_cpu_address_sizes(struct cpuinfo_x86 *c)
11561156
{
11571157
u32 eax, ebx, ecx, edx;
1158+
bool vp_bits_from_cpuid = true;
11581159

1159-
if (c->extended_cpuid_level >= 0x80000008) {
1160+
if (!cpu_has(c, X86_FEATURE_CPUID) ||
1161+
(c->extended_cpuid_level < 0x80000008))
1162+
vp_bits_from_cpuid = false;
1163+
1164+
if (vp_bits_from_cpuid) {
11601165
cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
11611166

11621167
c->x86_virt_bits = (eax >> 8) & 0xff;
11631168
c->x86_phys_bits = eax & 0xff;
1169+
} else {
1170+
if (IS_ENABLED(CONFIG_X86_64)) {
1171+
c->x86_clflush_size = 64;
1172+
c->x86_phys_bits = 36;
1173+
c->x86_virt_bits = 48;
1174+
} else {
1175+
c->x86_clflush_size = 32;
1176+
c->x86_virt_bits = 32;
1177+
c->x86_phys_bits = 32;
1178+
1179+
if (cpu_has(c, X86_FEATURE_PAE) ||
1180+
cpu_has(c, X86_FEATURE_PSE36))
1181+
c->x86_phys_bits = 36;
1182+
}
11641183
}
1165-
#ifdef CONFIG_X86_32
1166-
else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
1167-
c->x86_phys_bits = 36;
1168-
#endif
11691184
c->x86_cache_bits = c->x86_phys_bits;
1185+
c->x86_cache_alignment = c->x86_clflush_size;
11701186
}
11711187

11721188
static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -1582,17 +1598,6 @@ static void __init cpu_parse_early_param(void)
15821598
*/
15831599
static void __init early_identify_cpu(struct cpuinfo_x86 *c)
15841600
{
1585-
#ifdef CONFIG_X86_64
1586-
c->x86_clflush_size = 64;
1587-
c->x86_phys_bits = 36;
1588-
c->x86_virt_bits = 48;
1589-
#else
1590-
c->x86_clflush_size = 32;
1591-
c->x86_phys_bits = 32;
1592-
c->x86_virt_bits = 32;
1593-
#endif
1594-
c->x86_cache_alignment = c->x86_clflush_size;
1595-
15961601
memset(&c->x86_capability, 0, sizeof(c->x86_capability));
15971602
c->extended_cpuid_level = 0;
15981603

@@ -1605,8 +1610,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
16051610
get_cpu_vendor(c);
16061611
get_cpu_cap(c);
16071612
get_model_name(c); /* RHEL: get model name for unsupported check */
1608-
get_cpu_address_sizes(c);
16091613
setup_force_cpu_cap(X86_FEATURE_CPUID);
1614+
get_cpu_address_sizes(c);
16101615
cpu_parse_early_param();
16111616

16121617
if (this_cpu->c_early_init)
@@ -1619,6 +1624,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
16191624
this_cpu->c_bsp_init(c);
16201625
} else {
16211626
setup_clear_cpu_cap(X86_FEATURE_CPUID);
1627+
get_cpu_address_sizes(c);
16221628
}
16231629

16241630
setup_force_cpu_cap(X86_FEATURE_ALWAYS);

0 commit comments

Comments
 (0)