Skip to content

Commit b2d3b5e

Browse files
nfont authored and mpe committed
powerpc/pseries: Track LMB nid instead of using device tree
When removing memory we need to remove the memory from the node it was added to, instead of looking up the node it should be in according to the device tree.

During testing we have seen scenarios where the affinity for an LMB changes due to a partition migration or PRRN event. In these cases the node the LMB exists in may not match the node the device tree indicates it belongs in. This can lead to a system crash when trying to DLPAR remove the LMB after a migration or PRRN event. The current code looks up the node in the device tree to remove the LMB from; the crash occurs when we try to offline this node and it does not have any data, i.e. node_data[nid] == NULL.

    36:mon> e
    cpu 0x36: Vector: 300 (Data Access) at [c0000001828b7810]
        pc: c00000000036d08c: try_offline_node+0x2c/0x1b0
        lr: c0000000003a14ec: remove_memory+0xbc/0x110
        sp: c0000001828b7a90
       msr: 800000000280b033
       dar: 9a28
     dsisr: 40000000
      current = 0xc0000006329c4c80
      paca    = 0xc000000007a55200   softe: 0   irq_happened: 0x01
        pid   = 76926, comm = kworker/u320:3
    36:mon> t
    [link register   ] c0000000003a14ec remove_memory+0xbc/0x110
    [c0000001828b7a90] c00000000006a1cc arch_remove_memory+0x9c/0xd0 (unreliable)
    [c0000001828b7ad0] c0000000003a14e0 remove_memory+0xb0/0x110
    [c0000001828b7b20] c0000000000c7db4 dlpar_remove_lmb+0x94/0x160
    [c0000001828b7b60] c0000000000c8ef8 dlpar_memory+0x7e8/0xd10
    [c0000001828b7bf0] c0000000000bf828 handle_dlpar_errorlog+0xf8/0x160
    [c0000001828b7c60] c0000000000bf8cc pseries_hp_work_fn+0x3c/0xa0
    [c0000001828b7c90] c000000000128cd8 process_one_work+0x298/0x5a0
    [c0000001828b7d20] c000000000129068 worker_thread+0x88/0x620
    [c0000001828b7dc0] c00000000013223c kthread+0x1ac/0x1c0
    [c0000001828b7e30] c00000000000b45c ret_from_kernel_thread+0x5c/0x80

To resolve this we need to track the node an LMB belongs to when it is added to the system, so we can remove it from that node instead of the node that the device tree indicates it should belong to.
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
1 parent f341d89 commit b2d3b5e

File tree

3 files changed

+34
-10
lines changed

3 files changed

+34
-10
lines changed

arch/powerpc/include/asm/drmem.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ struct drmem_lmb {
1717
u32 drc_index;
1818
u32 aa_index;
1919
u32 flags;
20+
#ifdef CONFIG_MEMORY_HOTPLUG
21+
int nid;
22+
#endif
2023
};
2124

2225
struct drmem_lmb_info {
@@ -104,4 +107,22 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
104107
lmb->aa_index = 0xffffffff;
105108
}
106109

110+
#ifdef CONFIG_MEMORY_HOTPLUG
111+
static inline void lmb_set_nid(struct drmem_lmb *lmb)
112+
{
113+
lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
114+
}
115+
static inline void lmb_clear_nid(struct drmem_lmb *lmb)
116+
{
117+
lmb->nid = -1;
118+
}
119+
#else
120+
static inline void lmb_set_nid(struct drmem_lmb *lmb)
121+
{
122+
}
123+
static inline void lmb_clear_nid(struct drmem_lmb *lmb)
124+
{
125+
}
126+
#endif
127+
107128
#endif /* _ASM_POWERPC_LMB_H */

arch/powerpc/mm/drmem.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,10 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
366366
if (!drmem_info->lmbs)
367367
return;
368368

369-
for_each_drmem_lmb(lmb)
369+
for_each_drmem_lmb(lmb) {
370370
read_drconf_v1_cell(lmb, &prop);
371+
lmb_set_nid(lmb);
372+
}
371373
}
372374

373375
static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -412,6 +414,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
412414

413415
lmb->aa_index = dr_cell.aa_index;
414416
lmb->flags = dr_cell.flags;
417+
418+
lmb_set_nid(lmb);
415419
}
416420
}
417421
}

arch/powerpc/platforms/pseries/hotplug-memory.c

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -379,7 +379,7 @@ static int dlpar_add_lmb(struct drmem_lmb *);
379379
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
380380
{
381381
unsigned long block_sz;
382-
int nid, rc;
382+
int rc;
383383

384384
if (!lmb_is_removable(lmb))
385385
return -EINVAL;
@@ -389,14 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
389389
return rc;
390390

391391
block_sz = pseries_memory_block_size();
392-
nid = memory_add_physaddr_to_nid(lmb->base_addr);
393392

394-
__remove_memory(nid, lmb->base_addr, block_sz);
393+
__remove_memory(lmb->nid, lmb->base_addr, block_sz);
395394

396395
/* Update memory regions for memory remove */
397396
memblock_remove(lmb->base_addr, block_sz);
398397

399398
invalidate_lmb_associativity_index(lmb);
399+
lmb_clear_nid(lmb);
400400
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
401401

402402
return 0;
@@ -653,7 +653,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
653653
static int dlpar_add_lmb(struct drmem_lmb *lmb)
654654
{
655655
unsigned long block_sz;
656-
int nid, rc;
656+
int rc;
657657

658658
if (lmb->flags & DRCONF_MEM_ASSIGNED)
659659
return -EINVAL;
@@ -664,22 +664,21 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
664664
return rc;
665665
}
666666

667+
lmb_set_nid(lmb);
667668
block_sz = memory_block_size_bytes();
668669

669-
/* Find the node id for this address */
670-
nid = memory_add_physaddr_to_nid(lmb->base_addr);
671-
672670
/* Add the memory */
673-
rc = __add_memory(nid, lmb->base_addr, block_sz);
671+
rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
674672
if (rc) {
675673
invalidate_lmb_associativity_index(lmb);
676674
return rc;
677675
}
678676

679677
rc = dlpar_online_lmb(lmb);
680678
if (rc) {
681-
__remove_memory(nid, lmb->base_addr, block_sz);
679+
__remove_memory(lmb->nid, lmb->base_addr, block_sz);
682680
invalidate_lmb_associativity_index(lmb);
681+
lmb_clear_nid(lmb);
683682
} else {
684683
lmb->flags |= DRCONF_MEM_ASSIGNED;
685684
}

0 commit comments

Comments
 (0)