Skip to content

Commit 4ae7c03

Browse files
Christoph Lameter authored and Linus Torvalds committed
[PATCH] Periodically drain non local pagesets
The pageset array can potentially acquire a huge amount of memory on large NUMA systems. E.g. on a system with 512 processors and 256 nodes there will be 256*512 pagesets. If each pageset only holds 5 pages then we are talking about 655360 pages. With a 16K page size on IA64 this results in potentially 10 Gigabytes of memory being trapped in pagesets. The typical cases are much less for smaller systems, but there is still the potential of memory being trapped in off-node pagesets. Off-node memory may be rarely used if local memory is available, and so we may potentially have memory in seldom-used pagesets without this patch. The slab allocator flushes its per-cpu caches every 2 seconds. The following patch flushes the off-node pageset caches in the same way by tying into the slab flush. The patch also changes /proc/zoneinfo to include the number of pages currently in each pageset. Signed-off-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
1 parent 578c2fd commit 4ae7c03

File tree

3 files changed

+39
-2
lines changed

3 files changed

+39
-2
lines changed

include/linux/gfp.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,5 +133,10 @@ extern void FASTCALL(free_cold_page(struct page *page));
133133
#define free_page(addr) free_pages((addr),0)
134134

135135
void page_alloc_init(void);
136+
#ifdef CONFIG_NUMA
137+
void drain_remote_pages(void);
138+
#else
139+
static inline void drain_remote_pages(void) { };
140+
#endif
136141

137142
#endif /* __LINUX_GFP_H */

mm/page_alloc.c

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,36 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
516516
return allocated;
517517
}
518518

519+
#ifdef CONFIG_NUMA
/*
 * Flush the current CPU's per-cpu page caches for every zone that lives
 * on a *remote* node.  Invoked periodically by the slab reaper so that
 * pages do not stay trapped in seldom-used off-node pagesets.
 *
 * The pcp lists are manipulated with local interrupts disabled for the
 * whole scan, matching the protection the allocator fast path relies on.
 */
void drain_remote_pages(void)
{
	struct zone *zone;
	unsigned long irqflags;
	int idx;

	local_irq_save(irqflags);
	for_each_zone(zone) {
		struct per_cpu_pageset *p;

		/* Pagesets on the local node are left for the hot path. */
		if (zone->zone_pgdat->node_id == numa_node_id())
			continue;

		p = zone->pageset[smp_processor_id()];
		for (idx = 0; idx < ARRAY_SIZE(p->pcp); idx++) {
			struct per_cpu_pages *pcp = &p->pcp[idx];

			if (pcp->count)
				pcp->count -= free_pages_bulk(zone,
						pcp->count, &pcp->list, 0);
		}
	}
	local_irq_restore(irqflags);
}
#endif
548+
519549
#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
520550
static void __drain_pages(unsigned int cpu)
521551
{
@@ -1271,12 +1301,13 @@ void show_free_areas(void)
12711301
pageset = zone_pcp(zone, cpu);
12721302

12731303
for (temperature = 0; temperature < 2; temperature++)
1274-
printk("cpu %d %s: low %d, high %d, batch %d\n",
1304+
printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
12751305
cpu,
12761306
temperature ? "cold" : "hot",
12771307
pageset->pcp[temperature].low,
12781308
pageset->pcp[temperature].high,
1279-
pageset->pcp[temperature].batch);
1309+
pageset->pcp[temperature].batch,
1310+
pageset->pcp[temperature].count);
12801311
}
12811312
}
12821313

mm/slab.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2851,6 +2851,7 @@ static void cache_reap(void *unused)
28512851
}
28522852
check_irq_on();
28532853
up(&cache_chain_sem);
2854+
drain_remote_pages();
28542855
/* Setup the next iteration */
28552856
schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC + smp_processor_id());
28562857
}

0 commit comments

Comments
 (0)