Permalink
Browse files

Merge branch 'akpm' (aka "Andrew's patch-bomb, take two")

Andrew explains:

 - various misc stuff

 - Most of the rest of MM: memcg, threaded hugepages, others.

 - cpumask

 - kexec

 - kdump

 - some direct-io performance tweaking

 - radix-tree optimisations

 - new selftests code

   A note on this: often people will develop a new userspace-visible
   feature and will develop userspace code to exercise/test that
   feature.  Then they merge the patch and the selftest code dies.
   Sometimes we paste it into the changelog.  Sometimes the code gets
   thrown into Documentation/(!).

   This saddens me.  So this patch creates a bare-bones framework which
   will henceforth allow me to ask people to include their test apps in
   the kernel tree so we can keep them alive.  Then when people enhance
   or fix the feature, I can ask them to update the test app too.

   The infrastructure is terribly trivial at present - let's see how it
   evolves.

 - checkpoint/restart feature work.

   A note on this: this is a project by various mad Russians to perform
   c/r mainly from userspace, with various oddball helper code added
   into the kernel where the need is demonstrated.

   So rather than some large central lump of code, what we have is
   little bits and pieces popping up in various places which either
   expose something new or which permit something which is normally
   kernel-private to be modified.

   The overall project is an ongoing thing.  I've judged that the size
   and scope of the thing means that we're more likely to be successful
   with it if we integrate the support into mainline piecemeal rather
   than allowing it all to develop out-of-tree.

   However I'm less confident than the developers that it will all
   eventually work! So what I'm asking them to do is to wrap each piece
   of new code inside CONFIG_CHECKPOINT_RESTORE.  So if it all
   eventually comes to tears and the project as a whole fails, it should
   be a simple matter to go through and delete all trace of it.

This lot pretty much wraps up the -rc1 merge for me.

* akpm: (96 commits)
  unlzo: fix input buffer free
  ramoops: update parameters only after successful init
  ramoops: fix use of rounddown_pow_of_two()
  c/r: prctl: add PR_SET_MM codes to set up mm_struct entries
  c/r: procfs: add start_data, end_data, start_brk members to /proc/$pid/stat v4
  c/r: introduce CHECKPOINT_RESTORE symbol
  selftests: new x86 breakpoints selftest
  selftests: new very basic kernel selftests directory
  radix_tree: take radix_tree_path off stack
  radix_tree: remove radix_tree_indirect_to_ptr()
  dio: optimize cache misses in the submission path
  vfs: cache request_queue in struct block_device
  fs/direct-io.c: calculate fs_count correctly in get_more_blocks()
  drivers/parport/parport_pc.c: fix warnings
  panic: don't print redundant backtraces on oops
  sysctl: add the kernel.ns_last_pid control
  kdump: add udev events for memory online/offline
  include/linux/crash_dump.h needs elf.h
  kdump: fix crash_kexec()/smp_send_stop() race in panic()
  kdump: crashk_res init check for /sys/kernel/kexec_crash_size
  ...
  • Loading branch information...
torvalds committed Jan 13, 2012
2 parents 7c17d86 + 35f1526 commit 099469502f62fbe0d7e4f0b83a2f22538367f734
Showing with 2,589 additions and 1,562 deletions.
  1. +4 −0 Documentation/ABI/testing/sysfs-kernel-slab
  2. +6 −3 Documentation/cgroups/memory.txt
  3. +3 −0 Documentation/filesystems/proc.txt
  4. +8 −0 Documentation/sysctl/kernel.txt
  5. +4 −1 Documentation/vm/slub.txt
  6. +14 −0 arch/Kconfig
  7. +1 −1 arch/avr32/include/asm/system.h
  8. +1 −1 arch/avr32/kernel/traps.c
  9. +0 −1 arch/ia64/include/asm/processor.h
  10. +2 −2 arch/ia64/kernel/machine_kexec.c
  11. +1 −2 arch/m68k/amiga/config.c
  12. +1 −1 arch/mips/include/asm/ptrace.h
  13. +1 −1 arch/mips/kernel/traps.c
  14. +1 −1 arch/mn10300/include/asm/exceptions.h
  15. +0 −2 arch/parisc/include/asm/processor.h
  16. +0 −1 arch/parisc/kernel/process.c
  17. +2 −2 arch/powerpc/kernel/machine_kexec_32.c
  18. +3 −3 arch/powerpc/kernel/machine_kexec_64.c
  19. +1 −1 arch/powerpc/mm/numa.c
  20. +0 −1 arch/powerpc/platforms/pseries/nvram.c
  21. +1 −1 arch/s390/include/asm/processor.h
  22. +1 −1 arch/s390/kernel/nmi.c
  23. +1 −1 arch/sh/kernel/process_32.c
  24. +1 −1 arch/sh/kernel/process_64.c
  25. +3 −3 arch/tile/kernel/machine_kexec.c
  26. +3 −0 arch/x86/Kconfig
  27. +0 −6 arch/x86/Kconfig.cpu
  28. +1 −1 arch/x86/mm/numa.c
  29. +0 −8 arch/x86/um/Kconfig
  30. +14 −3 drivers/base/memory.c
  31. +12 −12 drivers/char/ramoops.c
  32. +1 −2 drivers/mtd/mtdoops.c
  33. +2 −2 drivers/parport/parport_pc.c
  34. +3 −3 drivers/video/nvidia/nvidia.c
  35. +3 −0 fs/block_dev.c
  36. +3 −2 fs/btrfs/disk-io.c
  37. +41 −16 fs/direct-io.c
  38. +209 −25 fs/eventpoll.c
  39. +2 −1 fs/hugetlbfs/inode.c
  40. +1 −1 fs/nfs/internal.h
  41. +2 −2 fs/nfs/write.c
  42. +1 −1 fs/pipe.c
  43. +5 −2 fs/proc/array.c
  44. +2 −0 fs/proc/base.c
  45. +14 −0 include/asm-generic/tlb.h
  46. +1 −0 include/linux/crash_dump.h
  47. +1 −0 include/linux/eventpoll.h
  48. +11 −3 include/linux/fs.h
  49. +1 −1 include/linux/huge_mm.h
  50. +7 −6 include/linux/kernel.h
  51. +0 −1 include/linux/kmsg_dump.h
  52. +0 −4 include/linux/linkage.h
  53. +66 −39 include/linux/memcontrol.h
  54. +18 −5 include/linux/migrate.h
  55. +21 −23 include/linux/mm_inline.h
  56. +4 −5 include/linux/mm_types.h
  57. +16 −12 include/linux/mmzone.h
  58. +1 −1 include/linux/oom.h
  59. +2 −44 include/linux/page_cgroup.h
  60. +5 −7 include/linux/pagevec.h
  61. +12 −0 include/linux/prctl.h
  62. +0 −3 include/linux/radix-tree.h
  63. +2 −2 include/linux/rmap.h
  64. +1 −1 include/linux/sched.h
  65. +14 −8 include/trace/events/vmscan.h
  66. +11 −0 init/Kconfig
  67. +3 −3 kernel/exit.c
  68. +18 −7 kernel/kexec.c
  69. +1 −1 kernel/kprobes.c
  70. +23 −3 kernel/panic.c
  71. +3 −1 kernel/pid.c
  72. +31 −0 kernel/pid_namespace.c
  73. +121 −0 kernel/sys.c
  74. +1 −1 lib/decompress_unlzo.c
  75. +76 −78 lib/radix-tree.c
  76. +3 −2 mm/compaction.c
  77. +2 −16 mm/filemap.c
  78. +56 −37 mm/huge_memory.c
  79. +11 −0 mm/ksm.c
  80. +500 −602 mm/memcontrol.c
  81. +1 −1 mm/memory-failure.c
  82. +2 −2 mm/memory.c
  83. +1 −1 mm/memory_hotplug.c
  84. +1 −1 mm/mempolicy.c
  85. +119 −54 mm/migrate.c
  86. +21 −21 mm/oom_kill.c
  87. +41 −14 mm/page_alloc.c
  88. +57 −107 mm/page_cgroup.c
  89. +10 −10 mm/rmap.c
  90. +6 −3 mm/slub.c
  91. +35 −44 mm/swap.c
  92. +10 −0 mm/swap_state.c
  93. +5 −4 mm/swapfile.c
  94. +4 −5 mm/vmalloc.c
  95. +420 −260 mm/vmscan.c
  96. +1 −1 mm/vmstat.c
  97. +11 −0 tools/testing/selftests/Makefile
  98. +20 −0 tools/testing/selftests/breakpoints/Makefile
  99. +394 −0 tools/testing/selftests/breakpoints/breakpoint_test.c
  100. +8 −0 tools/testing/selftests/run_tests
@@ -346,6 +346,10 @@ Description:
number of objects per slab. If a slab cannot be allocated
because of fragmentation, SLUB will retry with the minimum order
possible depending on its characteristics.
+ When debug_guardpage_minorder=N (N > 0) parameter is specified
+ (see Documentation/kernel-parameters.txt), the minimum possible
+ order is used and this sysfs entry can not be used to change
+ the order at run time.
What: /sys/kernel/slab/cache/order_fallback
Date: April 2008
@@ -61,7 +61,7 @@ Brief summary of control files.
memory.failcnt # show the number of memory usage hits limits
memory.memsw.failcnt # show the number of memory+Swap hits limits
memory.max_usage_in_bytes # show max memory usage recorded
- memory.memsw.usage_in_bytes # show max memory+Swap usage recorded
+ memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded
memory.soft_limit_in_bytes # set/show soft limit of memory usage
memory.stat # show various statistics
memory.use_hierarchy # set/show hierarchical account enabled
@@ -410,8 +410,11 @@ memory.stat file includes following statistics
cache - # of bytes of page cache memory.
rss - # of bytes of anonymous and swap cache memory.
mapped_file - # of bytes of mapped file (includes tmpfs/shmem)
-pgpgin - # of pages paged in (equivalent to # of charging events).
-pgpgout - # of pages paged out (equivalent to # of uncharging events).
+pgpgin - # of charging events to the memory cgroup. The charging
+ event happens each time a page is accounted as either mapped
+ anon page(RSS) or cache page(Page Cache) to the cgroup.
+pgpgout - # of uncharging events to the memory cgroup. The uncharging
+ event happens each time a page is unaccounted from the cgroup.
swap - # of bytes of swap usage
inactive_anon - # of bytes of anonymous memory and swap cache memory on
LRU list.
@@ -307,6 +307,9 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
blkio_ticks time spent waiting for block IO
gtime guest time of the task in jiffies
cgtime guest time of the task children in jiffies
+ start_data address above which program data+bss is placed
+ end_data address below which program data+bss is placed
+ start_brk address above which program heap can be expanded with brk()
..............................................................................
The /proc/PID/maps file containing the currently mapped memory regions and
@@ -415,6 +415,14 @@ PIDs of value pid_max or larger are not allocated.
==============================================================
+ns_last_pid:
+
+The last pid allocated in the current (the one task using this sysctl
+lives in) pid namespace. When selecting a pid for a next task on fork
+kernel tries to allocate a number starting from this one.
+
+==============================================================
+
powersave-nap: (PPC only)
If set, Linux-PPC will use the 'nap' mode of powersaving,
@@ -131,7 +131,10 @@ slub_min_objects.
slub_max_order specified the order at which slub_min_objects should no
longer be checked. This is useful to avoid SLUB trying to generate
super large order pages to fit slub_min_objects of a slab cache with
-large object sizes into one high order page.
+large object sizes into one high order page. Setting command line
+parameter debug_guardpage_minorder=N (N > 0), forces setting
+slub_max_order to 0, what cause minimum possible order of slabs
+allocation.
SLUB Debug output
-----------------
View
@@ -185,4 +185,18 @@ config HAVE_RCU_TABLE_FREE
config ARCH_HAVE_NMI_SAFE_CMPXCHG
bool
+config HAVE_ALIGNED_STRUCT_PAGE
+ bool
+ help
+ This makes sure that struct pages are double word aligned and that
+ e.g. the SLUB allocator can perform double word atomic operations
+ on a struct page for better performance. However selecting this
+ might increase the size of a struct page by a word.
+
+config HAVE_CMPXCHG_LOCAL
+ bool
+
+config HAVE_CMPXCHG_DOUBLE
+ bool
+
source "kernel/gcov/Kconfig"
@@ -169,7 +169,7 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
#define cmpxchg64_local(ptr, o, n) __cmpxchg64_local_generic((ptr), (o), (n))
struct pt_regs;
-void NORET_TYPE die(const char *str, struct pt_regs *regs, long err);
+void die(const char *str, struct pt_regs *regs, long err);
void _exception(long signr, struct pt_regs *regs, int code,
unsigned long addr);
@@ -24,7 +24,7 @@
static DEFINE_SPINLOCK(die_lock);
-void NORET_TYPE die(const char *str, struct pt_regs *regs, long err)
+void die(const char *str, struct pt_regs *regs, long err)
{
static int die_counter;
@@ -309,7 +309,6 @@ struct thread_struct {
}
#define start_thread(regs,new_ip,new_sp) do { \
- set_fs(USER_DS); \
regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL)) \
& ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS)); \
regs->cr_iip = new_ip; \
@@ -27,11 +27,11 @@
#include <asm/sal.h>
#include <asm/mca.h>
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
unsigned long indirection_page,
unsigned long start_address,
struct ia64_boot_param *boot_param,
- unsigned long pal_addr) ATTRIB_NORET;
+ unsigned long pal_addr) __noreturn;
struct kimage *ia64_kimage;
View
@@ -511,8 +511,7 @@ static unsigned long amiga_gettimeoffset(void)
return ticks + offset;
}
-static NORET_TYPE void amiga_reset(void)
- ATTRIB_NORET;
+static void amiga_reset(void) __noreturn;
static void amiga_reset(void)
{
@@ -144,7 +144,7 @@ extern int ptrace_set_watch_regs(struct task_struct *child,
extern asmlinkage void syscall_trace_enter(struct pt_regs *regs);
extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
-extern NORET_TYPE void die(const char *, struct pt_regs *) ATTRIB_NORET;
+extern void die(const char *, struct pt_regs *) __noreturn;
static inline void die_if_kernel(const char *str, struct pt_regs *regs)
{
View
@@ -1340,7 +1340,7 @@ void ejtag_exception_handler(struct pt_regs *regs)
/*
* NMI exception handler.
*/
-NORET_TYPE void ATTRIB_NORET nmi_exception_handler(struct pt_regs *regs)
+void __noreturn nmi_exception_handler(struct pt_regs *regs)
{
bust_spinlocks(1);
printk("NMI taken!!!!\n");
@@ -110,7 +110,7 @@ extern asmlinkage void nmi_handler(void);
extern asmlinkage void misalignment(struct pt_regs *, enum exception_code);
extern void die(const char *, struct pt_regs *, enum exception_code)
- ATTRIB_NORET;
+ __noreturn;
extern int die_if_no_fixup(const char *, struct pt_regs *, enum exception_code);
@@ -196,7 +196,6 @@ typedef unsigned int elf_caddr_t;
/* offset pc for priv. level */ \
pc |= 3; \
\
- set_fs(USER_DS); \
regs->iasq[0] = spaceid; \
regs->iasq[1] = spaceid; \
regs->iaoq[0] = pc; \
@@ -299,7 +298,6 @@ on downward growing arches, it looks like this:
elf_addr_t pc = (elf_addr_t)new_pc | 3; \
elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1; \
\
- set_fs(USER_DS); \
regs->iasq[0] = spaceid; \
regs->iasq[1] = spaceid; \
regs->iaoq[0] = pc; \
@@ -192,7 +192,6 @@ void flush_thread(void)
/* Only needs to handle fpu stuff or perf monitors.
** REVISIT: several arches implement a "lazy fpu state".
*/
- set_fs(USER_DS);
}
void release_thread(struct task_struct *dead_task)
@@ -16,10 +16,10 @@
#include <asm/hw_irq.h>
#include <asm/io.h>
-typedef NORET_TYPE void (*relocate_new_kernel_t)(
+typedef void (*relocate_new_kernel_t)(
unsigned long indirection_page,
unsigned long reboot_code_buffer,
- unsigned long start_address) ATTRIB_NORET;
+ unsigned long start_address) __noreturn;
/*
* This is a generic machine_kexec function suitable at least for
@@ -307,9 +307,9 @@ static union thread_union kexec_stack __init_task_data =
struct paca_struct kexec_paca;
/* Our assembly helper, in kexec_stub.S */
-extern NORET_TYPE void kexec_sequence(void *newstack, unsigned long start,
- void *image, void *control,
- void (*clear_all)(void)) ATTRIB_NORET;
+extern void kexec_sequence(void *newstack, unsigned long start,
+ void *image, void *control,
+ void (*clear_all)(void)) __noreturn;
/* too late to fail here */
void default_machine_kexec(struct kimage *image)
View
@@ -58,7 +58,7 @@ static int distance_lookup_table[MAX_NUMNODES][MAX_DISTANCE_REF_POINTS];
* Allocate node_to_cpumask_map based on number of available nodes
* Requires node_possible_map to be valid.
*
- * Note: node_to_cpumask() is not valid until after this is done.
+ * Note: cpumask_of_node() is not valid until after this is done.
*/
static void __init setup_node_to_cpumask_map(void)
{
@@ -638,7 +638,6 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
/* These are almost always orderly shutdowns. */
return;
case KMSG_DUMP_OOPS:
- case KMSG_DUMP_KEXEC:
break;
case KMSG_DUMP_PANIC:
panicking = true;
@@ -236,7 +236,7 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
/*
* Function to drop a processor into disabled wait state
*/
-static inline void ATTRIB_NORET disabled_wait(unsigned long code)
+static inline void __noreturn disabled_wait(unsigned long code)
{
unsigned long ctl_buf;
psw_t dw_psw;
View
@@ -30,7 +30,7 @@ struct mcck_struct {
static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
-static NORET_TYPE void s390_handle_damage(char *msg)
+static void s390_handle_damage(char *msg)
{
smp_send_stop();
disabled_wait((unsigned long) __builtin_return_address(0));
@@ -70,7 +70,7 @@ void show_regs(struct pt_regs * regs)
/*
* Create a kernel thread
*/
-ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
{
do_exit(fn(arg));
}
@@ -285,7 +285,7 @@ void show_regs(struct pt_regs *regs)
/*
* Create a kernel thread
*/
-ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
+__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
{
do_exit(fn(arg));
}
@@ -248,11 +248,11 @@ static void setup_quasi_va_is_pa(void)
}
-NORET_TYPE void machine_kexec(struct kimage *image)
+void machine_kexec(struct kimage *image)
{
void *reboot_code_buffer;
- NORET_TYPE void (*rnk)(unsigned long, void *, unsigned long)
- ATTRIB_NORET;
+ void (*rnk)(unsigned long, void *, unsigned long)
+ __noreturn;
/* Mask all interrupts before starting to reboot. */
interrupt_mask_set_mask(~0ULL);
View
@@ -60,6 +60,9 @@ config X86
select PERF_EVENTS
select HAVE_PERF_EVENTS_NMI
select ANON_INODES
+ select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
+ select HAVE_CMPXCHG_LOCAL if !M386
+ select HAVE_CMPXCHG_DOUBLE
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
select ARCH_BINFMT_ELF_RANDOMIZE_PIE
View
@@ -309,12 +309,6 @@ config X86_INTERNODE_CACHE_SHIFT
config X86_CMPXCHG
def_bool X86_64 || (X86_32 && !M386)
-config CMPXCHG_LOCAL
- def_bool X86_64 || (X86_32 && !M386)
-
-config CMPXCHG_DOUBLE
- def_bool y
-
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
View
@@ -110,7 +110,7 @@ void __cpuinit numa_clear_node(int cpu)
* Allocate node_to_cpumask_map based on number of available nodes
* Requires node_possible_map to be valid.
*
- * Note: node_to_cpumask() is not valid until after this is done.
+ * Note: cpumask_of_node() is not valid until after this is done.
* (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
*/
void __init setup_node_to_cpumask_map(void)
View
@@ -6,14 +6,6 @@ menu "UML-specific options"
menu "Host processor type and features"
-config CMPXCHG_LOCAL
- bool
- default n
-
-config CMPXCHG_DOUBLE
- bool
- default n
-
source "arch/x86/Kconfig.cpu"
endmenu
View
@@ -295,11 +295,22 @@ static int memory_block_change_state(struct memory_block *mem,
ret = memory_block_action(mem->start_section_nr, to_state);
- if (ret)
+ if (ret) {
mem->state = from_state_req;
- else
- mem->state = to_state;
+ goto out;
+ }
+ mem->state = to_state;
+ switch (mem->state) {
+ case MEM_OFFLINE:
+ kobject_uevent(&mem->dev.kobj, KOBJ_OFFLINE);
+ break;
+ case MEM_ONLINE:
+ kobject_uevent(&mem->dev.kobj, KOBJ_ONLINE);
+ break;
+ default:
+ break;
+ }
out:
mutex_unlock(&mem->state_mutex);
return ret;
Oops, something went wrong.

0 comments on commit 0994695

Please sign in to comment.