@@ -173,23 +173,52 @@ int hwpoison_filter(struct page *p)
173173
174174EXPORT_SYMBOL_GPL (hwpoison_filter );
175175
176+ /*
177+ * Kill all processes that have a poisoned page mapped and then isolate
178+ * the page.
179+ *
180+ * General strategy:
181+ * Find all processes having the page mapped and kill them.
182+ * But we keep a page reference around so that the page is not
183+ * actually freed yet.
184+ * Then stash the page away.
185+ *
186+ * There's no convenient way to get back to mapped processes
187+ * from the VMAs. So do a brute-force search over all
188+ * running processes.
189+ *
190+ * Remember that machine checks are not common (or rather
191+ * if they are common you have other problems), so this shouldn't
192+ * be a performance issue.
193+ *
194+ * Also there are some races possible while we get from the
195+ * error detection to actually handling it.
196+ */
197+
198+ struct to_kill {
199+ struct list_head nd ; /* presumably links this entry into the list walked by kill_procs() -- confirm */
200+ struct task_struct * tsk ; /* task that kill_proc() reports and signals */
201+ unsigned long addr ; /* page_address_in_vma() result; passed as the user address of the signal */
202+ short size_shift ; /* compound_order(compound_head(p)) + PAGE_SHIFT; kill_proc() passes it as addr_lsb */
203+ char addr_valid ; /* set to 1 by add_to_kill(); other values are not visible in this hunk */
204+ };
205+
176206/*
177207 * Send all the processes who have the page mapped a signal.
178208 * ``action optional'' if they are not immediately affected by the error
179209 * ``action required'' if error happened in current execution context
180210 */
181- static int kill_proc (struct task_struct * t , unsigned long addr ,
182- unsigned long pfn , struct page * page , int flags )
211+ static int kill_proc (struct to_kill * tk , unsigned long pfn , int flags )
183212{
184- short addr_lsb ;
213+ struct task_struct * t = tk -> tsk ;
214+ short addr_lsb = tk -> size_shift ;
185215 int ret ;
186216
187217 pr_err ("Memory failure: %#lx: Killing %s:%d due to hardware memory corruption\n" ,
188218 pfn , t -> comm , t -> pid );
189- addr_lsb = compound_order (compound_head (page )) + PAGE_SHIFT ;
190219
191220 if ((flags & MF_ACTION_REQUIRED ) && t -> mm == current -> mm ) {
192- ret = force_sig_mceerr (BUS_MCEERR_AR , (void __user * )addr ,
221+ ret = force_sig_mceerr (BUS_MCEERR_AR , (void __user * )tk -> addr ,
193222 addr_lsb , current );
194223 } else {
195224 /*
@@ -198,7 +227,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr,
198227 * This could cause a loop when the user sets SIGBUS
199228 * to SIG_IGN, but hopefully no one will do that?
200229 */
201- ret = send_sig_mceerr (BUS_MCEERR_AO , (void __user * )addr ,
230+ ret = send_sig_mceerr (BUS_MCEERR_AO , (void __user * )tk -> addr ,
202231 addr_lsb , t ); /* synchronous? */
203232 }
204233 if (ret < 0 )
@@ -234,35 +263,6 @@ void shake_page(struct page *p, int access)
234263}
235264EXPORT_SYMBOL_GPL (shake_page );
236265
237- /*
238- * Kill all processes that have a poisoned page mapped and then isolate
239- * the page.
240- *
241- * General strategy:
242- * Find all processes having the page mapped and kill them.
243- * But we keep a page reference around so that the page is not
244- * actually freed yet.
245- * Then stash the page away
246- *
247- * There's no convenient way to get back to mapped processes
248- * from the VMAs. So do a brute-force search over all
249- * running processes.
250- *
251- * Remember that machine checks are not common (or rather
252- * if they are common you have other problems), so this shouldn't
253- * be a performance issue.
254- *
255- * Also there are some races possible while we get from the
256- * error detection to actually handle it.
257- */
258-
259- struct to_kill {
260- struct list_head nd ;
261- struct task_struct * tsk ;
262- unsigned long addr ;
263- char addr_valid ;
264- };
265-
266266/*
267267 * Failure handling: if we can't find or can't kill a process there's
268268 * not much we can do. We just print a message and ignore otherwise.
@@ -292,6 +292,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
292292 }
293293 tk -> addr = page_address_in_vma (p , vma );
294294 tk -> addr_valid = 1 ;
295+ tk -> size_shift = compound_order (compound_head (p )) + PAGE_SHIFT ;
295296
296297 /*
297298 * In theory we don't have to kill when the page was
@@ -317,9 +318,8 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
317318 * Also when FAIL is set do a force kill because something went
318319 * wrong earlier.
319320 */
320- static void kill_procs (struct list_head * to_kill , int forcekill ,
321- bool fail , struct page * page , unsigned long pfn ,
322- int flags )
321+ static void kill_procs (struct list_head * to_kill , int forcekill , bool fail ,
322+ unsigned long pfn , int flags )
323323{
324324 struct to_kill * tk , * next ;
325325
@@ -342,8 +342,7 @@ static void kill_procs(struct list_head *to_kill, int forcekill,
342342 * check for that, but we need to tell the
343343 * process anyways.
344344 */
345- else if (kill_proc (tk -> tsk , tk -> addr ,
346- pfn , page , flags ) < 0 )
345+ else if (kill_proc (tk , pfn , flags ) < 0 )
347346 pr_err ("Memory failure: %#lx: Cannot send advisory machine check signal to %s:%d\n" ,
348347 pfn , tk -> tsk -> comm , tk -> tsk -> pid );
349348 }
@@ -1012,7 +1011,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
10121011 * any accesses to the poisoned memory.
10131012 */
10141013 forcekill = PageDirty (hpage ) || (flags & MF_MUST_KILL );
1015- kill_procs (& tokill , forcekill , !unmap_success , p , pfn , flags );
1014+ kill_procs (& tokill , forcekill , !unmap_success , pfn , flags );
10161015
10171016 return unmap_success ;
10181017}
0 commit comments