@@ -879,6 +879,28 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
879879 return ret > 0 ? - EHWPOISON : - EFAULT ;
880880}
881881
882+ /*
883+ * MF_IGNORED - The m-f() handler marks the page as PG_hwpoisoned'ed.
884+ * But it could not do more to isolate the page from being accessed again,
885+ * nor does it kill the process. This is extremely rare and one of the
886+ * potential causes is that the page state has been changed due to an
887+ * underlying race condition. This is the most severe outcome.
888+ *
889+ * MF_FAILED - The m-f() handler marks the page as PG_hwpoisoned'ed.
890+ * It should have killed the process, but it can't isolate the page,
891+ * due to conditions such as extra pin, unmap failure, etc. Accessing
892+ * the page again may trigger another MCE and the process will be killed
893+ * by the m-f() handler immediately.
894+ *
895+ * MF_DELAYED - The m-f() handler marks the page as PG_hwpoisoned'ed.
896+ * The page is unmapped, and is removed from the LRU or file mapping.
897+ * An attempt to access the page again will trigger a page fault and the
898+ * PF handler will kill the process.
899+ *
900+ * MF_RECOVERED - The m-f() handler marks the page as PG_hwpoisoned'ed.
901+ * The page has been completely isolated, that is, unmapped, taken out of
902+ * the buddy system, or hole-punched out of the file mapping.
903+ */
882904static const char * action_name [] = {
883905 [MF_IGNORED ] = "Ignored" ,
884906 [MF_FAILED ] = "Failed" ,
@@ -893,6 +915,7 @@ static const char * const action_page_types[] = {
893915 [MF_MSG_DIFFERENT_COMPOUND ] = "different compound page after locking" ,
894916 [MF_MSG_HUGE ] = "huge page" ,
895917 [MF_MSG_FREE_HUGE ] = "free huge page" ,
918+ [MF_MSG_GET_HWPOISON ] = "get hwpoison page" ,
896919 [MF_MSG_UNMAP_FAILED ] = "unmapping failed page" ,
897920 [MF_MSG_DIRTY_SWAPCACHE ] = "dirty swapcache page" ,
898921 [MF_MSG_CLEAN_SWAPCACHE ] = "clean swapcache page" ,
@@ -906,6 +929,7 @@ static const char * const action_page_types[] = {
906929 [MF_MSG_BUDDY ] = "free buddy page" ,
907930 [MF_MSG_DAX ] = "dax page" ,
908931 [MF_MSG_UNSPLIT_THP ] = "unsplit thp" ,
932+ [MF_MSG_ALREADY_POISONED ] = "already poisoned" ,
909933 [MF_MSG_UNKNOWN ] = "unknown page" ,
910934};
911935
@@ -1013,12 +1037,13 @@ static int me_kernel(struct page_state *ps, struct page *p)
10131037
10141038/*
10151039 * Page in unknown state. Do nothing.
1040+ * This is a catch-all in case we fail to make sense of the page state.
10161041 */
10171042static int me_unknown (struct page_state * ps , struct page * p )
10181043{
10191044 pr_err ("%#lx: Unknown page state\n" , page_to_pfn (p ));
10201045 unlock_page (p );
1021- return MF_FAILED ;
1046+ return MF_IGNORED ;
10221047}
10231048
10241049/*
@@ -2055,14 +2080,15 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
20552080 if (flags & MF_ACTION_REQUIRED ) {
20562081 folio = page_folio (p );
20572082 res = kill_accessing_process (current , folio_pfn (folio ), flags );
2083+ action_result (pfn , MF_MSG_ALREADY_POISONED , MF_FAILED );
20582084 }
20592085 return res ;
20602086 } else if (res == - EBUSY ) {
20612087 if (!(flags & MF_NO_RETRY )) {
20622088 flags |= MF_NO_RETRY ;
20632089 goto retry ;
20642090 }
2065- return action_result (pfn , MF_MSG_UNKNOWN , MF_IGNORED );
2091+ return action_result (pfn , MF_MSG_GET_HWPOISON , MF_IGNORED );
20662092 }
20672093
20682094 folio = page_folio (p );
@@ -2097,7 +2123,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
20972123
20982124 if (!hwpoison_user_mappings (folio , p , pfn , flags )) {
20992125 folio_unlock (folio );
2100- return action_result (pfn , MF_MSG_UNMAP_FAILED , MF_IGNORED );
2126+ return action_result (pfn , MF_MSG_UNMAP_FAILED , MF_FAILED );
21012127 }
21022128
21032129 return identify_page_state (pfn , p , page_flags );
@@ -2231,6 +2257,7 @@ int memory_failure(unsigned long pfn, int flags)
22312257 res = kill_accessing_process (current , pfn , flags );
22322258 if (flags & MF_COUNT_INCREASED )
22332259 put_page (p );
2260+ action_result (pfn , MF_MSG_ALREADY_POISONED , MF_FAILED );
22342261 goto unlock_mutex ;
22352262 }
22362263
@@ -2267,7 +2294,7 @@ int memory_failure(unsigned long pfn, int flags)
22672294 }
22682295 goto unlock_mutex ;
22692296 } else if (res < 0 ) {
2270- res = action_result (pfn , MF_MSG_UNKNOWN , MF_IGNORED );
2297+ res = action_result (pfn , MF_MSG_GET_HWPOISON , MF_IGNORED );
22712298 goto unlock_mutex ;
22722299 }
22732300 }
@@ -2363,7 +2390,7 @@ int memory_failure(unsigned long pfn, int flags)
23632390 * Abort on fail: __filemap_remove_folio() assumes unmapped page.
23642391 */
23652392 if (!hwpoison_user_mappings (folio , p , pfn , flags )) {
2366- res = action_result (pfn , MF_MSG_UNMAP_FAILED , MF_IGNORED );
2393+ res = action_result (pfn , MF_MSG_UNMAP_FAILED , MF_FAILED );
23672394 goto unlock_page ;
23682395 }
23692396
0 commit comments