2626#include <linux/bitmap.h>
2727#include <linux/math64.h>
2828#include <linux/mod_devicetable.h>
29+ #include <linux/adxl.h>
2930#include <acpi/nfit.h>
3031#include <asm/cpu_device_id.h>
3132#include <asm/intel-family.h>
3536#include "edac_module.h"
3637
3738#define EDAC_MOD_STR "skx_edac"
39+ #define MSG_SIZE 1024
3840
3941/*
4042 * Debug macros
5456static LIST_HEAD (skx_edac_list );
5557
5658static u64 skx_tolm , skx_tohm ;
59+ static char * skx_msg ;
60+ static unsigned int nvdimm_count ;
61+
62+ enum {
63+ INDEX_SOCKET ,
64+ INDEX_MEMCTRL ,
65+ INDEX_CHANNEL ,
66+ INDEX_DIMM ,
67+ INDEX_MAX
68+ };
69+
70+ static const char * const component_names [] = {
71+ [INDEX_SOCKET ] = "ProcessorSocketId" ,
72+ [INDEX_MEMCTRL ] = "MemoryControllerId" ,
73+ [INDEX_CHANNEL ] = "ChannelId" ,
74+ [INDEX_DIMM ] = "DimmSlotId" ,
75+ };
76+
77+ static int component_indices [ARRAY_SIZE (component_names )];
78+ static int adxl_component_count ;
79+ static const char * const * adxl_component_names ;
80+ static u64 * adxl_values ;
81+ static char * adxl_msg ;
5782
5883#define NUM_IMC 2 /* memory controllers per socket */
5984#define NUM_CHANNELS 3 /* channels per memory controller */
@@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
393418 u16 flags ;
394419 u64 size = 0 ;
395420
421+ nvdimm_count ++ ;
422+
396423 dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE (dimmno , chan , imc -> lmc ,
397424 imc -> src_id , 0 );
398425
@@ -941,12 +968,46 @@ static void teardown_skx_debug(void)
941968}
942969#endif /*CONFIG_EDAC_DEBUG*/
943970
971+ static bool skx_adxl_decode (struct decoded_addr * res )
972+
973+ {
974+ int i , len = 0 ;
975+
976+ if (res -> addr >= skx_tohm || (res -> addr >= skx_tolm &&
977+ res -> addr < BIT_ULL (32 ))) {
978+ edac_dbg (0 , "Address 0x%llx out of range\n" , res -> addr );
979+ return false;
980+ }
981+
982+ if (adxl_decode (res -> addr , adxl_values )) {
983+ edac_dbg (0 , "Failed to decode 0x%llx\n" , res -> addr );
984+ return false;
985+ }
986+
987+ res -> socket = (int )adxl_values [component_indices [INDEX_SOCKET ]];
988+ res -> imc = (int )adxl_values [component_indices [INDEX_MEMCTRL ]];
989+ res -> channel = (int )adxl_values [component_indices [INDEX_CHANNEL ]];
990+ res -> dimm = (int )adxl_values [component_indices [INDEX_DIMM ]];
991+
992+ for (i = 0 ; i < adxl_component_count ; i ++ ) {
993+ if (adxl_values [i ] == ~0x0ull )
994+ continue ;
995+
996+ len += snprintf (adxl_msg + len , MSG_SIZE - len , " %s:0x%llx" ,
997+ adxl_component_names [i ], adxl_values [i ]);
998+ if (MSG_SIZE - len <= 0 )
999+ break ;
1000+ }
1001+
1002+ return true;
1003+ }
1004+
9441005static void skx_mce_output_error (struct mem_ctl_info * mci ,
9451006 const struct mce * m ,
9461007 struct decoded_addr * res )
9471008{
9481009 enum hw_event_mc_err_type tp_event ;
949- char * type , * optype , msg [ 256 ] ;
1010+ char * type , * optype ;
9501011 bool ripv = GET_BITFIELD (m -> mcgstatus , 0 , 0 );
9511012 bool overflow = GET_BITFIELD (m -> status , 62 , 62 );
9521013 bool uncorrected_error = GET_BITFIELD (m -> status , 61 , 61 );
@@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
10071068 break ;
10081069 }
10091070 }
1071+ if (adxl_component_count ) {
1072+ snprintf (skx_msg , MSG_SIZE , "%s%s err_code:%04x:%04x %s" ,
1073+ overflow ? " OVERFLOW" : "" ,
1074+ (uncorrected_error && recoverable ) ? " recoverable" : "" ,
1075+ mscod , errcode , adxl_msg );
1076+ } else {
1077+ snprintf (skx_msg , MSG_SIZE ,
1078+ "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x" ,
1079+ overflow ? " OVERFLOW" : "" ,
1080+ (uncorrected_error && recoverable ) ? " recoverable" : "" ,
1081+ mscod , errcode ,
1082+ res -> socket , res -> imc , res -> rank ,
1083+ res -> bank_group , res -> bank_address , res -> row , res -> column );
1084+ }
10101085
1011- snprintf (msg , sizeof (msg ),
1012- "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x" ,
1013- overflow ? " OVERFLOW" : "" ,
1014- (uncorrected_error && recoverable ) ? " recoverable" : "" ,
1015- mscod , errcode ,
1016- res -> socket , res -> imc , res -> rank ,
1017- res -> bank_group , res -> bank_address , res -> row , res -> column );
1018-
1019- edac_dbg (0 , "%s\n" , msg );
1086+ edac_dbg (0 , "%s\n" , skx_msg );
10201087
10211088 /* Call the helper to output message */
10221089 edac_mc_handle_error (tp_event , mci , core_err_cnt ,
10231090 m -> addr >> PAGE_SHIFT , m -> addr & ~PAGE_MASK , 0 ,
10241091 res -> channel , res -> dimm , -1 ,
1025- optype , msg );
1092+ optype , skx_msg );
1093+ }
1094+
1095+ static struct mem_ctl_info * get_mci (int src_id , int lmc )
1096+ {
1097+ struct skx_dev * d ;
1098+
1099+ if (lmc > NUM_IMC - 1 ) {
1100+ skx_printk (KERN_ERR , "Bad lmc %d\n" , lmc );
1101+ return NULL ;
1102+ }
1103+
1104+ list_for_each_entry (d , & skx_edac_list , list ) {
1105+ if (d -> imc [0 ].src_id == src_id )
1106+ return d -> imc [lmc ].mci ;
1107+ }
1108+
1109+ skx_printk (KERN_ERR , "No mci for src_id %d lmc %d\n" , src_id , lmc );
1110+
1111+ return NULL ;
10261112}
10271113
10281114static int skx_mce_check_error (struct notifier_block * nb , unsigned long val ,
@@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
10401126 if ((mce -> status & 0xefff ) >> 7 != 1 || !(mce -> status & MCI_STATUS_ADDRV ))
10411127 return NOTIFY_DONE ;
10421128
1129+ memset (& res , 0 , sizeof (res ));
10431130 res .addr = mce -> addr ;
1044- if (!skx_decode (& res ))
1131+
1132+ if (adxl_component_count ) {
1133+ if (!skx_adxl_decode (& res ))
1134+ return NOTIFY_DONE ;
1135+
1136+ mci = get_mci (res .socket , res .imc );
1137+ } else {
1138+ if (!skx_decode (& res ))
1139+ return NOTIFY_DONE ;
1140+
1141+ mci = res .dev -> imc [res .imc ].mci ;
1142+ }
1143+
1144+ if (!mci )
10451145 return NOTIFY_DONE ;
1046- mci = res .dev -> imc [res .imc ].mci ;
10471146
10481147 if (mce -> mcgstatus & MCG_STATUS_MCIP )
10491148 type = "Exception" ;
@@ -1094,6 +1193,62 @@ static void skx_remove(void)
10941193 }
10951194}
10961195
1196+ static void __init skx_adxl_get (void )
1197+ {
1198+ const char * const * names ;
1199+ int i , j ;
1200+
1201+ names = adxl_get_component_names ();
1202+ if (!names ) {
1203+ skx_printk (KERN_NOTICE , "No firmware support for address translation." );
1204+ skx_printk (KERN_CONT , " Only decoding DDR4 address!\n" );
1205+ return ;
1206+ }
1207+
1208+ for (i = 0 ; i < INDEX_MAX ; i ++ ) {
1209+ for (j = 0 ; names [j ]; j ++ ) {
1210+ if (!strcmp (component_names [i ], names [j ])) {
1211+ component_indices [i ] = j ;
1212+ break ;
1213+ }
1214+ }
1215+
1216+ if (!names [j ])
1217+ goto err ;
1218+ }
1219+
1220+ adxl_component_names = names ;
1221+ while (* names ++ )
1222+ adxl_component_count ++ ;
1223+
1224+ adxl_values = kcalloc (adxl_component_count , sizeof (* adxl_values ),
1225+ GFP_KERNEL );
1226+ if (!adxl_values ) {
1227+ adxl_component_count = 0 ;
1228+ return ;
1229+ }
1230+
1231+ adxl_msg = kzalloc (MSG_SIZE , GFP_KERNEL );
1232+ if (!adxl_msg ) {
1233+ adxl_component_count = 0 ;
1234+ kfree (adxl_values );
1235+ }
1236+
1237+ return ;
1238+ err :
1239+ skx_printk (KERN_ERR , "'%s' is not matched from DSM parameters: " ,
1240+ component_names [i ]);
1241+ for (j = 0 ; names [j ]; j ++ )
1242+ skx_printk (KERN_CONT , "%s " , names [j ]);
1243+ skx_printk (KERN_CONT , "\n" );
1244+ }
1245+
1246+ static void __exit skx_adxl_put (void )
1247+ {
1248+ kfree (adxl_values );
1249+ kfree (adxl_msg );
1250+ }
1251+
10971252/*
10981253 * skx_init:
10991254 * make sure we are running on the correct cpu model
@@ -1158,6 +1313,15 @@ static int __init skx_init(void)
11581313 }
11591314 }
11601315
1316+ skx_msg = kzalloc (MSG_SIZE , GFP_KERNEL );
1317+ if (!skx_msg ) {
1318+ rc = - ENOMEM ;
1319+ goto fail ;
1320+ }
1321+
1322+ if (nvdimm_count )
1323+ skx_adxl_get ();
1324+
11611325 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
11621326 opstate_init ();
11631327
@@ -1176,6 +1340,9 @@ static void __exit skx_exit(void)
11761340 edac_dbg (2 , "\n" );
11771341 mce_unregister_decode_chain (& skx_mce_dec );
11781342 skx_remove ();
1343+ if (nvdimm_count )
1344+ skx_adxl_put ();
1345+ kfree (skx_msg );
11791346 teardown_skx_debug ();
11801347}
11811348
0 commit comments