Skip to content

Commit

Permalink
6618751 Include memboard in T5440 FBR/FBU diagnosis
Browse files Browse the repository at this point in the history
6650181 cpumemDE handling of the ereport dsc and dsu with the new vf dram-ear format
6754532 mem_unusable() fails to nv_alloc topo return value
6712823 SPARC-Enterprise-T2000 topo map should use motherboard enumerator
  • Loading branch information
Louis Tsien authored and Louis Tsien committed Oct 30, 2008
1 parent 8838ec3 commit 2d6aa54
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 60 deletions.
165 changes: 119 additions & 46 deletions usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_branch.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
* Use is subject to license terms.
*/

#pragma ident "%Z%%M% %I% %E% SMI"

#include <cmd_mem.h>
#include <cmd_branch.h>
#include <cmd_dimm.h>
Expand All @@ -45,31 +43,34 @@
#define BUF_SIZE 120
#define LEN_CMP 6

/*
 * is_t5440_unum: Compare the first LEN_CMP characters of unum against the
 * "MB/CPU" and "MB/MEM" prefixes.
 * Returns 1 if either prefix matches, 0 otherwise.
 */
int
is_t5440_unum(const char *unum)
{
	static const char *const prefixes[] = { "MB/CPU", "MB/MEM" };
	size_t i;

	for (i = 0; i < sizeof (prefixes) / sizeof (prefixes[0]); i++) {
		if (strncmp(unum, prefixes[i], LEN_CMP) == 0)
			return (1);
	}
	return (0);
}
/*
* mbd_label: If a DIMM associated with this branch is located on a memory
* expansion board or riser board, return (pointer to) the label of that board;
* otherwise return NULL.
* We assume that there will be at most one such board for any branch.
*/

int
is_dimm_on_memboard(cmd_branch_t *branch)
char *
mbd_label(fmd_hdl_t *hdl, cmd_branch_t *branch, const char *nacname)
{
cmd_dimm_t *dimm;
cmd_branch_memb_t *bm;
char *p;
size_t s;

if (is_t5440_unum(branch->branch_unum)) {
for (bm = cmd_list_next(&branch->branch_dimms); bm != NULL;
bm = cmd_list_next(bm)) {
dimm = bm->dimm;
if (strstr(dimm->dimm_unum, "MEM") != NULL) {
return (1);
}
for (bm = cmd_list_next(&branch->branch_dimms); bm != NULL;
bm = cmd_list_next(bm)) {
dimm = bm->dimm;
if ((p = strstr(dimm->dimm_unum, nacname)) != NULL) {
p = strchr(p, '/'); /* include instance number */
s = p - dimm->dimm_unum;
p = fmd_hdl_zalloc(hdl, s+1, FMD_SLEEP);
(void) strncpy(p, dimm->dimm_unum, s);
*(p + s) = '\0';
return (p);
}
}
return (0);
return (NULL);
}

void
Expand Down Expand Up @@ -129,8 +130,9 @@ branch_dimm_create(fmd_hdl_t *hdl, char *dimm_unum, char **serids,
return (NULL);
}

static fmd_hdl_t *br_hdl; /* for exclusive use of callback */
static fmd_hdl_t *br_hdl; /* for use by callbacks */
static int br_dimmcount;
static nvlist_t *br_memb_nvl;

/*ARGSUSED*/
static int
Expand Down Expand Up @@ -224,62 +226,133 @@ branch_dimmlist_create(fmd_hdl_t *hdl, cmd_branch_t *branch)
topo_walk_fini(twp);
fmd_hdl_topo_rele(hdl, thp);

for (dimm_count = 0, bp = &branch->branch_dimms; bp != NULL;
bp = cmd_list_next(bp), dimm_count++)
for (dimm_count = 0, bp = cmd_list_next(&branch->branch_dimms);
bp != NULL; bp = cmd_list_next(bp), dimm_count++)
;
return (dimm_count);
}

/*
 * fru_by_label_cb: topology walk callback.  'arg' is the label string being
 * searched for.  When a node's label equals it and the node's FRU can be
 * fetched, the FRU nvlist is left in br_memb_nvl and the walk is terminated;
 * in every other case the walk moves on to the next node.
 */
/*ARGSUSED*/
static int
fru_by_label_cb(topo_hdl_t *thp, tnode_t *node, void *arg)
{
	const char *wanted = (char *)arg;
	char *node_label;
	int err;
	int verdict = TOPO_WALK_NEXT;

	/* a node without a label cannot match */
	if (topo_node_label(node, &node_label, &err) < 0)
		return (verdict);

	/* only ask for the FRU once the label is known to match */
	if (strcmp(wanted, node_label) == 0 &&
	    topo_node_fru(node, &br_memb_nvl, NULL, &err) == 0)
		verdict = TOPO_WALK_TERMINATE;

	topo_hdl_strfree(thp, node_label);
	return (verdict);
}

/*
 * fru_by_label: Walk the hc-scheme topology looking for the node whose label
 * equals 'target' and return that node's FRU nvlist, or NULL if no such node
 * is found, its FRU cannot be fetched, or the topology cannot be walked.
 * The result is handed back through the file-static br_memb_nvl, which
 * fru_by_label_cb() fills in; add_bdflt_to_case() releases it with
 * nvlist_free().
 */
static nvlist_t *
fru_by_label(fmd_hdl_t *hdl, const char *target)
{
	topo_hdl_t *thp;
	topo_walk_t *twp;
	int err;

	br_memb_nvl = NULL;

	/* without a hold there is nothing to walk and nothing to release */
	if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
		return (NULL);

	if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
	    fru_by_label_cb, (void *)target, &err)) != NULL) {
		br_hdl = hdl;
		(void) topo_walk_step(twp, TOPO_WALK_CHILD);
		topo_walk_fini(twp);
	}

	/* drop the hold taken above, whether or not the walk was started */
	fmd_hdl_topo_rele(hdl, thp);
	return (br_memb_nvl);
}

/*
 * add_bdflt_to_case: Add a board fault to case 'cp'.
 *   label      - label of the board; used both to find the board's FRU in
 *                the topology and as the fault's location string.
 *   fltnm      - fault class name.
 *   board_cert - certainty assigned to this suspect.
 * If no FRU is found for the label, the case is left untouched.
 */
static void
add_bdflt_to_case(fmd_hdl_t *hdl, char *label, const char *fltnm,
    uint8_t board_cert, fmd_case_t *cp)
{
	nvlist_t *board_fru;
	nvlist_t *suspect;

	if ((board_fru = fru_by_label(hdl, label)) == NULL)
		return;

	/* the board's FRU nvlist is passed for both nvlist arguments */
	suspect = cmd_fault_add_location(hdl,
	    cmd_nvl_create_fault(hdl, fltnm, board_cert,
	    board_fru, board_fru, NULL), label);

	if (suspect != NULL)
		fmd_case_add_suspect(hdl, cp, suspect);

	/* the FRU nvlist from the lookup is released here in all cases */
	nvlist_free(board_fru);
}

/*
* For t5440, the memory channel goes like this:
* VF -> cpuboard -> D0 -> motherboard -> memboard -> D[1..3]
* If there is a dimm on the memory board, the memory board,
* motherboard, cpuboard, and dimms are in the suspect list.
* If there is no dimm on the memory board, the cpu board and
* the dimms are in the suspect list
* Memory board faults are not supported in this phase of
* the project.
* The board certainty = total board certainty / number of
* the faulty boards in the suspect list.
*/
void
cmd_branch_create_fault(fmd_hdl_t *hdl, cmd_branch_t *branch,
const char *fltnm, nvlist_t *asru)
{
nvlist_t *flt, *mbnvl;
nvlist_t *flt;
cmd_branch_memb_t *bm;
cmd_dimm_t *dimm;
int dimm_count = 0;
uint_t cert = 0;
uint_t board_cert = 0;
char *fruloc = NULL;
int count_board_fault = 1;
int memb_flag = 0;
char *fruloc = NULL, *membd_label;

/* attach the dimms to the branch */
dimm_count = branch_dimmlist_create(hdl, branch);

if (is_dimm_on_memboard(branch)) {
mbnvl = init_mb(hdl);
if (mbnvl != NULL)
count_board_fault++;
memb_flag = 1;
}
if ((membd_label = mbd_label(hdl, branch, "MEM")) != NULL) {
board_cert = CMD_BOARDS_CERT / 3; /* CPU, MEM, MB */

board_cert = CMD_BOARDS_CERT / count_board_fault;
/*
* Batoka with memory expansion. CPU expansion board will
* be added below. Add memory expansion board and motherboard
* FRUs here.
*/

/* add the motherboard fault */
if ((memb_flag) && (mbnvl != NULL)) {
fmd_hdl_debug(hdl,
"cmd_branch_create_fault: create motherboard fault");
flt = cmd_boardfru_create_fault(hdl, mbnvl, fltnm,
board_cert, "MB");
if (flt != NULL)
fmd_case_add_suspect(hdl, branch->branch_case.cc_cp,
flt);
nvlist_free(mbnvl);
add_bdflt_to_case(hdl, membd_label, fltnm, board_cert,
branch->branch_case.cc_cp);
fmd_hdl_strfree(hdl, membd_label);
add_bdflt_to_case(hdl, "MB", fltnm, board_cert,
branch->branch_case.cc_cp);

} else if ((membd_label = mbd_label(hdl, branch, "MR")) != NULL) {

board_cert = CMD_BOARDS_CERT / 2; /* MB, MR */

/*
* Maramba or similar platform with mezzanine board.
* Motherboard FRU will be added below. Add the mezzanine
* board here.
*/

add_bdflt_to_case(hdl, membd_label, fltnm, board_cert,
branch->branch_case.cc_cp);
fmd_hdl_strfree(hdl, membd_label);
} else {
board_cert = CMD_BOARDS_CERT; /* only MB or CPU */
}

/*
* The code which follows adds to the suspect list the FRU which
* contains the ereport 'detector'. This can be either a CPU
* expansion board (Batoka), or motherboard (Huron, Maramba, or
* derivative).
*/

fruloc = cmd_getfru_loc(hdl, asru);
flt = cmd_boardfru_create_fault(hdl, asru, fltnm, board_cert, fruloc);
if (flt != NULL)
Expand Down
21 changes: 17 additions & 4 deletions usr/src/cmd/fm/modules/sun4v/cpumem-diagnosis/cmd_memerr_arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
* Use is subject to license terms.
*/

#pragma ident "%Z%%M% %I% %E% SMI"

/*
* Ereport-handling routines for memory errors
*/
Expand Down Expand Up @@ -60,6 +58,7 @@
#define VF_L2ESYR_C2C 0x8000000000000000ULL
#define UTS2_CPUS_PER_CHIP 64
#define FBR_ERROR ".fbr"
#define DSU_ERROR ".dsu"

extern ldom_hdl_t *cpumem_diagnosis_lhp;

Expand Down Expand Up @@ -293,7 +292,10 @@ cmd_evdisp_t
cmd_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
cmd_errcl_t clcode)
{
return (xe_common(hdl, ep, nvl, class, clcode, cmd_ce_common));
if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsc") == 0)
return (CMD_EVD_UNUSED); /* drop VF dsc's */
else
return (xe_common(hdl, ep, nvl, class, clcode, cmd_ce_common));
}

/*ARGSUSED*/
Expand Down Expand Up @@ -325,7 +327,15 @@ cmd_evdisp_t
cmd_ue(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
cmd_errcl_t clcode)
{
return (xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common));
if (strcmp(class, "ereport.cpu.ultraSPARC-T2plus.dsu") == 0)
/*
* VF dsu's need to be treated like branch errors,
* because we can't localize to a single DIMM or pair of
* DIMMs given missing/invalid parts of the dram-ear.
*/
return (cmd_fb(hdl, ep, nvl, class, clcode));
else
return (xe_common(hdl, ep, nvl, class, clcode, cmd_ue_common));
}

/*ARGSUSED*/
Expand Down Expand Up @@ -412,6 +422,9 @@ cmd_fb(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class,
cmd_branch_create_fault(hdl, branch,
"fault.memory.link-c", det);
}
} else if (strcmp(strrchr(class, '.'), DSU_ERROR) == 0) {
fmd_hdl_debug(hdl, "Processing dsu event");
cmd_branch_create_fault(hdl, branch, "fault.memory.bank", det);
} else {
fmd_hdl_debug(hdl, "Processing fbu event");
cmd_branch_create_fault(hdl, branch, "fault.memory.link-u",
Expand Down
11 changes: 1 addition & 10 deletions usr/src/lib/fm/topo/maps/SUNW,Sun-Fire-T200/SPARC-Enterprise-T2000-hc-topology.xml
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,11 @@
information: Portions Copyright [yyyy] [name of copyright owner]
CDDL HEADER END
ident "%Z%%M% %I% %E% SMI"
-->

<topology name='SUNW,SPARC-Enterprise-T2000' scheme='hc'>
<range name='motherboard' min='0' max='0'>
<node instance='0'>
<propgroup name='protocol' version='1'
name-stability='Private' data-stability='Private' >
<propval name='label' type='string'
value='MB' />
</propgroup>
</node>
<enum-method name='motherboard' version='1'/>
<dependents grouping='children'>
<range name='chip' min='0' max='0'>
<enum-method name='chip' version='1' />
Expand Down
2 changes: 2 additions & 0 deletions usr/src/lib/fm/topo/modules/sun4v/platform-mem/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,8 @@ mem_unusable(topo_mod_t *mod, tnode_t *node, topo_version_t vers,
retval = 1;
}

if (topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) != 0)
return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
if (nvlist_add_uint32(*out, TOPO_METH_UNUSABLE_RET, retval) != 0) {
nvlist_free(*out);
return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
Expand Down

0 comments on commit 2d6aa54

Please sign in to comment.