Skip to content

Commit

Permalink
transfer the page address to pre/post-sync-trigger scripts
Browse files Browse the repository at this point in the history
Signed-off-by: Zou Cao <zoucao@linux.alibaba.com>
  • Loading branch information
Zou Cao committed May 16, 2018
1 parent 0dcbe4a commit 18531c2
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 15 deletions.
10 changes: 5 additions & 5 deletions memdb.c
Expand Up @@ -132,7 +132,7 @@ static char *format_location(struct memdimm *md)

/* Run a user-defined trigger when an error threshold is crossed. */
void memdb_trigger(char *msg, struct memdimm *md, time_t t,
struct err_type *et, struct bucket_conf *bc, bool sync)
struct err_type *et, struct bucket_conf *bc, char *args[], bool sync)
{
struct leaky_bucket *bucket = &et->bucket;
char *env[MAX_ENV];
Expand Down Expand Up @@ -172,7 +172,7 @@ void memdb_trigger(char *msg, struct memdimm *md, time_t t,
xasprintf(&env[ei++], "THRESHOLD_COUNT=%d", bucket->count);
env[ei] = NULL;
assert(ei < MAX_ENV);
run_trigger(bc->trigger, NULL, env, sync);
run_trigger(bc->trigger, args, env, sync);
for (i = 0; i < ei; i++)
free(env[i]);
out:
Expand All @@ -194,7 +194,7 @@ account_over(struct err_triggers *t, struct memdimm *md, struct mce *m, unsigned
char *msg;
xasprintf(&msg, "Fallback %s memory error count %d exceeded threshold",
t->type, corr_err_cnt);
memdb_trigger(msg, md, 0, &md->ce, &t->ce_bucket_conf, false);
memdb_trigger(msg, md, 0, &md->ce, &t->ce_bucket_conf, NULL, false);
free(msg);
}
}
Expand All @@ -211,11 +211,11 @@ account_memdb(struct err_triggers *t, struct memdimm *md, struct mce *m)
if (m->status & MCI_STATUS_UC) {
md->uc.count++;
if (__bucket_account(&t->uc_bucket_conf, &md->uc.bucket, 1, m->time))
memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf, false);
memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf, NULL, false);
} else {
md->ce.count++;
if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, 1, m->time))
memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf, false);
memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf, NULL, false);
}
free(msg);
}
Expand Down
2 changes: 1 addition & 1 deletion memdb.h
Expand Up @@ -20,5 +20,5 @@ void memory_error(struct mce *m, int channel, int dimm, unsigned corr_err_cnt,

struct memdimm;
void memdb_trigger(char *msg, struct memdimm *md, time_t t,
struct err_type *et, struct bucket_conf *bc, bool sync);
struct err_type *et, struct bucket_conf *bc, char *argv[], bool sync);
struct memdimm *get_memdimm(int socketid, int channel, int dimm, int insert);
24 changes: 19 additions & 5 deletions page.c
Expand Up @@ -220,26 +220,40 @@ void account_page_error(struct mce *m, int channel, int dimm)
xasprintf(&msg, "Corrected memory errors on page %llx exceed threshold %s",
addr, thresh);
free(thresh);
memdb_trigger(msg, md, t, &mp->ce, &page_trigger_conf, false);
memdb_trigger(msg, md, t, &mp->ce, &page_trigger_conf, NULL, false);
free(msg);
mp->triggered = 1;

if (offline == OFFLINE_SOFT || offline == OFFLINE_SOFT_THEN_HARD) {
struct bucket_conf page_soft_trigger_conf;
char *argv[] = {
NULL,
NULL,
NULL,
};
char *args;

asprintf(&args, "%lld", addr);
argv[0]=args;

memcpy(&page_soft_trigger_conf, &page_trigger_conf, sizeof(struct bucket_conf));
page_soft_trigger_conf.trigger = page_error_pre_soft_trigger;
asprintf(&msg, "pre soft trigger run for page %llx", addr);
memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, true);
argv[0]=page_error_pre_soft_trigger;
argv[1]=args;
asprintf(&msg, "pre soft trigger run for page %lld", addr);
memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true);
free(msg);

offline_action(mp, addr);

memcpy(&page_soft_trigger_conf, &page_trigger_conf, sizeof(struct bucket_conf));
page_soft_trigger_conf.trigger = page_error_post_soft_trigger;
asprintf(&msg, "post soft trigger run for page %llx", addr);
memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, true);
argv[0]=page_error_post_soft_trigger;
argv[1]=args;
asprintf(&msg, "post soft trigger run for page %lld", addr);
memdb_trigger(msg, md, t, &mp->ce, &page_soft_trigger_conf, argv, true);
free(msg);
free(args);

} else
offline_action(mp, addr);
Expand Down
5 changes: 3 additions & 2 deletions triggers/page-error-post-sync-soft-trigger
Expand Up @@ -18,21 +18,22 @@
# UCCOUNT Total uncorrected error count for DIMM
# LASTEVENT Time stamp of event that triggered threshold (in time_t format, seconds)
# THRESHOLD_COUNT Total number of events in current threshold time period of specific type
# ARGUMENTS: $1 the address of the page being soft-offlined
#
# note: will run as mcelog configured user
# this can be changed in mcelog.conf

logger -s -p daemon.err -t mcelog "$MESSAGE"
logger -s -p daemon.err -t mcelog "Location: $LOCATION"

[ -x ./page-error-post-sync-soft-trigger.local ] && . ./page-error-post-sync-soft-trigger.local
[ -x ./page-error-post-sync-soft-trigger.local ] && . ./page-error-post-sync-soft-trigger.local $1

if [ -d page-error-post-sync-soft-trigger.extern ]
then
ls page-error-post-sync-soft-trigger.extern |
while read item
do
[ -x ./page-error-post-sync-soft-trigger.extern/$item ] && . ./page-error-post-sync-soft-trigger.extern/$item
[ -x ./page-error-post-sync-soft-trigger.extern/$item ] && . ./page-error-post-sync-soft-trigger.extern/$item $1
done
fi

Expand Down
5 changes: 3 additions & 2 deletions triggers/page-error-pre-sync-soft-trigger
Expand Up @@ -18,21 +18,22 @@
# UCCOUNT Total uncorrected error count for DIMM
# LASTEVENT Time stamp of event that triggered threshold (in time_t format, seconds)
# THRESHOLD_COUNT Total number of events in current threshold time period of specific type
# ARGUMENTS: $1 the address of the page being soft-offlined
#
# note: will run as mcelog configured user
# this can be changed in mcelog.conf

logger -s -p daemon.err -t mcelog "$MESSAGE"
logger -s -p daemon.err -t mcelog "Location: $LOCATION"

[ -x ./page-error-pre-soft-trigger.local ] && . ./page-error-pre-soft-trigger.local
[ -x ./page-error-pre-soft-trigger.local ] && . ./page-error-pre-soft-trigger.local $1

if [ -d page-error-pre-sync-soft-trigger.extern ]
then
ls page-error-pre-sync-soft-trigger.extern |
while read item
do
[ -x ./page-error-pre-sync-soft-trigger.extern/$item ] && . ./page-error-pre-sync-soft-trigger.extern/$item
[ -x ./page-error-pre-sync-soft-trigger.extern/$item ] && . ./page-error-pre-sync-soft-trigger.extern/$item $1
done
fi

Expand Down

0 comments on commit 18531c2

Please sign in to comment.