Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
MRP-455 cascading client evictions
It may happen that
- MDS is overloaded with enqueues; they consume all the threads on
  the MDS_REQUEST portal and are waiting for a lock a client holds;
- that client tries to re-connect but MDS is out of threads and
  re-connection fails;
- other clients are waiting for their enqueue completions; they ping
  MDS to check whether it is still alive, but although the ping is an
  HP-rpc, there is no thread reserved for it. Thus, the other clients
  time out as well.

Ensure each service that handles HP-rpcs has an extra thread reserved
for them; make MDS_CONNECT and OST_CONNECT HP-rpcs.

Reviewed-by: Alexey Lyashkov <alexey_lyashkov@xyratex.com>
Reviewed-by: Andrew Perepechko <Andrew_Perepechko@xyratex.com>
  • Loading branch information
Vitaly Fertman committed Mar 21, 2012
1 parent 3d25461 commit afcf3cf
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 21 deletions.
4 changes: 3 additions & 1 deletion lustre/include/lustre_net.h
Expand Up @@ -130,7 +130,7 @@
#define LDLM_MAXREPSIZE (1024)

/** Absolute limits */
#define MDT_MIN_THREADS 2UL
#define MDT_MIN_THREADS 3UL /* difficult replies, HPQ, others */
#ifndef MDT_MAX_THREADS
#define MDT_MAX_THREADS 512UL
#endif
Expand Down Expand Up @@ -1459,6 +1459,7 @@ struct ptlrpc_service_conf {
int psc_min_threads;
int psc_max_threads;
__u32 psc_ctx_tags;
int (*psc_hpreq_handler)(struct ptlrpc_request *);
};

/* ptlrpc/service.c */
Expand All @@ -1473,6 +1474,7 @@ void ptlrpc_save_lock (struct ptlrpc_request *req,
void ptlrpc_commit_replies(struct obd_export *exp);
void ptlrpc_dispatch_difficult_reply (struct ptlrpc_reply_state *rs);
void ptlrpc_schedule_difficult_reply (struct ptlrpc_reply_state *rs);
int ptlrpc_hpreq_handler(struct ptlrpc_request *req);
struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c,
svc_handler_t h, char *name,
struct proc_dir_entry *proc_entry,
Expand Down
27 changes: 18 additions & 9 deletions lustre/mdt/mdt_handler.c
Expand Up @@ -3864,17 +3864,18 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
*/
.psc_min_threads = mdt_min_threads,
.psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD
.psc_ctx_tags = LCT_MD_THREAD,
.psc_hpreq_handler = ptlrpc_hpreq_handler,
};

m->mdt_ldlm_client = &m->mdt_md_dev.md_lu_dev.ld_obd->obd_ldlm_client;
ptlrpc_init_client(LDLM_CB_REQUEST_PORTAL, LDLM_CB_REPLY_PORTAL,
"mdt_ldlm_client", m->mdt_ldlm_client);

m->mdt_regular_service =
ptlrpc_init_svc_conf(&conf, mdt_regular_handle, LUSTRE_MDT_NAME,
procfs_entry, target_print_req,
LUSTRE_MDT_NAME);

if (m->mdt_regular_service == NULL)
RETURN(-ENOMEM);

Expand All @@ -3896,7 +3897,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
.psc_min_threads = mdt_min_threads,
.psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD
.psc_ctx_tags = LCT_MD_THREAD,
.psc_hpreq_handler = NULL,
};
m->mdt_readpage_service =
ptlrpc_init_svc_conf(&conf, mdt_readpage_handle,
Expand Down Expand Up @@ -3927,7 +3929,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
.psc_min_threads = mdt_min_threads,
.psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD
.psc_ctx_tags = LCT_MD_THREAD,
.psc_hpreq_handler = NULL,
};

m->mdt_setattr_service =
Expand Down Expand Up @@ -3957,7 +3960,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
.psc_min_threads = mdt_min_threads,
.psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD
.psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD,
.psc_hpreq_handler = NULL,
};

m->mdt_mdsc_service =
Expand Down Expand Up @@ -3986,7 +3990,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
.psc_min_threads = mdt_min_threads,
.psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD
.psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD,
.psc_hpreq_handler = NULL,
};

m->mdt_mdss_service =
Expand Down Expand Up @@ -4018,7 +4023,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
.psc_min_threads = mdt_min_threads,
.psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD
.psc_ctx_tags = LCT_MD_THREAD|LCT_DT_THREAD,
.psc_hpreq_handler = NULL,
};

m->mdt_dtss_service =
Expand All @@ -4045,7 +4051,8 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
.psc_min_threads = mdt_min_threads,
.psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_DT_THREAD|LCT_MD_THREAD
.psc_ctx_tags = LCT_DT_THREAD|LCT_MD_THREAD,
.psc_hpreq_handler = NULL,
};

m->mdt_fld_service =
Expand Down Expand Up @@ -4075,8 +4082,10 @@ static int mdt_start_ptlrpc_service(struct mdt_device *m)
.psc_watchdog_factor = MDT_SERVICE_WATCHDOG_FACTOR,
.psc_min_threads = mdt_min_threads,
.psc_max_threads = mdt_max_threads,
.psc_ctx_tags = LCT_MD_THREAD
.psc_ctx_tags = LCT_MD_THREAD,
.psc_hpreq_handler = ptlrpc_hpreq_handler,
};

m->mdt_xmds_service =
ptlrpc_init_svc_conf(&conf, mdt_xmds_handle,
LUSTRE_MDT_NAME "_mds",
Expand Down
2 changes: 1 addition & 1 deletion lustre/ost/ost_handler.c
Expand Up @@ -2461,7 +2461,7 @@ static int ost_setup(struct obd_device *obd, struct lustre_cfg* lcfg)
ost_handle, LUSTRE_OSS_NAME,
obd->obd_proc_entry, target_print_req,
oss_min_threads, oss_max_threads,
"ll_ost", LCT_DT_THREAD, NULL);
"ll_ost", LCT_DT_THREAD, ptlrpc_hpreq_handler);
if (ost->ost_service == NULL) {
CERROR("failed to start service\n");
GOTO(out_lprocfs, rc = -ENOMEM);
Expand Down
1 change: 1 addition & 0 deletions lustre/ptlrpc/ptlrpc_module.c
Expand Up @@ -225,6 +225,7 @@ EXPORT_SYMBOL(ptlrpc_start_thread);
EXPORT_SYMBOL(ptlrpc_unregister_service);
EXPORT_SYMBOL(ptlrpc_service_health_check);
EXPORT_SYMBOL(ptlrpc_hpreq_reorder);
EXPORT_SYMBOL(ptlrpc_hpreq_handler);

/* pack_generic.c */
EXPORT_SYMBOL(lustre_msg_check_version);
Expand Down
36 changes: 26 additions & 10 deletions lustre/ptlrpc/service.c
Expand Up @@ -441,10 +441,10 @@ struct ptlrpc_service *ptlrpc_init_svc_conf(struct ptlrpc_service_conf *c,
return ptlrpc_init_svc(c->psc_nbufs, c->psc_bufsize,
c->psc_max_req_size, c->psc_max_reply_size,
c->psc_req_portal, c->psc_rep_portal,
c->psc_watchdog_factor,
h, name, proc_entry,
c->psc_watchdog_factor, h, name, proc_entry,
prntfn, c->psc_min_threads, c->psc_max_threads,
threadname, c->psc_ctx_tags, NULL);
threadname, c->psc_ctx_tags,
c->psc_hpreq_handler);
}
EXPORT_SYMBOL(ptlrpc_init_svc_conf);

Expand All @@ -456,6 +456,28 @@ static void ptlrpc_at_timer(unsigned long castmeharder)
cfs_waitq_signal(&svc->srv_waitq);
}

/*
 * hpreq_check callback that classifies every request it is asked about
 * as high priority.
 *
 * \param req  the request being classified; not inspected here.
 * \retval 1   always.
 */
static int ptlrpc_hpreq_check(struct ptlrpc_request *req)
{
        const int is_high_priority = 1;

        return is_high_priority;
}

/*
 * Request ops attached to requests that are high priority by opcode alone:
 * the check callback always answers "yes" and no lock-match callback is
 * provided.
 */
static struct ptlrpc_hpreq_ops ptlrpc_hpreq_common = {
        .hpreq_check      = ptlrpc_hpreq_check,
        .hpreq_lock_match = NULL,
};

/*
 * Mark a request as high priority based on its operation code.
 *
 * OBD_PING, MDS_CONNECT and OST_CONNECT requests that already have an
 * export attached get the common high-priority ops installed in rq_ops;
 * every other request is left untouched.
 *
 * NOTE(review): requiring rq_export presumably restricts the CONNECT case
 * to re-connects of clients that are already known -- confirm with callers.
 *
 * \param req  incoming request; req->rq_ops may be updated.
 * \retval 0   always.
 */
int ptlrpc_hpreq_handler(struct ptlrpc_request *req)
{
        int opc = lustre_msg_get_opc(req->rq_reqmsg);

        /* Requests without an export never become high priority here. */
        if (!req->rq_export)
                return 0;

        switch (opc) {
        case OBD_PING:
        case MDS_CONNECT:
        case OST_CONNECT:
                req->rq_ops = &ptlrpc_hpreq_common;
                break;
        default:
                break;
        }

        return 0;
}

/**
* Initialize service on a given portal.
* This includes starting serving threads , allocating and posting rqbds and
Expand Down Expand Up @@ -1290,14 +1312,8 @@ void ptlrpc_hpreq_reorder(struct ptlrpc_request *req)
/** Check if the request is a high priority one. */
static int ptlrpc_server_hpreq_check(struct ptlrpc_request *req)
{
int opc, rc = 0;
int rc = 0;
ENTRY;

/* Check by request opc. */
opc = lustre_msg_get_opc(req->rq_reqmsg);
if (opc == OBD_PING)
RETURN(1);

/* Perform request specific check. */
if (req->rq_ops && req->rq_ops->hpreq_check)
rc = req->rq_ops->hpreq_check(req);
Expand Down

0 comments on commit afcf3cf

Please sign in to comment.