Skip to content

Commit

Permalink
Merge pull request #369 from daltonbohning/master
Browse files Browse the repository at this point in the history
daos: dcp: enhanced error checking
  • Loading branch information
daltonbohning committed Sep 15, 2020
2 parents a9cd09d + c41d37d commit de44ae4
Show file tree
Hide file tree
Showing 8 changed files with 426 additions and 132 deletions.
1 change: 1 addition & 0 deletions src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# todo reassess if all of these must be *installed*
LIST(APPEND libmfu_install_headers
mfu.h
mfu_errors.h
mfu_bz2.h
mfu_flist.h
mfu_flist_internal.h
Expand Down
32 changes: 32 additions & 0 deletions src/common/mfu_errors.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* Defines common error codes */

#ifndef MFU_ERRORS_H
#define MFU_ERRORS_H

/* enable C++ codes to include this header directly */
#ifdef __cplusplus
extern "C" {
#endif

/* Generic error codes */
#define MFU_ERR 1000
#define MFU_ERR_INVAL_ARG 1001

/* DCP-specific error codes */
#define MFU_ERR_DCP 1100
#define MFU_ERR_DCP_COPY 1101

/* DAOS-specific error codes */
#define MFU_ERR_DAOS 4000
#define MFU_ERR_DAOS_INVAL_ARG 4001

/* Error macros
 *
 * MFU_ERRF is a printf-style format fragment and MFU_ERRP(rc) expands to
 * the two arguments that fragment consumes: the literal tag "MFU_ERR"
 * followed by the integer code rc.  They are meant to be used as a pair:
 *
 *   printf("copy failed: " MFU_ERRF "\n", MFU_ERRP(MFU_ERR_DCP_COPY));
 *
 * which prints "copy failed: MFU_ERR(1101)".
 *
 * rc is parenthesized so that any expression passed as the argument is
 * evaluated as a single unit after expansion. */
#define MFU_ERRF "%s(%d)"
#define MFU_ERRP(rc) "MFU_ERR", (rc)

#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* MFU_ERRORS_H */
30 changes: 21 additions & 9 deletions src/common/mfu_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ int mfu_file_access(const char* path, int amode, mfu_file_t* mfu_file)
int rc = mfu_access(path, amode);
return rc;
} else if (mfu_file->type == DAOS) {
int rc = daos_access(path, amode);
int rc = daos_access(path, amode, mfu_file);
return rc;
} else {
MFU_ABORT(-1, "File type not known: %s type=%d",
Expand Down Expand Up @@ -227,11 +227,23 @@ int mfu_access(const char* path, int amode)
return rc;
}

int daos_access(const char* path, int amode)
int daos_access(const char* path, int amode, mfu_file_t* mfu_file)
{
#ifdef DAOS_SUPPORT
/* noop because daos have an access call */
return 0;
char* name = NULL;
char* dir_name = NULL;
parse_filename(path, &name, &dir_name);
assert(dir_name);

dfs_obj_t* parent = NULL;
int rc = dfs_access(mfu_file->dfs, parent, name, amode);
if (rc) {
MFU_LOG(MFU_LOG_ERR, "dfs_access %s failed (%d %s)",
name, rc, strerror(rc));
errno = rc;
rc = -1;
}
return rc;
#endif
}

Expand Down Expand Up @@ -264,7 +276,7 @@ int daos_chmod(const char *path, mode_t mode, mfu_file_t* mfu_file)
parse_filename(path, &name, &dir_name);
assert(dir_name);

dfs_obj_t *parent = NULL;
dfs_obj_t* parent = NULL;
int rc = dfs_lookup(mfu_file->dfs, dir_name, O_RDWR, &parent, NULL, NULL);
if (parent != NULL) {
rc = dfs_chmod(mfu_file->dfs, parent, name, mode);
Expand Down Expand Up @@ -346,7 +358,7 @@ int daos_stat(const char* path, struct stat* buf, mfu_file_t* mfu_file) {
parse_filename(path, &name, &dir_name);
assert(dir_name);

dfs_obj_t *parent = NULL;
dfs_obj_t* parent = NULL;
int rc;
if (mfu_file->only_daos) {
rc = dfs_lookup(mfu_file->dfs, dir_name, O_RDWR, &parent, NULL, NULL);
Expand Down Expand Up @@ -564,7 +576,7 @@ void daos_open(const char* file, int flags, mode_t mode, mfu_file_t* mfu_file)
parse_filename(file, &name, &dir_name);
assert(dir_name);

dfs_obj_t *parent = NULL;
dfs_obj_t* parent = NULL;
int rc = dfs_lookup(mfu_file->dfs, dir_name, O_RDWR, &parent, NULL, NULL);
if (parent != NULL) {
rc = dfs_open(mfu_file->dfs, parent, name,
Expand Down Expand Up @@ -1054,7 +1066,7 @@ int daos_mkdir(const char* dir, mode_t mode, mfu_file_t* mfu_file) {
assert(dir_name);

/* Need to lookup parent directory in DFS */
dfs_obj_t *parent = NULL;
dfs_obj_t* parent = NULL;
int rc = dfs_lookup(mfu_file->dfs, dir_name, O_RDWR, &parent, NULL, NULL);

/* only call mkdir if the dir_name is not the root DFS directory */
Expand Down Expand Up @@ -1131,7 +1143,7 @@ int mfu_rmdir(const char* dir)

#ifdef DAOS_SUPPORT
struct dfs_mfu_t {
dfs_obj_t *dir;
dfs_obj_t* dir;
struct dirent ents[NUM_DIRENTS];
daos_anchor_t anchor;
int num_ents;
Expand Down
2 changes: 1 addition & 1 deletion src/common/mfu_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ struct stat64;
/* calls access, and retries a few times if we get EIO or EINTR */
int mfu_file_access(const char* path, int amode, mfu_file_t* mfu_file);
int mfu_access(const char* path, int amode);
int daos_access(const char* path, int amode);
int daos_access(const char* path, int amode, mfu_file_t* mfu_file);

/* calls lchown, and retries a few times if we get EIO or EINTR */
int mfu_lchown(const char* path, uid_t owner, gid_t group);
Expand Down
15 changes: 9 additions & 6 deletions src/common/mfu_param_path.c
Original file line number Diff line number Diff line change
Expand Up @@ -406,12 +406,6 @@ void mfu_param_path_check_copy(uint64_t num, const mfu_param_path* paths,
*flag_valid = 0;
*flag_copy_into_dir = 0;

if (mfu_src_file->type == DAOS || mfu_dst_file->type == DAOS) {
if (num != 1) {
MFU_LOG(MFU_LOG_ERR, "Only one source can be specified when using DAOS");
}
}

/* need at least one source path and a destination path to have a shot at being valid */
if (num < 1 || paths == NULL || destpath == NULL) {
return;
Expand All @@ -426,6 +420,15 @@ void mfu_param_path_check_copy(uint64_t num, const mfu_param_path* paths,

/* just have rank 0 check */
if(rank == 0) {
/* DAOS-specific error checks */
if (mfu_src_file->type == DAOS || mfu_dst_file->type == DAOS) {
if (num != 1) {
MFU_LOG(MFU_LOG_ERR, "Only one source can be specified when using DAOS");
valid = 0;
goto bcast;
}
}

/* count number of readable source paths */
uint64_t i;
int num_readable = 0;
Expand Down
87 changes: 59 additions & 28 deletions src/common/mfu_util.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,58 +107,89 @@ void daos_bcast_handle(
mfu_free(&global.iov_buf);
}

void daos_connect(
int daos_connect(
int rank,
const char* svc,
uuid_t pool_uuid,
uuid_t cont_uuid,
daos_handle_t* poh,
daos_handle_t* coh)
daos_handle_t* coh,
bool connect_pool,
bool create_cont)
{
/* TODO: if src daos path and dst daos path are false
* skip connecting to daos pool */
/* assume failure until otherwise */
int valid = 0;
int rc;

/* have rank 0 connect to the pool and container,
* we'll then broadcast the handle ids from rank 0 to everyone else */
if (rank == 0) {
d_rank_list_t* svcl = daos_rank_list_parse(svc, ":");
if (svcl == NULL) {
MFU_ABORT(-1, "Failed to parse DAOS rank list: '%s'", svc);
}
/* Parse svc and connect to DAOS pool */
if (connect_pool) {
d_rank_list_t* svcl = daos_rank_list_parse(svc, ":");
if (svcl == NULL) {
MFU_LOG(MFU_LOG_ERR, "Failed to parse DAOS rank list: '%s'", svc);
goto bcast;
}

/* Connect to DAOS pool */
daos_pool_info_t pool_info;
int rc = daos_pool_connect(pool_uuid, NULL, svcl, DAOS_PC_RW,
poh, &pool_info, NULL);
if (rc != 0) {
MFU_LOG(MFU_LOG_ERR, "Failed to connect to pool");
daos_pool_info_t pool_info;
rc = daos_pool_connect(pool_uuid, NULL, svcl, DAOS_PC_RW,
poh, &pool_info, NULL);
if (rc != 0) {
MFU_LOG(MFU_LOG_ERR, "Failed to connect to pool");
d_rank_list_free(svcl);
goto bcast;
}
d_rank_list_free(svcl);
}
d_rank_list_free(svcl);

/* attempt to open the daos container */
/* Try to open the container
* If NOEXIST we create it */
daos_cont_info_t co_info;
rc = daos_cont_open(*poh, cont_uuid, DAOS_COO_RW, coh, &co_info, NULL);

/* If NOEXIST we create it */
if (rc != 0) {
/* create the container */
uuid_t cuuid;
rc = dfs_cont_create(*poh, cuuid, NULL, NULL, NULL);
if (!create_cont) {
MFU_LOG(MFU_LOG_ERR, "Failed to open DFS container");
goto bcast;
}

rc = dfs_cont_create(*poh, cont_uuid, NULL, NULL, NULL);
if (rc != 0) {
MFU_LOG(MFU_LOG_ERR, "Failed to create DFS container");
goto bcast;
}

/* try to open it again */
rc = daos_cont_open(*poh, cuuid, DAOS_COO_RW, coh, &co_info, NULL);
rc = daos_cont_open(*poh, cont_uuid, DAOS_COO_RW, coh, &co_info, NULL);
if (rc != 0) {
MFU_LOG(MFU_LOG_ERR, "Failed to open DFS container");
goto bcast;
}
}

/* everything looks good so far */
valid = 1;
}

bcast:
/* broadcast valid from rank 0 */
MPI_Bcast(&valid, 1, MPI_INT, 0, MPI_COMM_WORLD);

/* return if invalid */
if (valid == 0) {
return -1;
}

/* broadcast pool and container handles from rank 0 */
daos_bcast_handle(rank, poh, poh, POOL_HANDLE);
/* broadcast pool handle from rank 0
* If connect_pool is false, then the handle was unchanged */
if (connect_pool) {
daos_bcast_handle(rank, poh, poh, POOL_HANDLE);
}

/* broadcast container handle from rank 0 */
daos_bcast_handle(rank, coh, poh, CONT_HANDLE);

return 0;
}
#endif

Expand Down Expand Up @@ -1092,11 +1123,11 @@ void mfu_stripe_set(const char *path, uint64_t stripe_size, int stripe_count)
}

/* executes a logical AND operation on flag on all procs on comm,
* returns 1 if all true and 0 otherwise */
int mfu_alltrue(int flag, MPI_Comm comm)
* returns true if all true and false otherwise */
bool mfu_alltrue(bool flag, MPI_Comm comm)
{
/* check that all processes wrote successfully */
int alltrue;
MPI_Allreduce(&flag, &alltrue, 1, MPI_INT, MPI_LAND, comm);
bool alltrue;
MPI_Allreduce(&flag, &alltrue, 1, MPI_C_BOOL, MPI_LAND, comm);
return alltrue;
}
8 changes: 5 additions & 3 deletions src/common/mfu_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,15 @@ void daos_bcast_handle(
);

/* connect to DAOS pool, and then open container */
void daos_connect(
int daos_connect(
int rank,
const char* svc,
uuid_t pool_uuid,
uuid_t cont_uuid,
daos_handle_t* poh,
daos_handle_t* coh
daos_handle_t* coh,
bool connect_pool,
bool create_cont
);
#endif

Expand Down Expand Up @@ -286,7 +288,7 @@ void mfu_stripe_set(const char *path, uint64_t stripe_size, int stripe_count);

/* executes a logical AND operation on flag on all procs on comm,
* returns true if all true and false otherwise */
int mfu_alltrue(int flag, MPI_Comm comm);
bool mfu_alltrue(bool flag, MPI_Comm comm);

#endif /* MFU_UTIL_H */

Expand Down

0 comments on commit de44ae4

Please sign in to comment.