Permalink
Browse files

MPI-3: add support for large counts using derived datatypes

Add support for MPI_Count type and MPI_COUNT datatype and add the required
MPI-3 functions MPI_Get_elements_x, MPI_Status_set_elements_x,
MPI_Type_get_extent_x, MPI_Type_get_true_extent_x, and MPI_Type_size_x.
This commit adds only the C bindings. Fortran bindings will be added in
another commit. For now the MPI_Count type is defined to have the same size
as MPI_Offset. The type is required to be at least as large as MPI_Offset
and MPI_Aint. The type was initially intended to be a ssize_t (if it was
the same size as a long long) but there were issues compiling romio with
that definition (despite the inclusion of stddef.h).

I updated the datatype engine to use size_t instead of uint32_t to support
large datatypes. This will require some review to make sure that 1) the
changes are beneficial, 2) nothing was broken by the change (I doubt
anything was), and 3) there are no performance regressions due to this
change.
  • Loading branch information...
hjelmn hjelmn
hjelmn authored and hjelmn committed Jul 16, 2013
1 parent a6e5e8a commit db54d13404a241642fa783d5b3cc74edcb1103f2
View
@@ -796,9 +796,10 @@ AC_CACHE_SAVE
ompi_show_title "System-specific tests"
#
-# Test to determine type of MPI_Offset. This is searched in the following order
-# int64_t, long long, long, int. If none of these are 8 bytes, then we should
-# search for int32_t, long long, long, int.
+# Test to determine types of MPI_Offset and MPI_Count. This is searched in the
+# following order int64_t, long long, long, int. If none of these are 8 bytes,
+# then we should search for int32_t, long long, long, int. MPI_Count needs to
+# be at least as large as MPI_Offset and MPI_Aint.
#
MPI_OFFSET_TYPE="not found"
MPI_OFFSET_DATATYPE="not found"
@@ -807,30 +808,42 @@ if test $ac_cv_type_long_long = yes -a $ac_cv_sizeof_long_long = 8; then
MPI_OFFSET_TYPE="long long"
MPI_OFFSET_DATATYPE=MPI_LONG_LONG
MPI_OFFSET_SIZE=8
+ MPI_COUNT_TYPE="long long"
+ MPI_COUNT_SIZE=8
elif test $ac_cv_type_long = yes -a $ac_cv_sizeof_long = 8; then
MPI_OFFSET_TYPE="long"
MPI_OFFSET_DATATYPE=MPI_LONG
MPI_OFFSET_SIZE=8
+ MPI_COUNT_TYPE="long"
+ MPI_COUNT_SIZE=8
elif test $ac_cv_sizeof_int = 8; then
MPI_OFFSET_TYPE="int"
MPI_OFFSET_DATATYPE=MPI_INT
MPI_OFFSET_SIZE=8
+ MPI_COUNT_TYPE="int"
+ MPI_COUNT_SIZE=8
elif test $ac_cv_type_long_long = yes -a $ac_cv_sizeof_long_long = 4; then
MPI_OFFSET_TYPE="long long"
MPI_OFFSET_DATATYPE=MPI_LONG_LONG
MPI_OFFSET_SIZE=4
+ MPI_COUNT_TYPE="long long"
+ MPI_COUNT_SIZE=4
elif test $ac_cv_type_long = yes -a $ac_cv_sizeof_long = 4; then
MPI_OFFSET_TYPE="long"
MPI_OFFSET_DATATYPE=MPI_LONG
MPI_OFFSET_SIZE=4
+ MPI_COUNT_TYPE="long"
+ MPI_COUNT_SIZE=8
elif test $ac_cv_sizeof_int = 4; then
MPI_OFFSET_TYPE="int"
MPI_OFFSET_DATATYPE=MPI_INT
MPI_OFFSET_SIZE=4
+ MPI_COUNT_TYPE="int"
+ MPI_COUNT_SIZE=8
fi
AC_MSG_RESULT([$MPI_OFFSET_TYPE])
if test "$MPI_OFFSET_TYPE" = "not found"; then
- AC_MSG_WARN([*** Unable to find the right definition for MPI_Offset])
+ AC_MSG_WARN([*** Unable to find the right definition for MPI_Offset and MPI_Count])
AC_MSG_ERROR([Cannot continue])
fi
AC_MSG_CHECKING([checking for an MPI datatype for MPI_Offset])
@@ -845,6 +858,16 @@ AC_DEFINE_UNQUOTED(OMPI_MPI_OFFSET_TYPE, $MPI_OFFSET_TYPE, [Type of MPI_Offset -
AC_DEFINE_UNQUOTED(OMPI_MPI_OFFSET_SIZE, $MPI_OFFSET_SIZE, [Size of the MPI_Offset])
AC_DEFINE_UNQUOTED(OMPI_OFFSET_DATATYPE, $MPI_OFFSET_DATATYPE, [MPI datatype corresponding to MPI_Offset])
+if test $MPI_COUNT_SIZE -eq 8 ; then
+ MPI_COUNT_MAX="0x7fffffffffffffffll"
+elif test $MPI_COUNT_SIZE -eq 4 ; then
+ MPI_COUNT_MAX="0x7fffffffl"
+fi
+
+AC_DEFINE_UNQUOTED(OMPI_MPI_COUNT_SIZE, $MPI_COUNT_SIZE, [Size of the MPI_Count datatype])
+AC_DEFINE_UNQUOTED(OMPI_MPI_COUNT_TYPE, $MPI_COUNT_TYPE, [Type of the MPI_Count datatype])
+AC_DEFINE_UNQUOTED(MPI_COUNT_MAX, $MPI_COUNT_MAX, [Maximum value for an MPI_Count])
+
#
# Check for MPI_Aint type. Yes, there are platforms where
# sizeof(void*) != sizeof(long) (64 bit Windows, apparently).
@@ -272,14 +272,14 @@ ompi_datatype_get_true_extent( const ompi_datatype_t* type, OPAL_PTRDIFF_TYPE* t
return opal_datatype_get_true_extent( &type->super, true_lb, true_extent);
}
-static inline int32_t
+static inline ssize_t
ompi_datatype_get_element_count( const ompi_datatype_t* type, size_t iSize )
{
return opal_datatype_get_element_count( &type->super, iSize );
}
static inline int32_t
-ompi_datatype_set_element_count( const ompi_datatype_t* type, uint32_t count, size_t* length )
+ompi_datatype_set_element_count( const ompi_datatype_t* type, size_t count, size_t* length )
{
return opal_datatype_set_element_count( &type->super, count, length );
}
@@ -89,8 +89,15 @@
#define OMPI_DATATYPE_MPI_LB 0x2C
#define OMPI_DATATYPE_MPI_UB 0x2D
+
+/*
+ * Datatypes from the MPI 3.0 standard
+ */
+#define OMPI_DATATYPE_MPI_COUNT 0x2E
+
/* This should __ALWAYS__ stay last */
-#define OMPI_DATATYPE_MPI_UNAVAILABLE 0x2E
+#define OMPI_DATATYPE_MPI_UNAVAILABLE 0x2F
+
#define OMPI_DATATYPE_MPI_MAX_PREDEFINED (OMPI_DATATYPE_MPI_UNAVAILABLE+1)
@@ -274,6 +274,18 @@ ompi_predefined_datatype_t ompi_mpi_c_long_double_complex = OMPI_DATATYPE_INIT_D
ompi_predefined_datatype_t ompi_mpi_c_long_double_complex = OMPI_DATATYPE_INIT_UNAVAILABLE (LONG_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
#endif /* HAVE_LONG_DOUBLE */
+/*
+ * MPI 3.0 Datatypes
+ */
+#if OMPI_MPI_COUNT_SIZE == 4
+ompi_predefined_datatype_t ompi_mpi_count = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE(INT32_T, COUNT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT);
+#elif OMPI_MPI_COUNT_SIZE == 8
+ompi_predefined_datatype_t ompi_mpi_count = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE(INT64_T, COUNT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT);
+#else
+ompi_predefined_datatype_t ompi_mpi_count = OMPI_DATATYPE_INIT_UNAVAILABLE_BASIC_TYPE(INT64_T, COUNT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT);
+#endif
+
+
/*
* NOTE: The order of this array *MUST* match what is listed in
* opal_datatype_internal.h and ompi_datatype_internal.h
@@ -335,7 +347,11 @@ const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX_PREDEF
&ompi_mpi_lb.dt, /* 0x2C */
&ompi_mpi_ub.dt, /* 0x2D */
- &ompi_mpi_unavailable.dt, /* 0x2E */
+
+ /* MPI 3.0 types */
+ &ompi_mpi_count.dt, /* 0x2E */
+
+ &ompi_mpi_unavailable.dt, /* 0x2F */
};
opal_pointer_array_t ompi_datatype_f_to_c_table;
@@ -622,6 +638,9 @@ int32_t ompi_datatype_init( void )
MOOG(aint, 66);
MOOG(offset, 67);
+ /* MPI 3.0 types */
+ MOOG(count, 68);
+
/**
* Now make sure all non-contiguous types are marked as such.
*/
@@ -237,7 +237,7 @@ typedef struct
int MPI_TAG;
int MPI_ERROR;
int _cancelled;
- int _ucount; /* size_t */
+ size_t _ucount;
} offset;
} ompi_status_public_t;
/* datatype structure */
View
@@ -13,7 +13,7 @@
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2009-2012 Oak Rigde National Laboratory. All rights reserved.
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
- * Copyright (c) 2012 Los Alamos Nat Security, LLC. All rights reserved.
+ * Copyright (c) 2012-2013 Los Alamos Nat Security, LLC. All rights reserved.
* Copyright (c) 2011-2013 INRIA. All rights reserved.
* $COPYRIGHT$
*
@@ -146,6 +146,12 @@
/* Size of the MPI_Offset corresponding type */
#undef OMPI_MPI_OFFSET_SIZE
+/* Type of MPI_Count */
+#undef OMPI_MPI_COUNT_TYPE
+
+/* Size of MPI_Count */
+#undef OMPI_MPI_COUNT_SIZE
+
/* type to use for ptrdiff_t, if it does not exist, set to ptrdiff_t if it does exist */
#undef OPAL_PTRDIFF_TYPE
@@ -316,6 +322,7 @@ extern "C" {
typedef OPAL_PTRDIFF_TYPE MPI_Aint;
typedef OMPI_MPI_OFFSET_TYPE MPI_Offset;
+typedef OMPI_MPI_COUNT_TYPE MPI_Count;
typedef struct ompi_communicator_t *MPI_Comm;
typedef struct ompi_datatype_t *MPI_Datatype;
typedef struct ompi_errhandler_t *MPI_Errhandler;
@@ -968,6 +975,7 @@ OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_int64_t;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_uint64_t;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_aint;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_offset;
+OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_count;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_bool;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_complex;
OMPI_DECLSPEC extern struct ompi_predefined_datatype_t ompi_mpi_c_float_complex;
@@ -1125,6 +1133,9 @@ OMPI_DECLSPEC extern MPI_Fint *MPI_F_STATUSES_IGNORE;
#define MPI_C_LONG_DOUBLE_COMPLEX OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_c_long_double_complex)
#endif
+/* New datatypes from the 3.0 standard */
+#define MPI_COUNT OMPI_PREDEFINED_GLOBAL(MPI_Datatype, ompi_mpi_count)
+
#define MPI_ERRORS_ARE_FATAL OMPI_PREDEFINED_GLOBAL(MPI_Errhandler, ompi_mpi_errors_are_fatal)
#define MPI_ERRORS_RETURN OMPI_PREDEFINED_GLOBAL(MPI_Errhandler, ompi_mpi_errors_return)
@@ -1405,6 +1416,7 @@ OMPI_DECLSPEC int MPI_Igatherv(void *sendbuf, int sendcount, MPI_Datatype sendt
OMPI_DECLSPEC int MPI_Get_address(void *location, MPI_Aint *address);
OMPI_DECLSPEC int MPI_Get_count(MPI_Status *status, MPI_Datatype datatype, int *count);
OMPI_DECLSPEC int MPI_Get_elements(MPI_Status *status, MPI_Datatype datatype, int *count);
+OMPI_DECLSPEC int MPI_Get_elements_x(MPI_Status *status, MPI_Datatype datatype, MPI_Count *count);
OMPI_DECLSPEC int MPI_Get(void *origin_addr, int origin_count,
MPI_Datatype origin_datatype, int target_rank,
MPI_Aint target_disp, int target_count,
@@ -1599,6 +1611,8 @@ OMPI_DECLSPEC int MPI_Status_f2c(MPI_Fint *f_status, MPI_Status *c_status);
OMPI_DECLSPEC int MPI_Status_set_cancelled(MPI_Status *status, int flag);
OMPI_DECLSPEC int MPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype,
int count);
+OMPI_DECLSPEC int MPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype,
+ MPI_Count count);
OMPI_DECLSPEC int MPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
MPI_Status array_of_statuses[]);
OMPI_DECLSPEC int MPI_Testany(int count, MPI_Request array_of_requests[], int *index,
@@ -1667,10 +1681,14 @@ OMPI_DECLSPEC int MPI_Type_get_envelope(MPI_Datatype type, int *num_integers,
int *combiner);
OMPI_DECLSPEC int MPI_Type_get_extent(MPI_Datatype type, MPI_Aint *lb,
MPI_Aint *extent);
+OMPI_DECLSPEC int MPI_Type_get_extent_x(MPI_Datatype type, MPI_Count *lb,
+ MPI_Count *extent);
OMPI_DECLSPEC int MPI_Type_get_name(MPI_Datatype type, char *type_name,
int *resultlen);
OMPI_DECLSPEC int MPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint *true_lb,
MPI_Aint *true_extent);
+OMPI_DECLSPEC int MPI_Type_get_true_extent_x(MPI_Datatype datatype, MPI_Count *true_lb,
+ MPI_Count *true_extent);
OMPI_DECLSPEC int MPI_Type_hindexed(int count, int array_of_blocklengths[],
MPI_Aint array_of_displacements[],
MPI_Datatype oldtype, MPI_Datatype *newtype)
@@ -1688,6 +1706,7 @@ OMPI_DECLSPEC int MPI_Type_set_attr(MPI_Datatype type, int type_keyval,
void *attr_val);
OMPI_DECLSPEC int MPI_Type_set_name(MPI_Datatype type, char *type_name);
OMPI_DECLSPEC int MPI_Type_size(MPI_Datatype type, int *size);
+OMPI_DECLSPEC int MPI_Type_size_x(MPI_Datatype type, MPI_Count *size);
OMPI_DECLSPEC int MPI_Type_struct(int count, int array_of_blocklengths[],
MPI_Aint array_of_displacements[],
MPI_Datatype array_of_types[],
@@ -2016,6 +2035,8 @@ OMPI_DECLSPEC int PMPI_Get_address(void *location, MPI_Aint *address);
OMPI_DECLSPEC int PMPI_Get_count(MPI_Status *status, MPI_Datatype datatype, int *count);
OMPI_DECLSPEC int PMPI_Get_elements(MPI_Status *status, MPI_Datatype datatype,
int *count);
+OMPI_DECLSPEC int PMPI_Get_elements_x(MPI_Status *status, MPI_Datatype datatype,
+ MPI_Count *count);
OMPI_DECLSPEC int PMPI_Get(void *origin_addr, int origin_count,
MPI_Datatype origin_datatype, int target_rank,
MPI_Aint target_disp, int target_count,
@@ -2210,6 +2231,8 @@ OMPI_DECLSPEC int PMPI_Status_f2c(MPI_Fint *f_status, MPI_Status *c_status);
OMPI_DECLSPEC int PMPI_Status_set_cancelled(MPI_Status *status, int flag);
OMPI_DECLSPEC int PMPI_Status_set_elements(MPI_Status *status, MPI_Datatype datatype,
int count);
+OMPI_DECLSPEC int PMPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype,
+ MPI_Count count);
OMPI_DECLSPEC int PMPI_Testall(int count, MPI_Request array_of_requests[], int *flag,
MPI_Status array_of_statuses[]);
OMPI_DECLSPEC int PMPI_Testany(int count, MPI_Request array_of_requests[], int *index, int *flag, MPI_Status *status);
@@ -2277,10 +2300,14 @@ OMPI_DECLSPEC int PMPI_Type_get_envelope(MPI_Datatype type, int *num_integers,
int *combiner);
OMPI_DECLSPEC int PMPI_Type_get_extent(MPI_Datatype type, MPI_Aint *lb,
MPI_Aint *extent);
+OMPI_DECLSPEC int PMPI_Type_get_extent_x(MPI_Datatype type, MPI_Count *lb,
+ MPI_Count *extent);
OMPI_DECLSPEC int PMPI_Type_get_name(MPI_Datatype type, char *type_name,
int *resultlen);
OMPI_DECLSPEC int PMPI_Type_get_true_extent(MPI_Datatype datatype, MPI_Aint *true_lb,
MPI_Aint *true_extent);
+OMPI_DECLSPEC int PMPI_Type_get_true_extent_x(MPI_Datatype datatype, MPI_Count *true_lb,
+ MPI_Count *true_extent);
OMPI_DECLSPEC int PMPI_Type_hindexed(int count, int array_of_blocklengths[],
MPI_Aint array_of_displacements[],
MPI_Datatype oldtype, MPI_Datatype *newtype)
@@ -2298,6 +2325,7 @@ OMPI_DECLSPEC int PMPI_Type_set_attr(MPI_Datatype type, int type_keyval,
void *attr_val);
OMPI_DECLSPEC int PMPI_Type_set_name(MPI_Datatype type, char *type_name);
OMPI_DECLSPEC int PMPI_Type_size(MPI_Datatype type, int *size);
+OMPI_DECLSPEC int PMPI_Type_size_x(MPI_Datatype type, MPI_Count *size);
OMPI_DECLSPEC int PMPI_Type_struct(int count, int array_of_blocklengths[],
MPI_Aint array_of_displacements[],
MPI_Datatype array_of_types[],
View
@@ -9,6 +9,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
+ * Copyright (c) 2013 Los Alamos National Security, LLC. All rights
+ * reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -28,16 +30,14 @@
#if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES
#pragma weak MPI_Get_elements = PMPI_Get_elements
+#pragma weak MPI_Get_elements_x = PMPI_Get_elements_x
#endif
#if OMPI_PROFILING_DEFINES
#include "ompi/mpi/c/profile/defines.h"
#endif
-static const char FUNC_NAME[] = "MPI_Get_elements";
-
-
-int MPI_Get_elements(MPI_Status *status, MPI_Datatype datatype, int *count)
+static int _MPI_Get_elements(const char *func_name, MPI_Status *status, MPI_Datatype datatype, MPI_Count *count)
{
size_t size, internal_count;
int i;
@@ -58,7 +58,7 @@ int MPI_Get_elements(MPI_Status *status, MPI_Datatype datatype, int *count)
if (MPI_PARAM_CHECK) {
int err = MPI_SUCCESS;
- OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
+ OMPI_ERR_INIT_FINALIZE(func_name);
if (NULL == status || MPI_STATUSES_IGNORE == status ||
MPI_STATUS_IGNORE == status || NULL == count) {
err = MPI_ERR_ARG;
@@ -67,7 +67,7 @@ int MPI_Get_elements(MPI_Status *status, MPI_Datatype datatype, int *count)
} else {
OMPI_CHECK_DATATYPE_FOR_RECV(err, datatype, 1);
}
- OMPI_ERRHANDLER_CHECK(err, MPI_COMM_WORLD, err, FUNC_NAME);
+ OMPI_ERRHANDLER_CHECK(err, MPI_COMM_WORLD, err, func_name);
}
*count = 0;
@@ -105,16 +105,41 @@ int MPI_Get_elements(MPI_Status *status, MPI_Datatype datatype, int *count)
}
goto more_than_int_elements;
}
- return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
+ return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, func_name);
more_than_int_elements:
- if( internal_count > ((size_t)INT_MAX) ) {
- /* We have more elements that we can represent with a signed int, and therefore
- * we're outside the standard here. I don't see what should we report back
- * here to make it useful. So, let's return an untouched *count and trigger
- * an MPI_ERR_TRUNCATE.
+ if( internal_count > (size_t) MPI_COUNT_MAX ) {
+ /* We have more elements that we can represent with a ssize_t. We must
+ * set count to MPI_UNDEFINED (MPI 3.0). We should still be able to return
+ * MPI_ERR_TRUNCATE here.
*/
- return MPI_ERR_TRUNCATE;
+ *count = MPI_UNDEFINED;
+ } else {
+ *count = (ssize_t) internal_count;
}
- *count = (int)internal_count;
+
return MPI_SUCCESS;
}
+
+/*
+ * MPI_Get_elements: int-count front end for _MPI_Get_elements.
+ *
+ * Obtains the element count as an MPI_Count from the shared helper and
+ * narrows it to an int.  A value larger than INT_MAX is reported as
+ * MPI_UNDEFINED.  The helper's return code is passed through unchanged.
+ */
+int MPI_Get_elements(MPI_Status *status, MPI_Datatype datatype, int *count)
+{
+    /* Start from a defined value: the helper can return from its parameter
+     * check before it has stored anything through the count pointer, and
+     * lcount is read below regardless of rc. */
+    MPI_Count lcount = 0;
+    int rc;
+
+    /* Forward a NULL count as NULL so the helper's own NULL == count
+     * argument check still sees the caller's mistake; always handing it
+     * &lcount would hide the error and the store below would then
+     * dereference NULL. */
+    rc = _MPI_Get_elements ("MPI_Get_elements", status, datatype,
+                            (NULL == count) ? NULL : &lcount);
+    if (NULL == count) {
+        /* Nowhere to store a result; report whatever the helper returned. */
+        return rc;
+    }
+
+    if (lcount > (MPI_Count) INT_MAX ) {
+        /* We have more elements than we can represent with a signed int.
+         * We must set count to MPI_UNDEFINED (MPI 3.0). */
+        *count = MPI_UNDEFINED;
+    } else {
+        /* Also carries a negative MPI_UNDEFINED from the helper through. */
+        *count = (int) lcount;
+    }
+
+    return rc;
+}
+
+/*
+ * MPI_Get_elements_x: large-count variant.  The caller's MPI_Count pointer
+ * is handed straight to the shared helper, so no narrowing takes place.
+ */
+int MPI_Get_elements_x(MPI_Status *status, MPI_Datatype datatype, MPI_Count *count)
+{
+    int rc;
+
+    rc = _MPI_Get_elements ("MPI_Get_elements_x", status, datatype, count);
+
+    return rc;
+}
Oops, something went wrong.

0 comments on commit db54d13

Please sign in to comment.