diff --git a/opal/mca/btl/vader/btl_vader_endpoint.h b/opal/mca/btl/vader/btl_vader_endpoint.h index 1a6b1ddd2d1..bc57d2d95a5 100644 --- a/opal/mca/btl/vader/btl_vader_endpoint.h +++ b/opal/mca/btl/vader/btl_vader_endpoint.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006-2007 Voltaire. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -48,14 +48,16 @@ typedef struct mca_btl_base_endpoint_t { /* per peer buffers */ struct { unsigned char *buffer; /**< starting address of peer's fast box out */ - unsigned int start, seq; uint32_t *startp; + unsigned int start; + uint16_t seq; } fbox_in; struct { unsigned char *buffer; /**< starting address of peer's fast box in */ - unsigned int start, end, seq; uint32_t *startp; /**< pointer to location storing start offset */ + unsigned int start, end; + uint16_t seq; } fbox_out; int32_t peer_smp_rank; /**< my peer's SMP process rank. Used for accessing diff --git a/opal/mca/btl/vader/btl_vader_fbox.h b/opal/mca/btl/vader/btl_vader_fbox.h index f48ea916162..d646263054a 100644 --- a/opal/mca/btl/vader/btl_vader_fbox.h +++ b/opal/mca/btl/vader/btl_vader_fbox.h @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -18,9 +18,16 @@ typedef union mca_btl_vader_fbox_hdr_t { struct { + /* NTH: on 32-bit platforms loading/unloading the header may be completed + * in multiple instructions. To ensure that seq is never loaded before tag + * and the tag is never read before seq put them in the same 32-bits of the + * header. */ + /** message tag */ uint16_t tag; - uint16_t size; - uint32_t seq; + /** sequence number */ + uint16_t seq; + /** message size */ + uint32_t size; } data; uint64_t ival; } mca_btl_vader_fbox_hdr_t; @@ -40,6 +47,13 @@ typedef union mca_btl_vader_fbox_hdr_t { void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, mca_btl_base_endpoint_t *ep); +static inline void mca_btl_vader_fbox_set_header (mca_btl_vader_fbox_hdr_t *hdr, uint16_t tag, + uint16_t seq, uint32_t size) +{ + mca_btl_vader_fbox_hdr_t tmp = {.data = {.tag = tag, .seq = seq, .size = size}}; + hdr->ival = tmp.ival; +} + /* attempt to reserve a contiguous segment from the remote ep */ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t *ep, size_t size) { @@ -88,12 +102,10 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t /* if this is the end of the buffer and the fragment doesn't fit then mark the remaining buffer space to * be skipped and check if the fragment can be written at the beginning of the buffer. */ if (OPAL_UNLIKELY(buffer_free > 0 && buffer_free < size && start <= end)) { - mca_btl_vader_fbox_hdr_t tmp = {.data = {.size = buffer_free - sizeof (mca_btl_vader_fbox_hdr_t), - .seq = ep->fbox_out.seq++, .tag = 0xff}}; - BTL_VERBOSE(("message will not fit in remaining buffer space. skipping to beginning")); - MCA_BTL_VADER_FBOX_HDR(dst)->ival = tmp.ival; + mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), 0xff, ep->fbox_out.seq++, + buffer_free - sizeof (mca_btl_vader_fbox_hdr_t)); end = MCA_BTL_VADER_FBOX_ALIGNMENT; /* toggle the high bit */ @@ -114,11 +126,7 @@ static inline unsigned char *mca_btl_vader_reserve_fbox (mca_btl_base_endpoint_t (unsigned int) size, end, start, end, hbs, buffer_free)); /* write out part of the header now. the tag will be written when the data is available */ - { - mca_btl_vader_fbox_hdr_t tmp = {.data = {.size = data_size, .tag = 0, .seq = ep->fbox_out.seq++}}; - - MCA_BTL_VADER_FBOX_HDR(dst)->ival = tmp.ival; - } + mca_btl_vader_fbox_set_header (MCA_BTL_VADER_FBOX_HDR(dst), 0, ep->fbox_out.seq++, data_size); end += size;