Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Test atomic operations against fixed symbols.

  • Loading branch information...
commit 56a6b26d8365f2e6624cfb657ac269e8710aac4b 1 parent 5c17a72
@ckennelly authored
View
265 src/global_context_memcheck.cpp
@@ -1425,6 +1425,109 @@ void global_context_memcheck::instrument_and(const statement_t & statement,
*keep = true;
}
+/**
+ * Searches for a variable named id (in the given space) in the block and
+ * its parent scopes, returning true if the variable is found.
+ *
+ * If not NULL, f_ returns the enclosing function (set even on failure).
+ * If not NULL and the search succeeds, v_ returns the matching variable.
+ */
+static bool find_variable(const std::string & id, space_t space,
+ const block_t * block, const function_t ** f_,
+ const variable_t ** v_) {
+ bool found = false;
+ const function_t * f = NULL;
+ const variable_t * vr = NULL;
+
+ const size_t isize = id.size();
+ while (block && !(found)) {
+ assert(!(block->parent) || !(block->fparent));
+ assert(block->block_type == block_scope);
+ const scope_t * s = block->scope;
+
+ const size_t vn = s->variables.size();
+ for (size_t vi = 0; vi < vn; vi++) {
+ const variable_t & v = s->variables[vi];
+ if (v.space != space) {
+ continue;
+ }
+
+ const size_t vsize = v.name.size();
+ const size_t msize = std::min(vsize, isize);
+ if (v.has_suffix && memcmp(v.name.c_str(), id.c_str(), msize) == 0) {
+ /* Prefix matched; parse the numeric suffix (e.g. "u1" -> 1). */
+ int suffix;
+ int p = sscanf(id.c_str() + msize, "%d", &suffix);
+ if (p == 1 && suffix >= 0 && suffix < v.suffix) {
+ found = true;
+ vr = &v; /* NOTE(review): no break here, so a later exact match wins — confirm intended. */
+ }
+ } else if (!(v.has_suffix) && id == v.name) {
+ found = true;
+ vr = &v;
+ break;
+ }
+
+ /* Consistency check: has_suffix iff suffix >= 0. */
+ assert(!(v.has_suffix ^ (v.suffix >= 0)));
+ }
+
+ /* Move up to the parent scope, recording the enclosing function. */
+ f = block->fparent;
+ block = block->parent;
+ }
+
+ if (f_) {
+ *f_ = f;
+ }
+
+ if (found && v_) {
+ *v_ = vr;
+ }
+
+ return found;
+}
+
+/**
+ * Like find_variable but conducts its search in the global (ptx) scope.
+ * If not NULL and the search succeeds, v_ returns the matching variable.
+ */
+static bool find_variable_global(const std::string & id, space_t space,
+ const ptx_t * ptx, const variable_t ** v_) {
+ bool found = false;
+ const variable_t * vr = NULL;
+
+ const size_t vn = ptx->variables.size();
+ const size_t isize = id.size();
+ for (size_t vi = 0; vi < vn; vi++) {
+ const variable_t & v = ptx->variables[vi];
+ if (v.space != space) {
+ continue;
+ }
+
+ const size_t vsize = v.name.size();
+ const size_t msize = std::min(vsize, isize);
+ if (v.has_suffix && memcmp(v.name.c_str(), id.c_str(), msize) == 0) {
+ /* Prefix matched; parse the numeric suffix (e.g. "u1" -> 1). */
+ int s;
+ int p = sscanf(id.c_str() + msize, "%d", &s);
+ /* A PTX declaration u<N> defines u0..u(N-1), so the suffix must be
+ * strictly less than v.suffix (mirrors find_variable; the previous
+ * "<=" accepted one-past-the-end, e.g. u2 for u<2>). */
+ if (p == 1 && s >= 0 && s < v.suffix) {
+ found = true;
+ vr = &v;
+ }
+ } else if (!(v.has_suffix) && id == v.name) {
+ found = true;
+ vr = &v;
+ break;
+ }
+ }
+
+ if (found && v_) {
+ *v_ = vr;
+ }
+
+ return found;
+}
+
void global_context_memcheck::instrument_atom(const statement_t & statement,
statement_vt * aux, bool * keep, internal::auxillary_t * auxillary) {
assert((statement.op == op_atom && (statement.operands.size() >= 3u ||
@@ -1552,54 +1655,17 @@ void global_context_memcheck::instrument_atom(const statement_t & statement,
const std::string & id = addr.identifier[0];
/* Walk up scopes for identifiers. */
- const block_t * block = auxillary->block;
const function_t * f = NULL;
-
- bool found = false;
- bool flexible = false;
- size_t size;
-
- while (block && !(found)) {
- assert(!(block->parent) || !(block->fparent));
- assert(block->block_type == block_scope);
- const scope_t * s = block->scope;
-
- const size_t vn = s->variables.size();
- for (size_t vi = 0; vi < vn; vi++) {
- const variable_t & v = s->variables[vi];
- if (id == v.name && !(v.has_suffix) &&
- v.space != reg_space) {
- found = true;
- size = v.size();
- break;
- }
- }
-
- /* Move up. */
- f = block->fparent;
- block = block->parent;
- }
+ const variable_t * v = NULL;
+ bool found = find_variable(id, shared_space, auxillary->block, &f, &v);
if (!(found)) {
assert(f);
-
- const ptx_t * p = f->parent;
- const size_t vn = p->variables.size();
- for (size_t vi = 0; vi < vn; vi++) {
- const variable_t & v = p->variables[vi];
- if (id == v.name && !(v.has_suffix) &&
- v.space != reg_space) {
- found = true;
- flexible = v.array_flexible;
- if (!(flexible)) {
- size = v.size();
- }
- break;
- }
- }
+ found = find_variable_global(id, shared_space, f->parent, &v);
}
- if (found && !(flexible)) {
+ const size_t size = found ? v->size() : 0u;
+ if (found && !(v->array_flexible)) {
/* We found a fixed symbol, verify we do not statically overrun
* it. */
const size_t end = width + (size_t) addr.offset;
@@ -1617,7 +1683,11 @@ void global_context_memcheck::instrument_atom(const statement_t & statement,
assert(ret < (int) sizeof(msg) - 1);
logger::instance().print(msg);
- /* Cast off bits into the ether. */
+ /* Cast off bits into the ether and mark result as invalid. */
+ if (!(reduce)) {
+ aux->push_back(make_mov(btype, vd, -1));
+ }
+
return;
} else {
/* Map it to the validity symbol, preserving offset. */
@@ -3039,53 +3109,18 @@ void global_context_memcheck::instrument_ld(const statement_t & statement,
const std::string & id = src.identifier[0];
/* Walk up scopes for identifiers. */
- const block_t * b = auxillary->block;
const function_t * f = NULL;
-
- bool found = false;
- bool flexible = false;
- size_t size;
-
- while (b && !(found)) {
- assert(!(b->parent) || !(b->fparent));
- assert(b->block_type == block_scope);
- const scope_t * s = b->scope;
-
- const size_t vn = s->variables.size();
- for (size_t vi = 0; vi < vn; vi++) {
- const variable_t & v = s->variables[vi];
- if (id == v.name && !(v.has_suffix) &&
- v.space != reg_space) {
- found = true;
- size = v.size();
- break;
- }
- }
-
- /* Move up. */
- f = b->fparent;
- b = b->parent;
- }
+ const variable_t * v = NULL;
+ bool found = find_variable(id, shared_space, auxillary->block, &f, &v);
if (!(found)) {
assert(f);
-
- const ptx_t * p = f->parent;
- const size_t vn = p->variables.size();
- for (size_t vi = 0; vi < vn; vi++) {
- const variable_t & v = p->variables[vi];
- if (id == v.name && !(v.has_suffix) &&
- v.space != reg_space) {
- found = true;
- flexible = v.array_flexible;
- if (!(flexible)) {
- size = v.size();
- }
- break;
- }
- }
+ found = find_variable_global(id, shared_space, f->parent, &v);
}
+ const bool flexible = found ? v->array_flexible : false;
+ const size_t size = found ? v->size() : 0;
+
if (found && !(flexible)) {
/* We found a fixed symbol, verify we do not
* statically overrun it. */
@@ -3114,7 +3149,7 @@ void global_context_memcheck::instrument_ld(const statement_t & statement,
char msg[256];
int ret = snprintf(msg, sizeof(msg),
- "Shared store of %zu bytes at offset "
+ "Shared load of %zu bytes at offset "
"%ld will overrun buffer:\n"
"Disassembly: %s\n", width,
src.offset, ss.str().c_str());
@@ -5206,53 +5241,18 @@ void global_context_memcheck::instrument_st(const statement_t & statement,
const std::string & id = dst.identifier[0];
/* Walk up scopes for identifiers. */
- const block_t * b = auxillary->block;
const function_t * f = NULL;
-
- bool found = false;
- bool flexible = false;
- size_t size;
-
- while (b && !(found)) {
- assert(!(b->parent) || !(b->fparent));
- assert(b->block_type == block_scope);
- const scope_t * s = b->scope;
-
- const size_t vn = s->variables.size();
- for (size_t vi = 0; vi < vn; vi++) {
- const variable_t & v = s->variables[vi];
- if (id == v.name && !(v.has_suffix) &&
- v.space != reg_space) {
- found = true;
- size = v.size();
- break;
- }
- }
-
- /* Move up. */
- f = b->fparent;
- b = b->parent;
- }
+ const variable_t * v = NULL;
+ bool found = find_variable(id, shared_space, auxillary->block, &f, &v);
if (!(found)) {
assert(f);
-
- const ptx_t * p = f->parent;
- const size_t vn = p->variables.size();
- for (size_t vi = 0; vi < vn; vi++) {
- const variable_t & v = p->variables[vi];
- if (id == v.name && !(v.has_suffix) &&
- v.space != reg_space) {
- found = true;
- flexible = v.array_flexible;
- if (!(flexible)) {
- size = v.size();
- }
- break;
- }
- }
+ found = find_variable_global(id, shared_space, f->parent, &v);
}
+ const bool flexible = found ? v->array_flexible : false;
+ const size_t size = found ? v->size() : 0u;
+
if (found && !(flexible)) {
/* We found a fixed symbol, verify we do not statically overrun
* it. */
@@ -5301,9 +5301,7 @@ void global_context_memcheck::instrument_st(const statement_t & statement,
new_vdst.field.push_back(field_none);
}
} else {
- /* Verify address against the shared size
- * parameter. TODO: Verify we don't overrun
- * the buffer. */
+ /* Verify address against the shared size parameter. */
const operand_t limit =
operand_t::make_identifier(__shared_reg);
@@ -5319,8 +5317,13 @@ void global_context_memcheck::instrument_st(const statement_t & statement,
original_ptr, operand_t::make_iconstant(dst.offset)));
}
- aux->push_back(make_setp(uptr_t, cmp_ge, valid_pred, limit,
- original_ptr));
+ {
+ const temp_ptr tmp(*auxillary);
+ aux->push_back(make_add(uptr_t, tmp, original_ptr,
+ operand_t::make_iconstant((int) width)));
+ aux->push_back(make_setp(uptr_t, cmp_ge, valid_pred, limit,
+ tmp));
+ }
statement_t new_store = statement;
/**
View
1  src/ptx_grammar.yy
@@ -203,6 +203,7 @@ dataType : dataTypeToken {
declarationSpace : TOKEN_REG | TOKEN_PARAM | TOKEN_LOCAL | TOKEN_SHARED ;
declarationSuffix : TOKEN_LANGLE TOKEN_CONSTANT_DECIMAL TOKEN_RANGLE {
parser->function->top->variable.suffix = $<vsigned>2;
+ parser->function->top->variable.has_suffix = true;
};
declarationSuffix : /* */ ;
View
10 src/ptx_ir.cpp
@@ -850,10 +850,12 @@ size_t variable_t::size() const {
size_t scale = 1u;
if (is_array) {
- assert(!(array_flexible) && "TODO Support flexible arrays");
-
- for (unsigned i = 0; i < array_dimensions; i++) {
- scale *= array_size[i];
+ if (array_flexible) {
+ scale = 0u;
+ } else {
+ for (unsigned i = 0; i < array_dimensions; i++) {
+ scale *= array_size[i];
+ }
}
}
View
1  src/tests/.gitignore
@@ -52,6 +52,7 @@ vtest_k_array
vtest_k_atomic_generic
vtest_k_atomic_global
vtest_k_atomic_shared
+vtest_k_atomic_shared_offsets
vtest_k_ballot
vtest_k_bitfield
vtest_k_blas
View
3  src/tests/Makefile
@@ -75,6 +75,9 @@ vtest_k_atomic_global: vtest_k_atomic_global.o k_atomic_kernels.o
vtest_k_atomic_shared: vtest_k_atomic_shared.o k_atomic_kernels.o
nvcc -o vtest_k_atomic_shared $^ $(LDFLAGS)
+vtest_k_atomic_shared_offsets.o: vtest_k_atomic_shared_offsets.cu Makefile
+ $(NVCC) -g $(INCLUDEDIR) -c $< -o $@ -arch sm_13
+
vtest_k_ballot.o: vtest_k_ballot.cu Makefile
$(NVCC) -g $(INCLUDEDIR) -c $< -o $@ -arch sm_20
View
283 src/tests/vtest_k_atomic_shared_offsets.cu
@@ -0,0 +1,283 @@
+/**
+ * Panoptes - A Binary Translation Framework for CUDA
+ * (c) 2011-2012 Chris Kennelly <chris@ckennelly.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <algorithm>
+#include <gtest/gtest.h>
+#include <stdint.h>
+#include <valgrind/memcheck.h>
+#include <vector>
+
+class SharedOffsets : public ::testing::Test {
+public:
+ SharedOffsets() { }
+ ~SharedOffsets() { }
+
+ void SetUp() {
+ cudaError_t ret;
+ ret = cudaStreamCreate(&stream);
+ EXPECT_EQ(cudaSuccess, ret);
+
+ threads = 256;
+ ret = cudaMalloc((void **) &d, threads * sizeof(*d));
+ EXPECT_EQ(cudaSuccess, ret);
+
+ reset = false;
+ }
+
+ void TearDown() {
+ cudaError_t ret;
+
+ if (reset) { // the test poisoned the context; a full device reset is required
+ ret = cudaDeviceReset();
+ EXPECT_EQ(cudaSuccess, ret);
+ } else {
+ ret = cudaStreamDestroy(stream);
+ EXPECT_EQ(cudaSuccess, ret);
+
+ ret = cudaFree(d);
+ EXPECT_EQ(cudaSuccess, ret);
+ }
+ }
+
+ cudaStream_t stream; // stream used for all kernel launches in these tests
+ uint32_t threads; // threads per block (256)
+ uint32_t * d; // device buffer holding one result per thread
+ bool reset; // set by tests that intentionally fail a launch
+};
+
+static __global__ void k_known_symbol(uint32_t * d, uint32_t base) {
+ uint32_t out;
+ /**
+ * __shared__ uint32_t u;
+ * u = base;
+ * __syncthreads();
+ * out = atomicInc(&u, 0xFFFFFFFF);
+ */
+ asm volatile(
+ "{ .shared .align 4 .u32 u;\n"
+ "st.shared.u32 [u], %1;\n"
+ "bar.sync 0;\n"
+ "atom.shared.inc.u32 %0, [u], -1;\n}" : "=r"(out) : "r"(base));
+ d[threadIdx.x] = out;
+}
+
+TEST_F(SharedOffsets, KnownSymbol) {
+ cudaError_t ret;
+
+ const uint32_t base = 256;
+ k_known_symbol<<<1, threads, 0, stream>>>(d, base);
+
+ ret = cudaStreamSynchronize(stream);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ std::vector<uint32_t> hd(threads);
+ ret = cudaMemcpy(hd.data(), d, sizeof(*d) * threads,
+ cudaMemcpyDeviceToHost);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ std::sort(hd.begin(), hd.end()); // atomicInc yields base..base+threads-1 in some order
+ for (uint32_t i = 0; i < threads; i++) {
+ EXPECT_EQ(i + base, hd[i]);
+ }
+}
+
+static __global__ void k_known_symbol_suffix(uint32_t * d, uint32_t base) {
+ uint32_t out;
+ /**
+ * __shared__ uint32_t u<2>;  (PTX parameterized name: declares u0 and u1)
+ * u1 = base;
+ * __syncthreads();
+ * out = atomicInc(&u1, 0xFFFFFFFF);
+ */
+ asm volatile(
+ "{ .shared .align 4 .u32 u<2>;\n"
+ "st.shared.u32 [u1], %1;\n"
+ "bar.sync 0;\n"
+ "atom.shared.inc.u32 %0, [u1], -1;\n}" : "=r"(out) : "r"(base));
+ d[threadIdx.x] = out;
+}
+
+TEST_F(SharedOffsets, KnownSuffixedSymbol) {
+ cudaError_t ret;
+
+ const uint32_t base = 256;
+ k_known_symbol_suffix<<<1, threads, 0, stream>>>(d, base);
+
+ ret = cudaStreamSynchronize(stream);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ std::vector<uint32_t> hd(threads);
+ ret = cudaMemcpy(hd.data(), d, sizeof(*d) * threads,
+ cudaMemcpyDeviceToHost);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ std::sort(hd.begin(), hd.end()); // atomicInc yields base..base+threads-1 in some order
+ for (uint32_t i = 0; i < threads; i++) {
+ EXPECT_EQ(i + base, hd[i]);
+ }
+}
+
+static __global__ void k_known_symbol_offsets(uint32_t * d, uint32_t base) {
+ uint32_t out;
+ /**
+ * __shared__ uint32_t u[2];
+ * u[1] = base;
+ * __syncthreads();
+ * out = atomicInc(&u[1], 0xFFFFFFFF);
+ */
+ asm volatile(
+ "{ .shared .align 4 .u32 u[2];\n"
+ "st.shared.u32 [u+4], %1;\n"
+ "bar.sync 0;\n"
+ "atom.shared.inc.u32 %0, [u+4], -1;\n}" : "=r"(out) : "r"(base));
+ d[threadIdx.x] = out;
+}
+
+TEST_F(SharedOffsets, KnownSymbolOffsets) {
+ cudaError_t ret;
+
+ const uint32_t base = 256;
+ k_known_symbol_offsets<<<1, threads, 0, stream>>>(d, base);
+
+ ret = cudaStreamSynchronize(stream);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ std::vector<uint32_t> hd(threads);
+ ret = cudaMemcpy(hd.data(), d, sizeof(*d) * threads,
+ cudaMemcpyDeviceToHost);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ std::sort(hd.begin(), hd.end()); // atomicInc yields base..base+threads-1 in some order
+ for (uint32_t i = 0; i < threads; i++) {
+ EXPECT_EQ(i + base, hd[i]);
+ }
+}
+
+/**
+ * *a receives the value loaded back from the invalid address [u+4] (one past
+ * the end of u[1]); Panoptes should drop that access, leaving *a uninitialized.
+ */
+static __global__ void k_known_symbol_overrun(uint32_t * d, uint32_t * a,
+ uint32_t base) {
+ uint32_t out;
+ uint32_t aout;
+ /**
+ * __shared__ uint32_t u[1];
+ * u[1] = base;                         (deliberate static overrun)
+ * __syncthreads();
+ * out = atomicInc(&u[1], 0xFFFFFFFF);  (also overruns)
+ */
+ asm volatile(
+ "{ .shared .align 4 .u32 u[1];\n"
+ "st.shared.u32 [u+4], %2;\n"
+ "bar.sync 0;\n"
+ "atom.shared.inc.u32 %0, [u+4], -1;\n"
+ "bar.sync 0;\n"
+ "ld.shared.u32 %1, [u+4];\n}" : "=r"(out), "=r"(aout) : "r"(base));
+ d[threadIdx.x] = out;
+ *a = aout;
+}
+
+TEST_F(SharedOffsets, StaticOverrun) {
+ cudaError_t ret;
+
+ uint32_t * a;
+ ret = cudaMalloc((void **) &a, sizeof(*a));
+ ASSERT_EQ(cudaSuccess, ret);
+
+ ret = cudaMemset(a, 0, sizeof(*a));
+ ASSERT_EQ(cudaSuccess, ret);
+
+ const uint32_t base = 256;
+ k_known_symbol_overrun<<<1, threads, 0, stream>>>(d, a, base);
+
+ ret = cudaStreamSynchronize(stream);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ /**
+ * The final state of d should be undefined. Since Panoptes drops the st
+ * and atom instructions against the overrunning address, the buffer
+ * should remain uninitialized.
+ */
+ std::vector<uint32_t> hd(threads);
+ ret = cudaMemcpy(hd.data(), d, sizeof(*d) * threads,
+ cudaMemcpyDeviceToHost);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ std::vector<uint32_t> vd(threads);
+ int vret = VALGRIND_GET_VBITS(hd.data(), vd.data(), sizeof(*d) * threads);
+ if (vret == 1) { // vbits are only retrievable when running under Valgrind
+ for (uint32_t i = 0; i < threads; i++) {
+ EXPECT_EQ(0xFFFFFFFF, vd[i]); // all 32 bits marked invalid
+ }
+
+ /**
+ * Verify that we did not perform the ld.shared.u32 instruction.
+ */
+ uint32_t a_final;
+ ret = cudaMemcpy(&a_final, a, sizeof(*a), cudaMemcpyDeviceToHost);
+ uint32_t va_final;
+ vret = VALGRIND_GET_VBITS(&a_final, &va_final, sizeof(a_final));
+ ASSERT_EQ(1, vret);
+
+ EXPECT_EQ(0xFFFFFFFF, va_final);
+ }
+
+ ret = cudaFree(a);
+ ASSERT_EQ(cudaSuccess, ret);
+}
+
+static __global__ void k_known_symbol_flexible(uint32_t * d, uint32_t base) {
+ /* uf is a flexible (extern) shared array sized by the launch configuration. */
+ extern __shared__ uint32_t uf[];
+ uf[0] = base;
+ __syncthreads();
+ d[threadIdx.x] = atomicInc(uf, 0xFFFFFFFF);
+}
+
+TEST_F(SharedOffsets, FlexibleSymbol) {
+ cudaError_t ret;
+
+ const uint32_t base = 256;
+ k_known_symbol_flexible
+ <<<1, threads, 4, stream>>>(d, base); // 4 bytes of dynamic shared memory: room for uf[0]
+
+ ret = cudaStreamSynchronize(stream);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ std::vector<uint32_t> hd(threads);
+ ret = cudaMemcpy(hd.data(), d, sizeof(*d) * threads,
+ cudaMemcpyDeviceToHost);
+ ASSERT_EQ(cudaSuccess, ret);
+
+ std::sort(hd.begin(), hd.end()); // atomicInc yields base..base+threads-1 in some order
+ for (uint32_t i = 0; i < threads; i++) {
+ EXPECT_EQ(i + base, hd[i]);
+ }
+
+ k_known_symbol_flexible<<<1, threads, 0, stream>>>(d, base); // 0 bytes: uf[0] overruns
+
+ ret = cudaStreamSynchronize(stream);
+ ASSERT_EQ(cudaErrorLaunchFailure, ret);
+
+ reset = true; // the failed launch poisoned the context; TearDown must reset the device
+}
+
+int main(int argc, char **argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS(); // runs all SharedOffsets tests
+}
Please sign in to comment.
Something went wrong with that request. Please try again.