diff --git a/tc/core/polyhedral/cuda/codegen.cc b/tc/core/polyhedral/cuda/codegen.cc
index 25dc19804..e628c8874 100644
--- a/tc/core/polyhedral/cuda/codegen.cc
+++ b/tc/core/polyhedral/cuda/codegen.cc
@@ -592,8 +592,7 @@ void emitMappedTensorAccess(
     return;
   }
 
-  auto tensorId =
-      context.scop().promotedDecls().at(promotionInfo.groupId).tensorId;
+  auto tensorId = context.scop().promotedDecl(promotionInfo.groupId).tensorId;
 
   // Here and below in comments: D = domain, O = original tensor, P = promoted
   // tensor, S = partial schedule, A = AST loops;
diff --git a/tc/core/polyhedral/cuda/memory_promotion_heuristic.cc b/tc/core/polyhedral/cuda/memory_promotion_heuristic.cc
index 8c3f30960..a7493ec53 100644
--- a/tc/core/polyhedral/cuda/memory_promotion_heuristic.cc
+++ b/tc/core/polyhedral/cuda/memory_promotion_heuristic.cc
@@ -99,41 +99,18 @@ void mapCopiesToThreads(MappedScop& mscop, bool unroll) {
 
     // Map band dimensions to threads, in inverse order since the last member
     // iterates over the last subscript and is likely to result in coalescing.
-    // Step over band members that iterate over size-1 arrays subscripts as
-    // they would have been executed by a single thread.
     // If not all available thread ids are used, fix remaining to 1 thread.
-    auto filter = node->elemAs<ScheduleTreeElemFilter>()->filter_;
-    auto filterSets = isl::UnionAsVector(filter);
-    size_t t = 0;
-    for (int i = band->nMember() - 1;
-         i >= 0 && t < mscop.numThreads.view.size();
-         --i) {
-      auto skip = std::all_of(
-          filterSets.begin(), filterSets.end(), [&mscop, i](isl::set s) {
-            auto groupId =
-                s.get_space().unwrap().get_tuple_id(isl::dim_type::out);
-            if (mscop.scop().promotedDecls().count(groupId) != 1) {
-              std::stringstream ss;
-              ss << "promoted group " << groupId << " has no declaration";
-              throw promotion::PromotionLogicError(ss.str());
-            }
-            auto decl = mscop.scop().promotedDecls().at(groupId);
-            return static_cast<size_t>(i) >= decl.sizes.size() ||
-                decl.sizes[i] == 1;
-          });
-      if (skip) {
-        continue;
-      }
-
+    auto nToMap = std::min(band->nMember(), mscop.numThreads.view.size());
+    for (size_t t = 0; t < nToMap; ++t) {
+      auto pos = band->nMember() - 1 - t;
       mapToParameterWithExtent(
           root,
           bandNode,
-          i,
+          pos,
           mapping::ThreadId::makeId(t),
           mscop.numThreads.view[t]);
-      ++t;
     }
-    mscop.mapRemaining<mapping::ThreadId>(bandNode, t);
+    mscop.mapRemaining<mapping::ThreadId>(bandNode, nToMap);
 
     // Unroll if requested.
     if (unroll) {
diff --git a/tc/core/polyhedral/memory_promotion.cc b/tc/core/polyhedral/memory_promotion.cc
index 8b725103d..19f0091f9 100644
--- a/tc/core/polyhedral/memory_promotion.cc
+++ b/tc/core/polyhedral/memory_promotion.cc
@@ -422,6 +422,30 @@ isl::set tensorElementsSet(const Scop& scop, isl::id tensorId) {
   }
   return tensorElements;
 }
+
+/*
+ * "schedule" iterates over the elements of the tensor described by "decl".
+ * Remove the schedule dimensions that correspond to tensor dimensions
+ * of size 1.
+ * Note that this function drops the name of the target space of "schedule",
+ * but this space is irrelevant for the caller.
+ */
+isl::multi_aff dropDummyTensorDimensions(
+    isl::multi_aff schedule,
+    const Scop::PromotedDecl& decl) {
+  auto list = schedule.get_aff_list();
+  auto space = schedule.get_space().domain();
+
+  auto n = list.n();
+  for (int i = n - 1; i >= 0; --i) {
+    if (decl.sizes[i] == 1) {
+      list = list.drop(i, 1);
+    }
+  }
+
+  space = space.from_domain().add_dims(isl::dim_type::out, list.n());
+  return isl::multi_aff(space, list);
+}
 } // namespace
 
 ScheduleTree* insertCopiesUnder(
@@ -449,6 +473,9 @@ ScheduleTree* insertCopiesUnder(
       isl::multi_aff::identity(promotionSpace.range().map_from_set());
   identityCopySchedule =
       identityCopySchedule.pullback(isl::multi_aff::range_map(promotionSpace));
+  // Only iterate over significant tensor dimensions.
+  auto decl = scop.promotedDecl(groupId);
+  identityCopySchedule = dropDummyTensorDimensions(identityCopySchedule, decl);
   auto readSchedule = isl::multi_union_pw_aff(
       identityCopySchedule.set_tuple_id(isl::dim_type::in, readId));
   auto writeSchedule = isl::multi_union_pw_aff(
diff --git a/tc/core/polyhedral/scop.cc b/tc/core/polyhedral/scop.cc
index 50ceddef2..6b88ac446 100644
--- a/tc/core/polyhedral/scop.cc
+++ b/tc/core/polyhedral/scop.cc
@@ -201,12 +201,12 @@ void Scop::promoteGroup(
   }
 
   auto groupId = nextGroupIdForTensor(tensorId);
-  insertCopiesUnder(*this, tree, *gr, tensorId, groupId);
   auto sizes = gr->approximationSizes();
   if (sizes.size() > 0 && forceLastExtentOdd && (sizes.back() % 2) == 0) {
     sizes.back() += 1;
   }
   promotedDecls_[groupId] = PromotedDecl{tensorId, sizes, kind};
+  insertCopiesUnder(*this, tree, *gr, tensorId, groupId);
 
   // FIXME: we can now store a unique pointer...
   auto group = std::shared_ptr<TensorReferenceGroup>(std::move(gr));
diff --git a/tc/core/polyhedral/scop.h b/tc/core/polyhedral/scop.h
index bc4953b1b..8093c66a2 100644
--- a/tc/core/polyhedral/scop.h
+++ b/tc/core/polyhedral/scop.h
@@ -17,6 +17,7 @@
 #include <functional>
 #include <memory>
+#include <sstream>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -313,6 +314,17 @@ struct Scop {
     return promotedDecls_;
   }
 
+  // Return the promoted declaration information associated to
+  // the given identifier of a promoted tensor reference group.
+  const PromotedDecl& promotedDecl(isl::id groupId) const {
+    if (promotedDecls().count(groupId) != 1) {
+      std::stringstream ss;
+      ss << "promoted group " << groupId << " has no declaration";
+      throw std::logic_error(ss.str());
+    }
+    return promotedDecls().at(groupId);
+  }
+
   const std::vector<std::pair<isl::union_set, PromotionInfo>>&
   activePromotions() const {
     return activePromotions_;