diff --git a/tc/core/polyhedral/cuda/codegen.cc b/tc/core/polyhedral/cuda/codegen.cc
index 25dc19804..e628c8874 100644
--- a/tc/core/polyhedral/cuda/codegen.cc
+++ b/tc/core/polyhedral/cuda/codegen.cc
@@ -592,8 +592,7 @@ void emitMappedTensorAccess(
     return;
   }
 
-  auto tensorId =
-      context.scop().promotedDecls().at(promotionInfo.groupId).tensorId;
+  auto tensorId = context.scop().promotedDecl(promotionInfo.groupId).tensorId;
 
   // Here and below in comments: D = domain, O = original tensor, P = promoted
   // tensor, S = partial schedule, A = AST loops;
diff --git a/tc/core/polyhedral/cuda/memory_promotion_heuristic.cc b/tc/core/polyhedral/cuda/memory_promotion_heuristic.cc
index 8c3f30960..a7493ec53 100644
--- a/tc/core/polyhedral/cuda/memory_promotion_heuristic.cc
+++ b/tc/core/polyhedral/cuda/memory_promotion_heuristic.cc
@@ -99,41 +99,18 @@ void mapCopiesToThreads(MappedScop& mscop, bool unroll) {
 
     // Map band dimensions to threads, in inverse order since the last member
     // iterates over the last subscript and is likely to result in coalescing.
-    // Step over band members that iterate over size-1 arrays subscripts as
-    // they would have been executed by a single thread.
     // If not all available thread ids are used, fix remaining to 1 thread.
-    auto filter = node->elemAs<ScheduleTreeElemFilter>()->filter_;
-    auto filterSets = isl::UnionAsVector(filter);
-    size_t t = 0;
-    for (int i = band->nMember() - 1;
-         i >= 0 && t < mscop.numThreads.view.size();
-         --i) {
-      auto skip = std::all_of(
-          filterSets.begin(), filterSets.end(), [&mscop, i](isl::set s) {
-            auto groupId =
-                s.get_space().unwrap().get_tuple_id(isl::dim_type::out);
-            if (mscop.scop().promotedDecls().count(groupId) != 1) {
-              std::stringstream ss;
-              ss << "promoted group " << groupId << " has no declaration";
-              throw promotion::PromotionLogicError(ss.str());
-            }
-            auto decl = mscop.scop().promotedDecls().at(groupId);
-            return static_cast<size_t>(i) >= decl.sizes.size() ||
-                decl.sizes[i] == 1;
-          });
-      if (skip) {
-        continue;
-      }
-
+    auto nToMap = std::min(band->nMember(), mscop.numThreads.view.size());
+    for (size_t t = 0; t < nToMap; ++t) {
+      auto pos = band->nMember() - 1 - t;
       mapToParameterWithExtent(
           root,
           bandNode,
-          i,
+          pos,
           mapping::ThreadId::makeId(t),
           mscop.numThreads.view[t]);
-      ++t;
     }
-    mscop.mapRemaining<mapping::ThreadId>(bandNode, t);
+    mscop.mapRemaining<mapping::ThreadId>(bandNode, nToMap);
 
     // Unroll if requested.
     if (unroll) {
diff --git a/tc/core/polyhedral/memory_promotion.cc b/tc/core/polyhedral/memory_promotion.cc
index 8b725103d..19f0091f9 100644
--- a/tc/core/polyhedral/memory_promotion.cc
+++ b/tc/core/polyhedral/memory_promotion.cc
@@ -422,6 +422,30 @@ isl::set tensorElementsSet(const Scop& scop, isl::id tensorId) {
   }
   return tensorElements;
 }
+
+/*
+ * "schedule" iterates over the elements of the tensor described by "decl".
+ * Remove the schedule dimensions that correspond to tensor dimensions
+ * of size 1.
+ * Note that this function drops the name of the target space of "schedule",
+ * but this space is irrelevant for the caller.
+ */
+isl::multi_aff dropDummyTensorDimensions(
+    isl::multi_aff schedule,
+    const Scop::PromotedDecl& decl) {
+  auto list = schedule.get_aff_list();
+  auto space = schedule.get_space().domain();
+
+  auto n = list.n();
+  for (int i = n - 1; i >= 0; --i) {
+    if (decl.sizes[i] == 1) {
+      list = list.drop(i, 1);
+    }
+  }
+
+  space = space.from_domain().add_dims(isl::dim_type::out, list.n());
+  return isl::multi_aff(space, list);
+}
 } // namespace
 
 ScheduleTree* insertCopiesUnder(
@@ -449,6 +473,9 @@ ScheduleTree* insertCopiesUnder(
       isl::multi_aff::identity(promotionSpace.range().map_from_set());
   identityCopySchedule =
       identityCopySchedule.pullback(isl::multi_aff::range_map(promotionSpace));
+  // Only iterate over significant tensor dimensions.
+  auto decl = scop.promotedDecl(groupId);
+  identityCopySchedule = dropDummyTensorDimensions(identityCopySchedule, decl);
   auto readSchedule = isl::multi_union_pw_aff(
       identityCopySchedule.set_tuple_id(isl::dim_type::in, readId));
   auto writeSchedule = isl::multi_union_pw_aff(
diff --git a/tc/core/polyhedral/scop.cc b/tc/core/polyhedral/scop.cc
index 50ceddef2..6b88ac446 100644
--- a/tc/core/polyhedral/scop.cc
+++ b/tc/core/polyhedral/scop.cc
@@ -201,12 +201,12 @@ void Scop::promoteGroup(
   }
 
   auto groupId = nextGroupIdForTensor(tensorId);
-  insertCopiesUnder(*this, tree, *gr, tensorId, groupId);
   auto sizes = gr->approximationSizes();
   if (sizes.size() > 0 && forceLastExtentOdd && (sizes.back() % 2) == 0) {
     sizes.back() += 1;
   }
   promotedDecls_[groupId] = PromotedDecl{tensorId, sizes, kind};
+  insertCopiesUnder(*this, tree, *gr, tensorId, groupId);
 
   // FIXME: we can now store a unique pointer...
   auto group = std::shared_ptr<TensorReferenceGroup>(std::move(gr));
diff --git a/tc/core/polyhedral/scop.h b/tc/core/polyhedral/scop.h
index bc4953b1b..8093c66a2 100644
--- a/tc/core/polyhedral/scop.h
+++ b/tc/core/polyhedral/scop.h
@@ -17,6 +17,7 @@
 #include <functional>
 #include <memory>
+#include <sstream>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -313,6 +314,17 @@ struct Scop {
     return promotedDecls_;
   }
 
+  // Return the promoted declaration information associated to
+  // the given identifier of a promoted tensor reference group.
+  const PromotedDecl& promotedDecl(isl::id groupId) const {
+    if (promotedDecls().count(groupId) != 1) {
+      std::stringstream ss;
+      ss << "promoted group " << groupId << " has no declaration";
+      throw std::logic_error(ss.str());
+    }
+    return promotedDecls().at(groupId);
+  }
+
   const std::vector<std::pair<isl::union_set, PromotionInfo>>&
   activePromotions() const {
     return activePromotions_;