diff --git a/sycl/doc/design/CommandGraph.md b/sycl/doc/design/CommandGraph.md
index 4e5b80b593e46..956082b31097f 100644
--- a/sycl/doc/design/CommandGraph.md
+++ b/sycl/doc/design/CommandGraph.md
@@ -305,6 +305,28 @@ from the same dynamic command-group object. This allows the SYCL runtime to
 access the list of alternative kernels when calling the UR API to append a
 kernel command to a command-buffer.
 
+## Graph-Owned Memory Allocations
+### Device Allocations
+
+Device allocations for graphs are implemented using virtual memory. Allocation
+commands performing a virtual reservation for the provided size, and physical
+memory is created and mapped only during graph finalization. This allows valid
+device addresses to be returned immediately when building the graph without the
+penalty of doing any memory allocations during graph building, which could have
+a negative impact on features such as whole-graph update through increased
+overhead.
+
+### Behaviour of async_free
+
+`async_free` nodes are treated as hints rather than an actual memory free
+operation. This is because deallocating during graph execution is both
+undesirable for performance and not feasible with the current
+implementation/backends. Instead a free node represents a promise from the user
+that the memory is no longer in use. This enables optimizations such as
+potentially reusing that memory for subsequent allocation nodes in the graph.
+This allows us to reduce the total amount of concurrent memory required by a
+single graph.
+
 ## Optimizations
 ### Interactions with Profiling
 
diff --git a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp
index 53800bbfb4380..a902381de3c5f 100644
--- a/sycl/include/sycl/ext/oneapi/experimental/graph.hpp
+++ b/sycl/include/sycl/ext/oneapi/experimental/graph.hpp
@@ -121,7 +121,9 @@ enum class node_type {
   memadvise = 7,
   ext_oneapi_barrier = 8,
   host_task = 9,
-  native_command = 10
+  native_command = 10,
+  async_malloc = 11,
+  async_free = 12
 };
 
 /// Class representing a node in the graph, returned by command_graph::add().
@@ -429,6 +431,10 @@ class __SYCL_EXPORT executable_command_graph
   /// @param Nodes The nodes to use for updating the graph.
   void update(const std::vector<node> &Nodes);
 
+  /// Return the total amount of memory required by this graph for graph-owned
+  /// memory allocations.
+  size_t get_required_mem_size() const;
+
   /// Common Reference Semantics
   friend bool operator==(const executable_command_graph &LHS,
                          const executable_command_graph &RHS) {
diff --git a/sycl/source/detail/async_alloc.cpp b/sycl/source/detail/async_alloc.cpp
index 8251da36ea492..b21f6d13c42e3 100644
--- a/sycl/source/detail/async_alloc.cpp
+++ b/sycl/source/detail/async_alloc.cpp
@@ -44,23 +44,25 @@ void *async_malloc(sycl::handler &h, sycl::usm::alloc kind, size_t size) {
         sycl::make_error_code(sycl::errc::feature_not_supported),
         "Only device backed asynchronous allocations are supported!");
 
-  h.throwIfGraphAssociated<
-      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
-          sycl_ext_oneapi_async_alloc>();
-
   auto &Adapter = h.getContextImplPtr()->getAdapter();
-  auto &Q = h.MQueue->getHandleRef();
 
   // Get events to wait on.
   auto depEvents = getUrEvents(h.impl->CGData.MEvents);
   uint32_t numEvents = h.impl->CGData.MEvents.size();
 
   void *alloc = nullptr;
-  ur_event_handle_t Event;
-  Adapter->call<sycl::errc::runtime,
-                sycl::detail::UrApiKind::urEnqueueUSMDeviceAllocExp>(
-      Q, (ur_usm_pool_handle_t)0, size, nullptr, numEvents, depEvents.data(),
-      &alloc, &Event);
+
+  ur_event_handle_t Event = nullptr;
+  // If a graph is present do the allocation from the graph memory pool instead.
+  if (auto Graph = h.getCommandGraph(); Graph) {
+    alloc = Graph->getMemPool().malloc(size, kind);
+  } else {
+    auto &Q = h.MQueue->getHandleRef();
+    Adapter->call<sycl::errc::runtime,
+                  sycl::detail::UrApiKind::urEnqueueUSMDeviceAllocExp>(
+        Q, (ur_usm_pool_handle_t)0, size, nullptr, numEvents, depEvents.data(),
+        &alloc, &Event);
+  }
 
   // Async malloc must return a void* immediately.
   // Set up CommandGroup which is a no-op and pass the
@@ -90,12 +92,7 @@ __SYCL_EXPORT void *async_malloc(const sycl::queue &q, sycl::usm::alloc kind,
 __SYCL_EXPORT void *async_malloc_from_pool(sycl::handler &h, size_t size,
                                            const memory_pool &pool) {
 
-  h.throwIfGraphAssociated<
-      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
-          sycl_ext_oneapi_async_alloc>();
-
   auto &Adapter = h.getContextImplPtr()->getAdapter();
-  auto &Q = h.MQueue->getHandleRef();
   auto &memPoolImpl = sycl::detail::getSyclObjImpl(pool);
 
   // Get events to wait on.
@@ -103,12 +100,20 @@ __SYCL_EXPORT void *async_malloc_from_pool(sycl::handler &h, size_t size,
   uint32_t numEvents = h.impl->CGData.MEvents.size();
 
   void *alloc = nullptr;
-  ur_event_handle_t Event;
-  Adapter->call<sycl::errc::runtime,
-                sycl::detail::UrApiKind::urEnqueueUSMDeviceAllocExp>(
-      Q, memPoolImpl.get()->get_handle(), size, nullptr, numEvents,
-      depEvents.data(), &alloc, &Event);
 
+  ur_event_handle_t Event = nullptr;
+  // If a graph is present do the allocation from the graph memory pool instead.
+  if (auto Graph = h.getCommandGraph(); Graph) {
+    // Memory pool is passed as the graph may use some properties of it.
+    alloc = Graph->getMemPool().malloc(size, pool.get_alloc_kind(),
+                                       sycl::detail::getSyclObjImpl(pool));
+  } else {
+    auto &Q = h.MQueue->getHandleRef();
+    Adapter->call<sycl::errc::runtime,
+                  sycl::detail::UrApiKind::urEnqueueUSMDeviceAllocExp>(
+        Q, memPoolImpl.get()->get_handle(), size, nullptr, numEvents,
+        depEvents.data(), &alloc, &Event);
+  }
   // Async malloc must return a void* immediately.
   // Set up CommandGroup which is a no-op and pass the event from the alloc.
   h.impl->MAsyncAllocEvent = Event;
@@ -135,9 +140,15 @@ async_malloc_from_pool(const sycl::queue &q, size_t size,
 }
 
 __SYCL_EXPORT void async_free(sycl::handler &h, void *ptr) {
-  h.throwIfGraphAssociated<
-      ext::oneapi::experimental::detail::UnsupportedGraphFeatures::
-          sycl_ext_oneapi_async_alloc>();
+  if (auto Graph = h.getCommandGraph(); Graph) {
+    // Check if the pointer to be freed has an associated allocation node, and
+    // error if not
+    if (!Graph->getMemPool().hasAllocation(ptr)) {
+      throw sycl::exception(sycl::make_error_code(sycl::errc::invalid),
+                            "Cannot add a free node to a graph for which "
+                            "there is no associated allocation node!");
+    }
+  }
 
   h.impl->MFreePtr = ptr;
   h.setType(detail::CGType::AsyncFree);
diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp
index 8cb8647c49ee9..ce5985275ac21 100644
--- a/sycl/source/detail/graph_impl.cpp
+++ b/sycl/source/detail/graph_impl.cpp
@@ -57,6 +57,10 @@ inline const char *nodeTypeToString(node_type NodeType) {
     return "host_task";
   case node_type::native_command:
     return "native_command";
+  case node_type::async_malloc:
+    return "async_malloc";
+  case node_type::async_free:
+    return "async_free";
   }
   assert(false && "Unhandled node type");
   return {};
@@ -340,7 +344,7 @@ graph_impl::graph_impl(const sycl::context &SyclContext,
                        const sycl::device &SyclDevice,
                        const sycl::property_list &PropList)
     : MContext(SyclContext), MDevice(SyclDevice), MRecordingQueues(),
-      MEventsMap(), MInorderQueueMap(),
+      MEventsMap(), MInorderQueueMap(), MGraphMemPool(SyclContext, SyclDevice),
       MID(NextAvailableID.fetch_add(1, std::memory_order_relaxed)) {
   checkGraphPropertiesAndThrow(PropList);
   if (PropList.has_property<property::graph::no_cycle_check>()) {
@@ -750,12 +754,12 @@ void graph_impl::beginRecording(
   }
 }
 
-// Check if nodes are empty and if so loop back through predecessors until we
-// find the real dependency.
+// Check if nodes do not require enqueueing and if so loop back through
+// predecessors until we find the real dependency.
 void exec_graph_impl::findRealDeps(
     std::vector<ur_exp_command_buffer_sync_point_t> &Deps,
     std::shared_ptr<node_impl> CurrentNode, int ReferencePartitionNum) {
-  if (CurrentNode->isEmpty()) {
+  if (!CurrentNode->requiresEnqueue()) {
     for (auto &N : CurrentNode->MPredecessors) {
       auto NodeImpl = N.lock();
       findRealDeps(Deps, NodeImpl, ReferencePartitionNum);
@@ -875,9 +879,9 @@ void exec_graph_impl::createCommandBuffers(
   Partition->MCommandBuffers[Device] = OutCommandBuffer;
 
   for (const auto &Node : Partition->MSchedule) {
-    // Empty nodes are not processed as other nodes, but only their
+    // Some nodes are not scheduled like other nodes, and only their
     // dependencies are propagated in findRealDeps
-    if (Node->isEmpty())
+    if (!Node->requiresEnqueue())
       continue;
 
     sycl::detail::CGType type = Node->MCGType;
@@ -943,6 +947,8 @@ exec_graph_impl::exec_graph_impl(sycl::context Context,
 
 exec_graph_impl::~exec_graph_impl() {
   try {
+    MGraphImpl->markExecGraphDestroyed();
+
     const sycl::detail::AdapterPtr &Adapter =
         sycl::detail::getSyclObjImpl(MContext)->getAdapter();
     MSchedule.clear();
@@ -952,6 +958,9 @@ exec_graph_impl::~exec_graph_impl() {
       Event->wait(Event);
     }
 
+    // Clean up any graph-owned allocations that were allocated
+    MGraphImpl->getMemPool().deallocateAndUnmapAll();
+
     for (const auto &Partition : MPartitions) {
       Partition->MSchedule.clear();
       for (const auto &Iter : Partition->MCommandBuffers) {
@@ -1810,6 +1819,14 @@ modifiable_command_graph::finalize(const sycl::property_list &PropList) const {
   // Graph is read and written in this scope so we lock
   // this graph with full priviledges.
   graph_impl::WriteLock Lock(impl->MMutex);
+  // If the graph uses graph-owned allocations and an executable graph already
+  // exists we must throw an error.
+  if (impl->getMemPool().hasAllocations() && impl->getExecGraphCount() > 0) {
+    throw sycl::exception(sycl::make_error_code(errc::invalid),
+                          "Graphs containing allocations can only have a "
+                          "single executable graph alive at any one time.");
+  }
+
   return command_graph<graph_state::executable>{
       this->impl, this->impl->getContext(), PropList};
 }
@@ -1937,11 +1954,16 @@ executable_command_graph::executable_command_graph(
     const property_list &PropList)
     : impl(std::make_shared<detail::exec_graph_impl>(Ctx, Graph, PropList)) {
   finalizeImpl(); // Create backend representation for executable graph
+  // Mark that we have created an executable graph from the modifiable graph.
+  Graph->markExecGraphCreated();
 }
 
 void executable_command_graph::finalizeImpl() {
   impl->makePartitions();
 
+  // Handle any work required for graph-owned memory allocations
+  impl->finalizeMemoryAllocations();
+
   auto Device = impl->getGraphImpl()->getDevice();
   for (auto Partition : impl->getPartitions()) {
     if (!Partition->isHostTask()) {
@@ -1969,6 +1991,13 @@ void executable_command_graph::update(const std::vector<node> &Nodes) {
   impl->update(NodeImpls);
 }
 
+size_t executable_command_graph::get_required_mem_size() const {
+  // Since each graph has a unique mem pool, return the current memory usage for
+  // now. This call my change if we move to being able to share memory between
+  // unique graphs.
+  return impl->getGraphImpl()->getMemPool().getMemUseCurrent();
+}
+
 dynamic_parameter_base::dynamic_parameter_base(
     command_graph<graph_state::modifiable> Graph)
     : impl(std::make_shared<dynamic_parameter_impl>(
diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp
index 3234721626a10..acc334f61cc3a 100644
--- a/sycl/source/detail/graph_impl.hpp
+++ b/sycl/source/detail/graph_impl.hpp
@@ -17,6 +17,7 @@
 #include <detail/accessor_impl.hpp>
 #include <detail/cg.hpp>
 #include <detail/event_impl.hpp>
+#include <detail/graph_memory_pool.hpp>
 #include <detail/host_task.hpp>
 #include <detail/kernel_impl.hpp>
 #include <detail/sycl_mem_obj_t.hpp>
@@ -73,6 +74,11 @@ inline node_type getNodeTypeFromCG(sycl::detail::CGType CGType) {
     return node_type::subgraph;
   case sycl::detail::CGType::EnqueueNativeCommand:
     return node_type::native_command;
+  case sycl::detail::CGType::AsyncAlloc:
+    return node_type::async_malloc;
+  case sycl::detail::CGType::AsyncFree:
+    return node_type::async_free;
+
   default:
     assert(false && "Invalid Graph Node Type");
     return node_type::empty;
@@ -473,6 +479,21 @@ class node_impl : public std::enable_shared_from_this<node_impl> {
     }
   }
 
+  /// Returns true if this node should be enqueued to the backend, if not only
+  /// its dependencies are considered.
+  bool requiresEnqueue() const {
+    switch (MNodeType) {
+    case node_type::empty:
+    case node_type::ext_oneapi_barrier:
+    case node_type::async_malloc:
+    case node_type::async_free:
+      return false;
+
+    default:
+      return true;
+    }
+  }
+
 private:
   void rebuildArgStorage(std::vector<sycl::detail::ArgDesc> &Args,
                          const std::vector<std::vector<char>> &OldArgStorage,
@@ -919,6 +940,12 @@ class graph_impl : public std::enable_shared_from_this<graph_impl> {
   /// @return Context associated with graph.
   sycl::context getContext() const { return MContext; }
 
+  /// Query for the context impl tied to this graph.
+  /// @return shared_ptr ref for the context impl associated with graph.
+  const std::shared_ptr<sycl::detail::context_impl> &getContextImplPtr() const {
+    return sycl::detail::getSyclObjImpl(MContext);
+  }
+
   /// Query for the device_impl tied to this graph.
   /// @return device_impl shared ptr reference associated with graph.
   const DeviceImplPtr &getDeviceImplPtr() const {
@@ -1139,6 +1166,32 @@ class graph_impl : public std::enable_shared_from_this<graph_impl> {
 
   unsigned long long getID() const { return MID; }
 
+  /// Get the memory pool used for graph-owned allocations.
+  graph_mem_pool &getMemPool() { return MGraphMemPool; }
+
+  /// Mark that an executable graph was created from this modifiable graph, used
+  /// for tracking live graphs for graph-owned allocations.
+  void markExecGraphCreated() { MExecGraphCount++; }
+
+  /// Mark that an executable graph created from this modifiable graph was
+  /// destroyed, used for tracking live graphs for graph-owned allocations.
+  void markExecGraphDestroyed() {
+    while (true) {
+      size_t CurrentVal = MExecGraphCount;
+      if (CurrentVal == 0) {
+        break;
+      }
+      if (MExecGraphCount.compare_exchange_strong(CurrentVal, CurrentVal - 1) ==
+          false) {
+        continue;
+      }
+    }
+  }
+
+  /// Get the number of unique executable graph instances currently alive for
+  /// this graph.
+  size_t getExecGraphCount() const { return MExecGraphCount; }
+
 private:
   /// Check the graph for cycles by performing a depth-first search of the
   /// graph. If a node is visited more than once in a given path through the
@@ -1206,10 +1259,17 @@ class graph_impl : public std::enable_shared_from_this<graph_impl> {
   std::map<std::weak_ptr<sycl::detail::queue_impl>, std::shared_ptr<node_impl>,
            std::owner_less<std::weak_ptr<sycl::detail::queue_impl>>>
       MBarrierDependencyMap;
+  /// Graph memory pool for handling graph-owned memory allocations for this
+  /// graph.
+  graph_mem_pool MGraphMemPool;
 
   unsigned long long MID;
   // Used for std::hash in order to create a unique hash for the instance.
   inline static std::atomic<unsigned long long> NextAvailableID = 0;
+
+  // The number of live executable graphs that have been created from this
+  // modifiable graph
+  std::atomic<size_t> MExecGraphCount = 0;
 };
 
 /// Class representing the implementation of command_graph<executable>.
@@ -1334,6 +1394,14 @@ class exec_graph_impl {
 
   unsigned long long getID() const { return MID; }
 
+  /// Do any work required during finalization to finalize graph-owned memory
+  /// allocations.
+  void finalizeMemoryAllocations() {
+    // This call allocates physical memory and maps all virtual device
+    // allocations
+    MGraphImpl->getMemPool().allocateAndMapAll();
+  }
+
 private:
   /// Create a command-group for the node and add it to command-buffer by going
   /// through the scheduler.
diff --git a/sycl/source/detail/graph_memory_pool.hpp b/sycl/source/detail/graph_memory_pool.hpp
new file mode 100644
index 0000000000000..be90c3b58892a
--- /dev/null
+++ b/sycl/source/detail/graph_memory_pool.hpp
@@ -0,0 +1,210 @@
+//==--------- graph_memory_pool.hpp --- SYCL graph extension ---------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include <detail/physical_mem_impl.hpp>                // For physical_mem_impl
+#include <sycl/context.hpp>                            // For context
+#include <sycl/device.hpp>                             // For device
+#include <sycl/ext/oneapi/virtual_mem/virtual_mem.hpp> // For get_mem_granularity
+
+namespace sycl {
+inline namespace _V1 {
+namespace ext {
+namespace oneapi {
+namespace experimental {
+namespace detail {
+
+/// Class handling graph-owned memory allocations. Device allocations are
+/// managed using virtual memory.
+class graph_mem_pool {
+  // Info descriptor for various properties of an allocation
+  struct alloc_info {
+    // Allocation kind
+    usm::alloc Kind;
+    // Size of the allocation
+    size_t Size;
+    // Is currently mapped to physical memory
+    bool Mapped = false;
+    // Index into the array of physical memory
+    size_t PhysicalMemID = 0;
+    // Is the allocation read only
+    bool ReadOnly = false;
+    // Should the allocation be zero initialized during initial allocation
+    bool ZeroInit = false;
+  };
+
+public:
+  graph_mem_pool(const context &Context, const device &Device)
+      : MContext(Context), MDevice(Device) {}
+  ~graph_mem_pool() {
+    for (auto &[Ptr, AllocInfo] : MAllocations) {
+      // Unmap allocations if required before physical memory is released
+      // Physical mem is released when MPhysicalMem is cleared
+      if (AllocInfo.Mapped) {
+        unmap(Ptr, AllocInfo.Size, MContext);
+      }
+      // Free the VA range
+      free_virtual_mem(reinterpret_cast<uintptr_t>(Ptr), AllocInfo.Size,
+                       MContext);
+    }
+    MPhysicalMem.clear();
+  }
+
+  /// Memory pool cannot be copied
+  graph_mem_pool(graph_mem_pool &) = delete;
+
+  /// Get a pointer to a new allocation. For device allocations these are
+  /// virtual reservations which must be later mapped to allocated physical
+  /// memory before use by calling allocateAndMapAll()
+  /// @param Size Size of the allocation
+  /// @param AllocType Type of the allocation
+  /// @param MemPool Optional memory pool from which allocations will not be
+  /// made directly but properties may be respected.
+  /// @return A pointer to the start of the allocation
+  void *malloc(size_t Size, usm::alloc AllocType,
+               const std::shared_ptr<memory_pool_impl> &MemPool = nullptr) {
+    void *Alloc = nullptr;
+    switch (AllocType) {
+    case usm::alloc::device: {
+
+      auto &CtxImpl = sycl::detail::getSyclObjImpl(MContext);
+      auto &Adapter = CtxImpl->getAdapter();
+
+      size_t Granularity = get_mem_granularity(MDevice, MContext);
+      uintptr_t StartPtr = 0;
+      size_t AlignedSize = alignByteSize(Size, Granularity);
+      // Do virtual reservation
+      Adapter->call<sycl::errc::runtime,
+                    sycl::detail::UrApiKind::urVirtualMemReserve>(
+          CtxImpl->getHandleRef(), reinterpret_cast<void *>(StartPtr),
+          AlignedSize, &Alloc);
+
+      alloc_info AllocInfo = {};
+      AllocInfo.Kind = AllocType;
+      AllocInfo.Size = AlignedSize;
+
+      // Collect relevant properties from memory pool
+      if (MemPool) {
+        auto PropList = MemPool->getPropList();
+        if (PropList.has_property<property::memory_pool::zero_init>()) {
+          AllocInfo.ZeroInit = true;
+        }
+        if (PropList.has_property<property::memory_pool::read_only>()) {
+          AllocInfo.ReadOnly = true;
+        }
+      }
+
+      MAllocations[Alloc] = AllocInfo;
+      break;
+    }
+
+    default:
+      throw sycl::exception(sycl::make_error_code(errc::feature_not_supported),
+                            "Only device allocations are currently supported "
+                            "in graph allocation nodes!");
+      break;
+    }
+
+    return Alloc;
+  }
+
+  /// Return the total amount of memory being used by this pool
+  size_t getMemUseCurrent() const {
+    size_t TotalMem = 0;
+    for (auto &PhysicalMem : MPhysicalMem) {
+      TotalMem += PhysicalMem->size();
+    }
+
+    return TotalMem;
+  }
+
+  /// For device allocations using virtual memory this function allocates
+  /// physical memory and maps each virtual range to it, should be called during
+  /// graph finalization.
+  void allocateAndMapAll() {
+    // Queue used for any initializing of memory, zero-init etc.
+    sycl::queue Queue{MContext, MDevice};
+
+    for (auto &Allocation : MAllocations) {
+      // Set access mode
+      void *Ptr = Allocation.first;
+      alloc_info &AllocInfo = Allocation.second;
+      address_access_mode AccessMode = AllocInfo.ReadOnly
+                                           ? address_access_mode::read
+                                           : address_access_mode::read_write;
+
+      // Create physical memory
+      auto PhysicalMem = std::make_shared<physical_mem_impl>(MDevice, MContext,
+                                                             AllocInfo.Size);
+      // Map the virtual reservation to it
+      PhysicalMem->map(reinterpret_cast<uintptr_t>(Ptr), AllocInfo.Size,
+                       AccessMode, 0);
+
+      // Zero init if required
+      if (AllocInfo.ZeroInit) {
+        Queue.submit(
+            [&](sycl::handler &CGH) { CGH.memset(Ptr, 0, AllocInfo.Size); });
+      }
+
+      MPhysicalMem.push_back(PhysicalMem);
+      AllocInfo.PhysicalMemID = MPhysicalMem.size() - 1;
+      AllocInfo.Mapped = true;
+    }
+
+    // Wait on any operations we enqueued.
+    Queue.wait_and_throw();
+  }
+
+  /// For device virtual reservations unmap and deallocate physical memory.
+  /// Virtual reservations are not released and can be reallocated/mapped again.
+  /// Typically called on executable graph destruction.
+  void deallocateAndUnmapAll() {
+    for (auto &[Ptr, AllocInfo] : MAllocations) {
+      // Unmap allocations before physical memory is released
+      // Physical mem is released when MPhysicalMem is cleared
+      unmap(Ptr, AllocInfo.Size, MContext);
+      AllocInfo.PhysicalMemID = 0;
+      AllocInfo.Mapped = false;
+    }
+
+    MPhysicalMem.clear();
+  }
+
+  /// True if this pool has any allocations
+  bool hasAllocations() const { return MAllocations.size(); }
+
+  // True if an allocation exists for this pointer
+  bool hasAllocation(void *Ptr) const {
+    return MAllocations.find(Ptr) != MAllocations.end();
+  }
+
+private:
+  /// Returns an aligned byte size given a required granularity
+  /// @param UnalignedByteSize The original requested allocation size
+  /// @param Granularity The required granularity for this allocation
+  /// @returns The aligned size
+  static size_t alignByteSize(size_t UnalignedByteSize, size_t Granularity) {
+    return ((UnalignedByteSize + Granularity - 1) / Granularity) * Granularity;
+  }
+
+  /// Context associated with allocations from this pool
+  context MContext;
+  /// Device associated with allocations from this pool
+  device MDevice;
+  /// Map of allocated pointers to an info struct
+  std::unordered_map<void *, alloc_info> MAllocations;
+  /// List of physical memory allocations used for virtual device reservations
+  std::vector<std::shared_ptr<physical_mem_impl>> MPhysicalMem;
+};
+} // namespace detail
+} // namespace experimental
+} // namespace oneapi
+} // namespace ext
+} // namespace _V1
+} // namespace sycl
diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp
index a6057ac5ceaf4..0f60442aa9e18 100644
--- a/sycl/source/handler.cpp
+++ b/sycl/source/handler.cpp
@@ -2052,6 +2052,9 @@ void handler::memcpyFromHostOnlyDeviceGlobal(void *Dest,
 
 const std::shared_ptr<detail::context_impl> &
 handler::getContextImplPtr() const {
+  if (impl->MGraph) {
+    return impl->MGraph->getContextImplPtr();
+  }
   return MQueue->getContextImplPtr();
 }
 
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_device.cpp b/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_device.cpp
new file mode 100644
index 0000000000000..108ef34fe3489
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_device.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../Inputs/async_alloc_device.cpp"
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_device_zero_init.cpp b/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_device_zero_init.cpp
new file mode 100644
index 0000000000000..0595482a24731
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_device_zero_init.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../Inputs/async_alloc_device_zero_init.cpp"
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_queries.cpp b/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_queries.cpp
new file mode 100644
index 0000000000000..b86cd9bf04476
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/Explicit/async_alloc_queries.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_EXPLICIT
+
+#include "../Inputs/async_alloc_queries.cpp"
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_device.cpp b/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_device.cpp
new file mode 100644
index 0000000000000..6d4452815f645
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_device.cpp
@@ -0,0 +1,147 @@
+// Tests basic adding of async allocation nodes for device allocations
+
+#include "../../graph_common.hpp"
+#include <sycl/ext/oneapi/experimental/async_alloc/async_alloc.hpp>
+
+using T = int;
+void add_nodes_to_graph(
+    exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
+    queue &Queue, size_t Size, T *PtrInput) {
+  T *AsyncPtrA = nullptr;
+  // Add alloc node
+  auto AllocNode = add_node(Graph, Queue, [&](handler &CGH) {
+    AsyncPtrA = static_cast<T *>(
+        exp_ext::async_malloc(CGH, usm::alloc::device, Size * sizeof(T)));
+  });
+  // Add memcpy to alloc node
+  auto MemcpyNodeA = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, AllocNode);
+        CGH.memcpy(AsyncPtrA, PtrInput, Size * sizeof(T));
+      },
+      AllocNode);
+
+  // add kernel that operates on async memory
+  auto KernelNodeA = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, MemcpyNodeA);
+        CGH.parallel_for(range<1>(Size), [=](item<1> Item) {
+          size_t LinID = Item.get_linear_id();
+          PtrInput[LinID] += AsyncPtrA[LinID];
+        });
+      },
+      MemcpyNodeA);
+
+  // Add free node
+
+  auto FreeNode = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, KernelNodeA);
+        exp_ext::async_free(CGH, AsyncPtrA);
+      },
+      KernelNodeA);
+
+  // Add alloc node
+
+  T *AsyncPtrB = nullptr;
+
+  auto AllocNodeB = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, FreeNode);
+        AsyncPtrB = static_cast<T *>(
+            exp_ext::async_malloc(CGH, usm::alloc::device, Size * sizeof(T)));
+      },
+      FreeNode);
+
+  // Add kernels that operates on async memory
+  auto KernelNodeB = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, AllocNodeB);
+        CGH.parallel_for(range<1>(Size), [=](item<1> Item) {
+          size_t LinID = Item.get_linear_id();
+          AsyncPtrB[LinID] = PtrInput[LinID] + LinID;
+        });
+      },
+      AllocNodeB);
+  auto KernelNodeC = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, KernelNodeB);
+        CGH.parallel_for(range<1>(Size), [=](item<1> Item) {
+          size_t LinID = Item.get_linear_id();
+          AsyncPtrB[LinID] *= 3;
+        });
+      },
+      KernelNodeB);
+  // Add copy back to input USM pointer
+  auto MemcpyNodeB = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, KernelNodeC);
+        CGH.memcpy(PtrInput, AsyncPtrB, Size * sizeof(T));
+      },
+      KernelNodeC);
+  // Add free node
+  add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, MemcpyNodeB);
+        exp_ext::async_free(CGH, AsyncPtrB);
+      },
+      MemcpyNodeB);
+}
+
+void calculate_reference_data(size_t Iterations, size_t Size,
+                              std::vector<T> &ReferenceOutput) {
+  for (size_t i = 0; i < Iterations; i++) {
+    for (size_t j = 0; j < Size; j++) {
+      ReferenceOutput[j] *= 2;
+      ReferenceOutput[j] += j;
+      ReferenceOutput[j] *= 3;
+    }
+  }
+}
+
+int main() {
+  queue Queue{};
+
+  std::vector<T> DataInput(Size);
+
+  std::iota(DataInput.begin(), DataInput.end(), 1);
+
+  std::vector<T> ReferenceOutput(DataInput);
+  calculate_reference_data(Iterations, Size, ReferenceOutput);
+
+  exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+  T *PtrInput = malloc_device<T>(Size, Queue);
+
+  std::vector<T> OutputData(Size);
+
+  Queue.copy(DataInput.data(), PtrInput, Size);
+  Queue.wait_and_throw();
+
+  // Add commands to graph
+  add_nodes_to_graph(Graph, Queue, Size, PtrInput);
+
+  auto GraphExec = Graph.finalize();
+
+  for (unsigned n = 0; n < Iterations; n++) {
+    Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
+  }
+
+  Queue.wait_and_throw();
+  Queue.copy(PtrInput, OutputData.data(), Size).wait_and_throw();
+  for (size_t i = 0; i < Size; i++) {
+    assert(check_value(i, ReferenceOutput[i], OutputData[i], "OutputData"));
+  }
+
+  free(PtrInput, Queue);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_device_zero_init.cpp b/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_device_zero_init.cpp
new file mode 100644
index 0000000000000..32078c8ce660b
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_device_zero_init.cpp
@@ -0,0 +1,59 @@
+// Tests zero initializing a graph memory allocation based on memory pool
+// properties
+
+#include "../../graph_common.hpp"
+#include <sycl/ext/oneapi/experimental/async_alloc/async_alloc.hpp>
+#include <sycl/ext/oneapi/experimental/async_alloc/memory_pool.hpp>
+#include <sycl/ext/oneapi/experimental/async_alloc/memory_pool_properties.hpp>
+
+using T = int;
+
+int main() {
+  queue Queue{};
+
+  std::vector<T> Output(Size);
+  std::vector<T> ReferenceOutput(Size, T(0));
+
+  exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+  exp_ext::memory_pool MemPool{
+      Queue, usm::alloc::device, {exp_ext::property::memory_pool::zero_init{}}};
+
+  // Add commands to graph
+  T *AsyncPtrA = nullptr;
+  // Add alloc node that is zero initialized
+  auto AllocNode = add_node(Graph, Queue, [&](handler &CGH) {
+    AsyncPtrA = static_cast<T *>(
+        exp_ext::async_malloc_from_pool(CGH, Size * sizeof(T), MemPool));
+  });
+
+  // Copy that zero init memory back to host
+  auto MemcpyNode = add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, AllocNode);
+        CGH.memcpy(Output.data(), AsyncPtrA, Size * sizeof(T));
+      },
+      AllocNode);
+
+  // Free memory
+  add_node(
+      Graph, Queue,
+      [&](handler &CGH) {
+        depends_on_helper(CGH, MemcpyNode);
+        exp_ext::async_free(CGH, AsyncPtrA);
+      },
+      MemcpyNode);
+
+  auto GraphExec = Graph.finalize();
+
+  Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
+
+  Queue.wait_and_throw();
+
+  for (size_t i = 0; i < Size; i++) {
+    assert(check_value(i, ReferenceOutput[i], Output[i], "Output"));
+  }
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_queries.cpp b/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_queries.cpp
new file mode 100644
index 0000000000000..08ac34f81f3d0
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/Inputs/async_alloc_queries.cpp
@@ -0,0 +1,51 @@
+// Tests queries associated with graph-owned allocations
+
+#include "../../graph_common.hpp"
+#include <sycl/ext/oneapi/experimental/async_alloc/async_alloc.hpp>
+
+using T = int;
+
+int main() {
+  queue Queue{};
+
+  size_t Size = 2 << 18;
+
+  // Expected size is number of elements * size of data type * iterations
+  const size_t ExpectedMinSize = Size * sizeof(T) * Iterations;
+
+  exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+  // Graph should report 0 memory usage with no allocation nodes
+  {
+    auto GraphExec = Graph.finalize();
+    assert(GraphExec.get_required_mem_size() == 0);
+  }
+
+  // Add allocs and frees for each command. Allocs are all root nodes so they
+  // will require unique memory for each one.
+  for (size_t i = 0; i < Iterations; i++) {
+    T *AsyncPtr = nullptr;
+    // Add alloc node
+    auto AllocNode = add_node(Graph, Queue, [&](handler &CGH) {
+      AsyncPtr = static_cast<T *>(
+          exp_ext::async_malloc(CGH, usm::alloc::device, Size * sizeof(T)));
+    });
+
+    // Free memory, node depends on only the associated allocation node
+    add_node(
+        Graph, Queue,
+        [&](handler &CGH) {
+          depends_on_helper(CGH, AllocNode);
+          exp_ext::async_free(CGH, AsyncPtr);
+        },
+        AllocNode);
+  }
+
+  auto GraphExec = Graph.finalize();
+
+  // Memory allocated might be adjusted for example based on device granularity,
+  // so it may be more than expected but never less.
+  assert(GraphExec.get_required_mem_size() >= ExpectedMinSize);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_device.cpp b/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_device.cpp
new file mode 100644
index 0000000000000..35776d4bbbd26
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_device.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../Inputs/async_alloc_device.cpp"
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_device_zero_init.cpp b/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_device_zero_init.cpp
new file mode 100644
index 0000000000000..0dd1bcf149f80
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_device_zero_init.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../Inputs/async_alloc_device_zero_init.cpp"
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_queries.cpp b/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_queries.cpp
new file mode 100644
index 0000000000000..c2fe4c9b195b7
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/RecordReplay/async_alloc_queries.cpp
@@ -0,0 +1,10 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+#define GRAPH_E2E_RECORD_REPLAY
+
+#include "../Inputs/async_alloc_queries.cpp"
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/async_alloc_finalize_exception.cpp b/sycl/test-e2e/Graph/AsyncAlloc/async_alloc_finalize_exception.cpp
new file mode 100644
index 0000000000000..847e3d0dfbf08
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/async_alloc_finalize_exception.cpp
@@ -0,0 +1,93 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+
+// Tests that attempting to finalize and create more than one executable graph
+// containing allocations is an error.
+
+#include "../graph_common.hpp"
+#include <sycl/ext/oneapi/experimental/async_alloc/async_alloc.hpp>
+
+using T = int;
+
+int main() {
+  queue Queue{};
+
+  exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+  std::vector<T> OutputData(Size);
+  std::vector<T> ReferenceData(Size);
+
+  std::iota(ReferenceData.begin(), ReferenceData.end(), 0);
+
+  // Add alloc and free commands
+  T *AsyncPtr = nullptr;
+  // Add alloc node
+  auto AllocNode = Graph.add([&](handler &CGH) {
+    AsyncPtr = static_cast<T *>(
+        exp_ext::async_malloc(CGH, usm::alloc::device, Size * sizeof(T)));
+  });
+
+  auto KernelNode = Graph.add(
+      [&](handler &CGH) {
+        CGH.parallel_for(range<1>(Size), [=](item<1> ID) {
+          size_t LinID = ID.get_linear_id();
+          AsyncPtr[LinID] = static_cast<T>(LinID);
+        });
+      },
+      {exp_ext::property::node::depends_on{AllocNode}});
+
+  // Copy data out for verification
+  auto CopyNode = Graph.add(
+      [&](handler &CGH) {
+        CGH.memcpy(OutputData.data(), AsyncPtr, Size * sizeof(T));
+      },
+      {exp_ext::property::node::depends_on{KernelNode}});
+
+  // Free memory, node depends on only the associated allocation node
+  Graph.add([&](handler &CGH) { exp_ext::async_free(CGH, AsyncPtr); },
+            {exp_ext::property::node::depends_on{CopyNode}});
+
+  // Constrain scope of GraphExec
+  {
+    auto GraphExec = Graph.finalize();
+    // Graphs support CRS so copying here does not create a separate instance
+    // and should be allowed.
+    auto GraphExec2 = GraphExec;
+
+    // Check that the graph executes correctly
+
+    Queue.ext_oneapi_graph(GraphExec).wait_and_throw();
+
+    for (size_t i = 0; i < Size; i++) {
+      assert(check_value(i, ReferenceData[i], OutputData[i], "OutputData"));
+    }
+
+    std::error_code ExceptionCode = make_error_code(sycl::errc::success);
+    try {
+      // GraphExec is still alive so this should be an error
+      Graph.finalize();
+    } catch (const exception &e) {
+      ExceptionCode = e.code();
+    }
+
+    assert(ExceptionCode == sycl::errc::invalid);
+  }
+
+  // GraphExec and GraphExec2 are now destroyed, so we should be able to
+  // finalize again
+  auto GraphExec = Graph.finalize();
+
+  // Check that the graph executes correctly again
+
+  Queue.ext_oneapi_graph(GraphExec).wait_and_throw();
+
+  for (size_t i = 0; i < Size; i++) {
+    assert(check_value(i, ReferenceData[i], OutputData[i], "OutputData"));
+  }
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/async_alloc_free_exception.cpp b/sycl/test-e2e/Graph/AsyncAlloc/async_alloc_free_exception.cpp
new file mode 100644
index 0000000000000..aae37b1437174
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/async_alloc_free_exception.cpp
@@ -0,0 +1,52 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+
+// Tests that attempting to free a pointer in a graph that doesn't have an
+// associated allocation node fails in a range of scenarios
+
+#include "../graph_common.hpp"
+#include <sycl/ext/oneapi/experimental/async_alloc/async_alloc.hpp>
+
+using T = int;
+
+// Attempts to add a free to the graph for a pointer with no associated
+// allocation, and check that the correct exception is returned.
+void addInvalidFreeAndCheckForException(
+    exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph) {
+  void *FakePtr = (void *)1;
+  std::error_code ExceptionCode = make_error_code(sycl::errc::success);
+  try {
+    Graph.add([&](handler &CGH) { exp_ext::async_free(CGH, FakePtr); });
+  } catch (const exception &e) {
+    ExceptionCode = e.code();
+  }
+
+  assert(ExceptionCode == sycl::errc::invalid);
+}
+
+int main() {
+  queue Queue{};
+
+  exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
+
+  // Add an invalid free to the graph
+  addInvalidFreeAndCheckForException(Graph);
+
+  void *AsyncPtr = nullptr;
+  // Add a real allocation node
+  auto AllocNode = Graph.add([&](handler &CGH) {
+    AsyncPtr = sycl::ext::oneapi::experimental::async_malloc(
+        CGH, usm::alloc::device, Size);
+  });
+
+  // Try the invalid free again
+  addInvalidFreeAndCheckForException(Graph);
+
+  // Add a real free node
+  Graph.add([&](handler &CGH) { exp_ext::async_free(CGH, AsyncPtr); });
+
+  // Try the invalid free again
+  addInvalidFreeAndCheckForException(Graph);
+
+  return 0;
+}
diff --git a/sycl/test-e2e/Graph/AsyncAlloc/lit.local.cfg b/sycl/test-e2e/Graph/AsyncAlloc/lit.local.cfg
new file mode 100644
index 0000000000000..4b1bd2c979d99
--- /dev/null
+++ b/sycl/test-e2e/Graph/AsyncAlloc/lit.local.cfg
@@ -0,0 +1,5 @@
+# Async alloc tests require virtual memory for device allocations
+# Async alloc extension is required to support creating memory pools
+config.required_features += ['aspect-ext_oneapi_virtual_mem', 'aspect-ext_oneapi_limited_graph', 'aspect-ext_oneapi_async_memory_alloc']
+# V2 adapter does not support async alloc api yet
+config.unsupported_features += ['level_zero_v2_adapter']
diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump
index 967e13b09d739..4a02e92dae289 100644
--- a/sycl/test/abi/sycl_symbols_linux.dump
+++ b/sycl/test/abi/sycl_symbols_linux.dump
@@ -3650,6 +3650,7 @@ _ZNK4sycl3_V13ext6oneapi12experimental21dynamic_command_group16get_active_indexE
 _ZNK4sycl3_V13ext6oneapi12experimental4node14get_successorsEv
 _ZNK4sycl3_V13ext6oneapi12experimental4node16get_predecessorsEv
 _ZNK4sycl3_V13ext6oneapi12experimental4node8get_typeEv
+_ZNK4sycl3_V13ext6oneapi12experimental6detail24executable_command_graph21get_required_mem_sizeEv
 _ZNK4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph11print_graphENS0_6detail11string_viewEb
 _ZNK4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph11print_graphENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEb
 _ZNK4sycl3_V13ext6oneapi12experimental6detail24modifiable_command_graph14get_root_nodesEv
diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump
index 27a22a7044322..b1ef7c89edffa 100644
--- a/sycl/test/abi/sycl_symbols_windows.dump
+++ b/sycl/test/abi/sycl_symbols_windows.dump
@@ -4171,6 +4171,7 @@
 ?get_queue@fusion_wrapper@experimental@codeplay@ext@_V1@sycl@@QEBA?AVqueue@56@XZ
 ?get_range@image_mem@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$range@$02@56@XZ
 ?get_range@image_plain@detail@_V1@sycl@@IEBA?AV?$range@$02@34@XZ
+?get_required_mem_size@executable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA_KXZ
 ?get_reserved_size_current@memory_pool@experimental@oneapi@ext@_V1@sycl@@QEBA_KXZ
 ?get_root_nodes@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBA?AV?$vector@Vnode@experimental@oneapi@ext@_V1@sycl@@V?$allocator@Vnode@experimental@oneapi@ext@_V1@sycl@@@std@@@std@@XZ
 ?get_size@image_plain@detail@_V1@sycl@@IEBA_KXZ
diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp
index d6977dd3ddbed..f52e690b171e8 100644
--- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp
+++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp
@@ -221,48 +221,8 @@ void addImagesCopies(experimental::detail::modifiable_command_graph &G,
 /// @param G Modifiable graph to add commands to.
 /// @param Q Queue to submit nodes to.
 /// @param Size Size in bytes to allocate.
-/// @param MemPool Memory pool to allocate to.
 /// @param Ptr Generic pointer to allocated memory.
-template <OperationPath PathKind>
-void addAsyncAlloc(experimental::detail::modifiable_command_graph &G, queue &Q,
-                   size_t Size,
-                   sycl::ext::oneapi::experimental::memory_pool &memPool,
-                   [[maybe_unused]] void *Ptr) {
-  // simple alloc with specified pool
-  std::error_code ExceptionCode = make_error_code(sycl::errc::success);
-  try {
-    if constexpr (PathKind == OperationPath::RecordReplay) {
-      Q.submit([&](handler &CGH) {
-        Ptr = sycl::ext::oneapi::experimental::async_malloc_from_pool(CGH, Size,
-                                                                      memPool);
-      });
-    }
-    if constexpr (PathKind == OperationPath::Shortcut) {
-      Ptr = sycl::ext::oneapi::experimental::async_malloc_from_pool(Q, Size,
-                                                                    memPool);
-    }
-    if constexpr (PathKind == OperationPath::Explicit) {
-      G.add([&](handler &CGH) {
-        Ptr = sycl::ext::oneapi::experimental::async_malloc_from_pool(CGH, Size,
-                                                                      memPool);
-      });
-    }
-  } catch (exception &Exception) {
-    ExceptionCode = Exception.code();
-  }
-
-  ASSERT_EQ(ExceptionCode, sycl::errc::invalid);
-}
-
-/// Tries to add nodes including asynchronous allocation instructions to the
-/// graph G. It tests that an invalid exception has been thrown since the
-/// sycl_ext_oneapi_async_alloc can not be used along with SYCL Graph.
-///
-/// @param G Modifiable graph to add commands to.
-/// @param Q Queue to submit nodes to.
-/// @param Size Size in bytes to allocate.
-/// @param Ptr Generic pointer to allocated memory.
-template <OperationPath PathKind>
+template <OperationPath PathKind, usm::alloc AllocKind>
 void addAsyncAlloc(experimental::detail::modifiable_command_graph &G, queue &Q,
                    size_t Size, [[maybe_unused]] void *Ptr) {
   // simple alloc
@@ -270,57 +230,24 @@ void addAsyncAlloc(experimental::detail::modifiable_command_graph &G, queue &Q,
   try {
     if constexpr (PathKind == OperationPath::RecordReplay) {
       Q.submit([&](handler &CGH) {
-        Ptr = sycl::ext::oneapi::experimental::async_malloc(
-            CGH, sycl::usm::alloc::device, Size);
+        Ptr =
+            sycl::ext::oneapi::experimental::async_malloc(CGH, AllocKind, Size);
       });
     }
     if constexpr (PathKind == OperationPath::Shortcut) {
-      Ptr = sycl::ext::oneapi::experimental::async_malloc(
-          Q, sycl::usm::alloc::device, Size);
+      Ptr = sycl::ext::oneapi::experimental::async_malloc(Q, AllocKind, Size);
     }
     if constexpr (PathKind == OperationPath::Explicit) {
       G.add([&](handler &CGH) {
-        Ptr = sycl::ext::oneapi::experimental::async_malloc(
-            CGH, sycl::usm::alloc::device, Size);
+        Ptr =
+            sycl::ext::oneapi::experimental::async_malloc(CGH, AllocKind, Size);
       });
     }
   } catch (exception &Exception) {
     ExceptionCode = Exception.code();
   }
 
-  ASSERT_EQ(ExceptionCode, sycl::errc::invalid);
-}
-
-/// Tries to add nodes including asynchronous free instructions to the graph G.
-/// It tests that an invalid exception has been thrown since the
-/// sycl_ext_oneapi_async_alloc can not be used along with SYCL Graph.
-///
-/// @param G Modifiable graph to add commands to.
-/// @param Q Queue to submit nodes to.
-/// @param Ptr Pointer to asynchronously allocated memory to free.
-template <OperationPath PathKind>
-void addAsyncFree(experimental::detail::modifiable_command_graph &G, queue &Q,
-                  void *Ptr) {
-  // simple free
-  std::error_code ExceptionCode = make_error_code(sycl::errc::success);
-  try {
-    if constexpr (PathKind == OperationPath::RecordReplay) {
-      Q.submit([&](handler &CGH) {
-        sycl::ext::oneapi::experimental::async_free(CGH, Ptr);
-      });
-    }
-    if constexpr (PathKind == OperationPath::Shortcut) {
-      sycl::ext::oneapi::experimental::async_free(Q, Ptr);
-    }
-    if constexpr (PathKind == OperationPath::Explicit) {
-      G.add([&](handler &CGH) {
-        sycl::ext::oneapi::experimental::async_free(CGH, Ptr);
-      });
-    }
-  } catch (exception &Exception) {
-    ExceptionCode = Exception.code();
-  }
-  ASSERT_EQ(ExceptionCode, sycl::errc::invalid);
+  ASSERT_EQ(ExceptionCode, sycl::errc::feature_not_supported);
 }
 } // anonymous namespace
 
@@ -981,33 +908,32 @@ TEST_F(CommandGraphTest, DynamicCommandGroupMismatchAccessorEdges) {
   ASSERT_THROW(Graph.add(DynCG), sycl::exception);
 }
 
-// ext_oneapi_async_alloc isn't currently supported with SYCL graphs
-TEST_F(CommandGraphTest, AsyncAllocExceptionCheck) {
+// host and shared allocations are not currently supported by graphs, checks for
+// correct exception behaviour.
+TEST_F(CommandGraphTest, AsyncAllocKindExceptionCheck) {
   auto Context = Queue.get_context();
   auto Device = Queue.get_device();
 
-  // Create pool
-  sycl::ext::oneapi::experimental::memory_pool MemPool(
-      Context, Device, sycl::usm::alloc::device);
-
   void *Ptr1 = nullptr;
   void *Ptr2 = nullptr;
 
   Graph.begin_recording(Queue);
 
-  addAsyncAlloc<OperationPath::RecordReplay>(Graph, Queue, 1024, MemPool, Ptr1);
-  addAsyncAlloc<OperationPath::Shortcut>(Graph, Queue, 1024, Ptr2);
-
-  addAsyncFree<OperationPath::RecordReplay>(Graph, Queue, Ptr1);
-  addAsyncFree<OperationPath::Shortcut>(Graph, Queue, Ptr2);
+  addAsyncAlloc<OperationPath::RecordReplay, usm::alloc::host>(Graph, Queue,
+                                                               1024, Ptr1);
+  addAsyncAlloc<OperationPath::RecordReplay, usm::alloc::shared>(Graph, Queue,
+                                                                 1024, Ptr1);
+  addAsyncAlloc<OperationPath::Shortcut, usm::alloc::host>(Graph, Queue, 1024,
+                                                           Ptr2);
+  addAsyncAlloc<OperationPath::Shortcut, usm::alloc::shared>(Graph, Queue, 1024,
+                                                             Ptr2);
 
   Graph.end_recording();
 
   void *Ptr3 = nullptr;
   void *Ptr4 = nullptr;
-  addAsyncAlloc<OperationPath::Explicit>(Graph, Queue, 1024, MemPool, Ptr3);
-  addAsyncAlloc<OperationPath::Explicit>(Graph, Queue, 1024, Ptr4);
-
-  addAsyncFree<OperationPath::Explicit>(Graph, Queue, Ptr3);
-  addAsyncFree<OperationPath::Explicit>(Graph, Queue, Ptr4);
+  addAsyncAlloc<OperationPath::Explicit, usm::alloc::host>(Graph, Queue, 1024,
+                                                           Ptr3);
+  addAsyncAlloc<OperationPath::Explicit, usm::alloc::shared>(Graph, Queue, 1024,
+                                                             Ptr4);
 }