diff --git a/CMakeLists.txt b/CMakeLists.txt index ca73688..c577727 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2024 Pyarelal Knowles, MIT License +# Copyright (c) 2024-2025 Pyarelal Knowles, MIT License cmake_minimum_required(VERSION 3.20) diff --git a/README.md b/README.md index b772c5a..235c8dc 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,14 @@ # decodeless_mappedfile -[`decodeless`](https://github.com/decodeless) (previously no-decode) is a -collection of utility libraries for conveniently reading and writing files via -memory mapping. Components can be used individually or combined. +[`decodeless`](https://github.com/decodeless) is a collection of utility +libraries for conveniently reading and writing files via memory mapping. +Components can be used individually or combined. -`decodeless_mappedfile` is a small cross platform file mapping abstraction that -supports reserving virtual address space and growing a file mapping into it for -an exciting new way to write binary files. Also includes convenient read-only -and writable file mapping objects. +`decodeless_mappedfile` is a small cross platform file mapping abstraction. It +provides simple objects to read and write to existing files. However, the more +interesting feature is a resizable mapped file. It works by reserving some +virtual address space and growing a file mapping into it for a really convenient +and fast way to write binary files. [decodeless_writer](https://github.com/decodeless/writer) combines this and [decodeless_allocator](https://github.com/decodeless/allocator) to conveniently @@ -33,16 +34,16 @@ write complex data structures directly as binary data. ## Code Example -``` +```cpp // Memory map a read-only file decodeless::file mapped(filename); const int* numbers = reinterpret_cast<const int*>(mapped.data()); ... 
``` -``` +```cpp // Create a file -size_t maxSize = 4096; +size_t maxSize = 1 << 30; // reserved virtual address; can be huge decodeless::resizable_file file(filename, maxSize); EXPECT_EQ(file.size(), 0); EXPECT_EQ(file.data(), nullptr); @@ -58,6 +59,35 @@ EXPECT_EQ(numbers[9], 9); numbers[99] = 99; ``` +## Building + +This is a header-only C++20 library with CMake integration. Use any of: + +- ```cmake + add_subdirectory(path/to/mappedfile) + ``` + +- ```cmake + include(FetchContent) + FetchContent_Declare( + decodeless_mappedfile + GIT_REPOSITORY https://github.com/decodeless/mappedfile.git + GIT_TAG release_tag + GIT_SHALLOW TRUE + ) + FetchContent_MakeAvailable(decodeless_mappedfile) + ``` + +- ```cmake + find_package(decodeless_mappedfile REQUIRED CONFIG PATHS paths/to/search) + ``` + +Then, + +```cmake +target_link_libraries(myproject PRIVATE decodeless::mappedfile) +``` + ## Notes - Windows implementation uses unofficial section API for `NtExtendSection` from diff --git a/include/decodeless/detail/mappedfile_linux.hpp b/include/decodeless/detail/mappedfile_linux.hpp index f3f2a3c..87e8ea0 100644 --- a/include/decodeless/detail/mappedfile_linux.hpp +++ b/include/decodeless/detail/mappedfile_linux.hpp @@ -1,15 +1,16 @@ -// Copyright (c) 2024 Pyarelal Knowles, MIT License +// Copyright (c) 2024-2025 Pyarelal Knowles, MIT License #pragma once +#include #include #include #include #include #include +#include #include #include -#include #include #include #include @@ -128,14 +129,25 @@ class MemoryMap { return static_cast(static_cast(m_address) + offset); } size_t size() const { return m_size; } - void sync(int flags = MS_SYNC | MS_INVALIDATE) + void sync(size_t offset, size_t size) const + requires Writable + { + assert(offset + size <= m_size); + size_t alignedOffset = offset & ~(pageSize() - 1); + size_t alignedSize = size + offset - alignedOffset; + void* offsetAddress = static_cast( + static_cast(const_cast(m_address)) + alignedOffset); + if 
(msync(offsetAddress, alignedSize, MS_SYNC | MS_INVALIDATE) == -1) + throw LastError(); + } + void sync() const requires Writable { // ENOMEM "Cannot allocate memory" here likely means something remapped // the range before this object went out of scope. I haven't found a // good way to avoid this other than the user being careful to delete // the object before remapping. - if (msync(const_cast(m_address), m_size, flags) == -1) + if (msync(const_cast(m_address), m_size, MS_SYNC | MS_INVALIDATE) == -1) throw LastError(); } void resize(size_t size) { @@ -182,12 +194,22 @@ template class MappedFile { public: using data_type = std::conditional_t; - MappedFile(const fs::path& path, int mapFlags = MAP_PRIVATE) + MappedFile(const fs::path& path) : m_file(path, Writable ? O_RDWR : O_RDONLY) - , m_mapped(nullptr, m_file.size(), mapFlags, m_file, 0) {} + , m_mapped(nullptr, m_file.size(), Writable ? MAP_SHARED : MAP_PRIVATE, m_file, 0) {} data_type data() const { return m_mapped.address(); } size_t size() const { return m_mapped.size(); } + void sync() const + requires Writable + { + m_mapped.sync(); + } + void sync(size_t offset, size_t size) const + requires Writable + { + m_mapped.sync(offset, size); + } private: static constexpr int MapMemoryProtection = Writable ? 
PROT_READ | PROT_WRITE : PROT_READ; @@ -218,6 +240,14 @@ class ResizableMappedFile { if (size) map(size); } + void sync() const { + if (m_mapped) + m_mapped->sync(); + } + void sync(size_t offset, size_t size) const { + if (m_mapped) + m_mapped->sync(offset, size); + } // Override default move assignment so m_reserved outlives m_mapped ResizableMappedFile& operator=(ResizableMappedFile&& other) noexcept { diff --git a/include/decodeless/detail/mappedfile_windows.hpp b/include/decodeless/detail/mappedfile_windows.hpp index c6f638e..8be0cf5 100644 --- a/include/decodeless/detail/mappedfile_windows.hpp +++ b/include/decodeless/detail/mappedfile_windows.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2024 Pyarelal Knowles, MIT License +// Copyright (c) 2024-2025 Pyarelal Knowles, MIT License #pragma once @@ -104,6 +104,10 @@ class FileHandle : public Handle { throw LastError(); return result.QuadPart; } + void flush() const { + if (!FlushFileBuffers(*this)) + throw LastError(); + } }; class FileMappingHandle : public Handle { @@ -157,9 +161,14 @@ class FileMappingView { void* address() const { return m_address; } MEMORY_BASIC_INFORMATION query() const { MEMORY_BASIC_INFORMATION result; - (void)VirtualQuery(address(), &result, sizeof(result)); + (void)VirtualQuery(m_address, &result, sizeof(result)); return result; } + void flush(size_t offset = 0, size_t bytes = 0) const { + if (!FlushViewOfFile( + static_cast(static_cast(m_address) + offset), bytes)) + throw LastError(); + } private: LPVOID m_address; @@ -174,10 +183,23 @@ class MappedFile { FILE_SHARE_READ | (Writable ? FILE_SHARE_WRITE : 0), nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr) , m_size(m_file.size()) - , m_mapping(m_file, nullptr, PAGE_READONLY, m_size, nullptr) - , m_rawView(m_mapping, FILE_MAP_READ) {} + , m_mapping(m_file, nullptr, Writable ? PAGE_READWRITE : PAGE_READONLY, m_size, nullptr) + , m_rawView(m_mapping, FILE_MAP_READ | (Writable ? 
FILE_MAP_WRITE : 0)) {} data_type data() const { return m_rawView.address(); } size_t size() const { return m_size; } + void sync() const + requires Writable + { + m_rawView.flush(); // async flush pages of whole mapping + m_file.flush(); // flush metadata and wait + } + void sync(size_t offset, size_t bytes) const + requires Writable + { + assert(offset + bytes <= m_size); + m_rawView.flush(offset, bytes); // async flush pages of range + m_file.flush(); // flush metadata and wait + } private: FileHandle m_file; @@ -243,6 +265,7 @@ class Env { // DANGER: copy/pasting from e.g. // https://learn.microsoft.com/en-us/windows-hardware/drivers/ddi/ntifs/nf-ntifs-ntcreatesection and // http://undocumented.ntinternals.net/index.html?page=UserMode%2FUndocumented%20Functions%2FNT%20Objects%2FSection%2FNtMapViewOfSection.html +// There are a number of differences, mostly involving ULONG -> SIZE_T upgrades for 64 bit // TODO: make WDK a dependency? ugh.. // *dumpbin.exe /EXPORTS Windows/System32/ntdll.dll to verify symbols exist @@ -285,6 +308,14 @@ typedef struct _SECTION_IMAGE_INFORMATION { ULONG Unknown2[3]; } SECTION_IMAGE_INFORMATION, *PSECTION_IMAGE_INFORMATION; +typedef struct _IO_STATUS_BLOCK { + union { + NTSTATUS Status; + PVOID Pointer; + }; + ULONG_PTR Information; +} IO_STATUS_BLOCK, *PIO_STATUS_BLOCK; + using NtAllocateVirtualMemoryType = NTSTATUS(HANDLE ProcessHandle, PVOID* BaseAddress, ULONG_PTR ZeroBits, PSIZE_T RegionSize, ULONG AllocationType, ULONG Protect); @@ -293,6 +324,9 @@ using NtCreateSectionType = NTSTATUS(PHANDLE SectionHandle, ACCESS_MASK DesiredA PLARGE_INTEGER MaximumSize, ULONG SectionPageProtection, ULONG AllocationAttributes, HANDLE FileHandle); using NtExtendSectionType = NTSTATUS(HANDLE SectionHandle, PLARGE_INTEGER NewSectionSize); +using NtFlushVirtualMemoryType = NTSTATUS(HANDLE ProcessHandle, PVOID* BaseAddress, + PSIZE_T NumberOfBytesToFlush, + PIO_STATUS_BLOCK IoStatusBlock); using NtFreeVirtualMemoryType = NTSTATUS(HANDLE 
ProcessHandle, PVOID* BaseAddress, PSIZE_T RegionSize, ULONG FreeType); using NtMapViewOfSectionType = NTSTATUS(HANDLE SectionHandle, HANDLE ProcessHandle, @@ -305,8 +339,8 @@ using NtOpenSectionType = NTSTATUS(PHANDLE SectionHandle, ACCESS_MASK DesiredAcc using NtCloseType = NTSTATUS(HANDLE Handle); using NtQuerySectionType = NTSTATUS(HANDLE SectionHandle, SECTION_INFORMATION_CLASS InformationClass, - PVOID InformationBuffer, ULONG InformationBufferSize, - PULONG ResultLength); + PVOID InformationBuffer, SIZE_T InformationBufferSize, + PSIZE_T ResultLength); using NtUnmapViewOfSectionType = NTSTATUS(HANDLE ProcessHandle, PVOID BaseAddress); class NtStatusError : public mapping_error { @@ -323,6 +357,7 @@ class NtifsSection { m_ntdll.get("NtAllocateVirtualMemory")) , m_NtCreateSection(m_ntdll.get("NtCreateSection")) , m_NtExtendSection(m_ntdll.get("NtExtendSection")) + , m_NtFlushVirtualMemory(m_ntdll.get("NtFlushVirtualMemory")) , m_NtFreeVirtualMemory(m_ntdll.get("NtFreeVirtualMemory")) , m_NtMapViewOfSection(m_ntdll.get("NtMapViewOfSection")) , m_NtOpenSection(m_ntdll.get("NtOpenSection")) @@ -340,6 +375,7 @@ class NtifsSection { NtAllocateVirtualMemoryType* const m_NtAllocateVirtualMemory; NtCreateSectionType* const m_NtCreateSection; NtExtendSectionType* const m_NtExtendSection; + NtFlushVirtualMemoryType* const m_NtFlushVirtualMemory; NtFreeVirtualMemoryType* const m_NtFreeVirtualMemory; NtMapViewOfSectionType* const m_NtMapViewOfSection; NtOpenSectionType* const m_NtOpenSection; @@ -391,6 +427,7 @@ class VirtualMemory { assert(RegionSize != 0); assert(Offset % pageSizeCached() == 0); assert(RegionSize % pageSizeCached() == 0); +#pragma warning(suppress : 6250) // intentionally decommit without releasing the address space if (!VirtualFree(offsetAddress(Offset), RegionSize, MEM_DECOMMIT)) throw LastError(); } @@ -412,7 +449,7 @@ class VirtualMemory { } } void* offsetAddress(SIZE_T Offset) const { - return reinterpret_cast(reinterpret_cast(m_address) + Offset); 
+ return static_cast(static_cast(m_address) + Offset); } void* m_address = nullptr; }; @@ -431,14 +468,14 @@ static HANDLE createSection(const NtifsSection& dll, ACCESS_MASK DesiredAccess, return result; } -template +template class Section : public Handle { public: Section(Section&& other) noexcept = default; Section(const NtifsSection& dll, ACCESS_MASK DesiredAccess, POBJECT_ATTRIBUTES ObjectAttributes, size_t MaximumSize, ULONG AllocationAttributes, HANDLE FileHandle) - : Handle(createSection(dll, DesiredAccess, ObjectAttributes, MaximumSize, - SectionPageProtection, AllocationAttributes, FileHandle)) + : Handle(createSection(dll, DesiredAccess, ObjectAttributes, MaximumSize, PageProtection, + AllocationAttributes, FileHandle)) , m_dll(dll) , m_size(MaximumSize) {} Section& operator=(Section&& other) noexcept { @@ -462,7 +499,7 @@ class Section : public Handle { SECTION_BASIC_INFORMATION queryBasic() const { SECTION_BASIC_INFORMATION result; - ULONG written{}; + SIZE_T written{}; /* out-param storage: &written is the PSIZE_T ResultLength that NtQuerySection fills in */ NTSTATUS status = m_dll.m_NtQuerySection(*this, SectionBasicInformation, &result, sizeof(result), &written); if (status != STATUS_SUCCESS) @@ -472,7 +509,7 @@ class Section : public Handle { SECTION_IMAGE_INFORMATION queryImage() const { SECTION_IMAGE_INFORMATION result; - ULONG written{}; + SIZE_T written{}; /* out-param storage: &written is the PSIZE_T ResultLength that NtQuerySection fills in */ NTSTATUS status = m_dll.m_NtQuerySection(*this, SectionImageInformation, &result, sizeof(result), &written); if (status != STATUS_SUCCESS) @@ -488,23 +525,23 @@ class Section : public Handle { static_assert(std::is_move_constructible_v>); static_assert(std::is_move_assignable_v>); -template +template class SectionView { public: static constexpr bool writable = - (SectionPageProtection & + (PageProtection & (PAGE_READWRITE | PAGE_WRITECOPY | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_WRITECOPY)) != 0; using address_type = std::conditional_t; SectionView() = delete; SectionView(const SectionView& other) = delete; - SectionView(const NtifsSection& dll, const Section& Section, +
SectionView(const NtifsSection& dll, const Section& Section, HANDLE ProcessHandle, ULONG_PTR ZeroBits, size_t CommitSize, size_t SectionOffset, size_t ViewSize, SECTION_INHERIT InheritDisposition, ULONG AllocationType) : m_dll(dll) , m_process(ProcessHandle) , m_address(mapViewOfSection(dll, Section, ProcessHandle, ZeroBits, CommitSize, SectionOffset, ViewSize, InheritDisposition, AllocationType, - SectionPageProtection)) {} + PageProtection)) {} ~SectionView() { if (m_address) unmap(); @@ -531,6 +568,18 @@ class SectionView { sizeof(result)); return result; } + void flush(size_t offset, size_t bytes) const + requires writable + { + address_type mutAddress = + static_cast(static_cast(m_address) + offset); + SIZE_T mutBytes = bytes; + IO_STATUS_BLOCK statusBlock; // ignored + NTSTATUS status = + m_dll.m_NtFlushVirtualMemory(m_process, &mutAddress, &mutBytes, &statusBlock); + if (status != STATUS_SUCCESS) + throw NtStatusError(m_dll.ntdll(), "NtFlushVirtualMemory", status); + } private: static void* mapViewOfSection(const NtifsSection& dll, HANDLE SectionHandle, @@ -607,6 +656,19 @@ class ResizableMappedFile { ViewUnmap, MEM_RESERVE); } } + void sync() const { + if (m_view && m_section) { + m_view->flush(0, m_section->size()); // async flush pages of whole mapping + m_file.flush(); // flush metadata and wait + } + } + void sync(size_t offset, size_t bytes) const { + assert(offset + bytes <= size()); + if (m_view) { + m_view->flush(offset, bytes); // async flush pages of range + m_file.flush(); // flush metadata and wait + } + } private: size_t m_capacity = 0; @@ -649,10 +711,6 @@ class ResizableMappedMemory { } private: - static NtifsSection& ntifs() { - static NtifsSection ntifs; - return ntifs; - } size_t m_capacity = 0; size_t m_size = 0; size_t m_mappedSize = 0; diff --git a/include/decodeless/mappedfile.hpp b/include/decodeless/mappedfile.hpp index 92d5d37..7182226 100644 --- a/include/decodeless/mappedfile.hpp +++ b/include/decodeless/mappedfile.hpp @@ -1,4 
+1,4 @@ -// Copyright (c) 2024 Pyarelal Knowles, MIT License +// Copyright (c) 2024-2025 Pyarelal Knowles, MIT License // Inspiration: // - https://nfrechette.github.io/2015/06/11/vmem_linear_allocator/ @@ -18,7 +18,9 @@ namespace decodeless { -// May throw std::bad_alloc or std::runtime_error (mapped_file_error or +// These types are provided by the platform specific implementations included +// above. Below are C++ concepts to verify a common interface. Constructors may +// throw std::bad_alloc or std::runtime_error (mapped_file_error or // mapping_error) using file = detail::MappedFile; using writable_file = detail::MappedFile; @@ -41,6 +43,8 @@ concept writable_mapped_file = std::is_constructible_v && move_only && requires(T t) { { t.data() } -> std::same_as; { t.size() } -> std::same_as; + { t.sync() } -> std::same_as; + { t.sync(std::declval(), std::declval()) } -> std::same_as; }; template @@ -51,9 +55,15 @@ concept resizable_mapped_memory = move_only && requires(T t) { { t.resize(std::declval()) } -> std::same_as; }; +template +concept resizable_mapped_file = resizable_mapped_memory && requires(T t) { + { t.sync() } -> std::same_as; + { t.sync(std::declval(), std::declval()) } -> std::same_as; +}; + static_assert(mapped_file); static_assert(writable_mapped_file); -static_assert(resizable_mapped_memory); +static_assert(resizable_mapped_file); static_assert(std::is_constructible_v); static_assert(resizable_mapped_memory); static_assert(std::is_constructible_v); diff --git a/test/src/mappedfile.cpp b/test/src/mappedfile.cpp index c5871e9..06922ef 100644 --- a/test/src/mappedfile.cpp +++ b/test/src/mappedfile.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2024 Pyarelal Knowles, MIT License +// Copyright (c) 2024-2025 Pyarelal Knowles, MIT License #include #include @@ -26,7 +26,36 @@ class MappedFileFixture : public ::testing::Test { TEST_F(MappedFileFixture, ReadOnly) { file mapped(m_tmpFile); - EXPECT_EQ(*reinterpret_cast(mapped.data()), 42); + 
EXPECT_EQ(*static_cast(mapped.data()), 42); +} + +TEST_F(MappedFileFixture, Writable) { + { + writable_file mapped(m_tmpFile); + ASSERT_GE(mapped.size(), sizeof(int)); + *static_cast(mapped.data()) = 123; + } + { + std::ifstream ifile(m_tmpFile, std::ios::binary); + int contents; + ifile.read(reinterpret_cast(&contents), sizeof(contents)); + EXPECT_EQ(contents, 123); + } +} + +TEST_F(MappedFileFixture, WritableSync) { + writable_file mapped(m_tmpFile); + ASSERT_GE(mapped.size(), sizeof(int)); + *static_cast(mapped.data()) = 123; + mapped.sync(); + { + // This test will always pass even without the .sync() as it just reads the same pages in + // memory + std::ifstream ifile(m_tmpFile, std::ios::binary); + int contents; + ifile.read(reinterpret_cast(&contents), sizeof(contents)); + EXPECT_EQ(contents, 123); + } } #ifdef _WIN32 @@ -405,6 +434,28 @@ TEST_F(MappedFileFixture, ResizableFileSize) { EXPECT_EQ(fs::file_size(m_tmpFile), lastSize); } +TEST_F(MappedFileFixture, ResizableFileSync) { + resizable_file file(m_tmpFile, 10000); + size_t sizes[] = {1, 2, 4000, 4095, 4096, 4097, 10000, 1, 4097, 4096, 4095, 42}; + for (size_t size : sizes) { + file.resize(size); + static_cast(file.data())[size - 1] = '*'; + file.sync(size - 1, 1); + { + // This is pointless because we're just reading the same pages, whether they're flushed + // to disk or not. Tried reading metadata like std::filesystem::last_write_time, but not + // reliable enough for testing. + std::ifstream ifile(m_tmpFile, std::ios::binary); + char c; + ifile.seekg(size - 1); + ifile.read(&c, 1); + EXPECT_EQ(c, '*'); + } + static_cast(file.data())[size - 1] = '_'; + file.sync(); + } +} + TEST_F(MappedFileFixture, Readme) { fs::path tmpFile2 = fs::path{testing::TempDir()} / "test2.dat"; {