From 23284e62ba355b80f2f791112092de3f7578409d Mon Sep 17 00:00:00 2001 From: Walter Bright Date: Mon, 25 Mar 2024 23:33:47 -0700 Subject: [PATCH] Support library source file archives --- changelog/dmd.source-archive.dd | 97 +++++++++ compiler/src/dmd/cli.d | 12 ++ compiler/src/dmd/common/file.d | 117 +++++++++++ compiler/src/dmd/file_manager.d | 345 ++++++++++++++++++++++++++++++-- compiler/src/dmd/frontend.h | 13 +- compiler/src/dmd/globals.d | 3 + compiler/src/dmd/globals.h | 4 + compiler/src/dmd/main.d | 8 + compiler/src/dmd/mars.d | 22 ++ compiler/src/dmd/root/file.d | 2 +- compiler/src/dmd/root/rmem.d | 2 +- 11 files changed, 603 insertions(+), 22 deletions(-) create mode 100644 changelog/dmd.source-archive.dd diff --git a/changelog/dmd.source-archive.dd b/changelog/dmd.source-archive.dd new file mode 100644 index 000000000000..4fdb61893ccc --- /dev/null +++ b/changelog/dmd.source-archive.dd @@ -0,0 +1,97 @@ +# Support Source Archive Files + +This is a compiler feature, not a D language feature. + +Similar to how libraries of object files are made available to the linker, +this adds source archive file support to the compiler. Any package (and all its +sub-files) can become a source archive file. The source archive file is then +supplied to the compiler rather than a directory with lots of files in it. + +This means, for example, that all of Phobos can be distributed as a single +file, std.sar. (The .sar extension stands for "source archive".) If std.sar +is in a path along the import path list supplied to the compiler, the +compiler will prefer std.sar to looking through the std directory tree for the +sub-modules. The std directory wouldn't even need to exist. + +The file format of the .sar file is very similar to that of object file libraries +and various other schemes. 
It does not adhere to those other schemes due to their +variances from platform to platform, all the code needed to support things that +are unneeded for .sar files, and special consideration for D's needs. The format +is meant to be friendly for memory-mapped file access, and does not have alignment +issues. + +A .sar file consists of the following sections: + +1. a header, to identify it as a .sar file with a magic number and a version + +2. a table of contents, one entry per source file. The entries consist of +an offset/length to the filename string, and an offset/length to the file +contents + +3. the filename strings, each string has a terminating 0 + +4. the file contents, each file has four 0 bytes appended, as the lexer wants +that as a sentinel + +5. the integers in the format are little-endian + +To create a .sar file, such as one for Phobos' std: + +dmd -sar=/home/duser/dmd/src/phobos/std + +and the file: + +/home/duser/dmd/src/phobos/std.sar + +will be created and filled with all sub-files with one of the extensions ".di", ".d", +".c", or ".i". The filenames in the .sar file will include the path prefix +specified in the -sar switch. This means the .sar file cannot be successfully moved +to another location. + +For Phobos, std.sar is approximately 11 megabytes in size. + +To use the std.sar file, nothing needs to be changed in the user's build system. +dmd will automatically prefer using any .sar file it finds. To disable using +.sar files, which would be necessary when doing development of the source files, +select one of the following: + +1. delete the .sar file + +2. use the -sar=off compiler switch. -sar=on turns it on, and is the default +setting + +Trying out .sar with simple programs like hello world yields a negligible difference +in compile speed. It's unlikely a larger program would show any particular trend +in performance. + +A standalone archiver program can be easily created from the implementation in DMD. + +## Rationale + +1. 
All the source files in a project or library can be represented as a single file, +making it easy to deal with. + +2. To compile all the source files at once with DMD, the command line can get +extremely long, and certainly unwieldy. With .sar files, you may not even need +a makefile or builder, just: + +dmd project.sar + +3. In Phobos (and most code), the tendency is to lump a lot of only marginally related +functions into one file. This is likely because of the inconvenience of multiple files. +In std.algorithm, for example, the individual algorithms could be placed into multiple +files, since they don't refer to each other. This could also help the people who +don't want the automatic "friend" status of declarations within a single module. +.sar files can make much more granular modules more attractive. + +4. A directory in a project can be anything. But multiple .sar files in a project means +that's where the code is. Multiple versions of the project can exist in the same directory +by using .sar files with different names. + +5. Experiments with small programs (hello world) show a negligible change in compile +speed. Much larger programs may show significant compile speed increases with .sar +files, simply because there are fewer file operations. For slow file systems, such as +SD cards, or network file systems, the speedup could be substantial. + +None of these make it a slam-dunk, after all, no other compiler does this that I'm +aware of. Even so, some surprising uses can be expected of .sar files. diff --git a/compiler/src/dmd/cli.d b/compiler/src/dmd/cli.d index 24cf50729b23..687679a3eea3 100644 --- a/compiler/src/dmd/cli.d +++ b/compiler/src/dmd/cli.d @@ -766,6 +766,18 @@ dmd -cov -unittest myprog.d `$(UNIX Generate shared library) $(WINDOWS Generate DLL library)`, ), + Option("sar=[on|off|]", + "turn reading source archive files on or off, or create source archive at ", + `Controls source archive files and usage. 
+ $(UL + $(LI $(I on): use source archive files (default)) + $(LI $(I off): ignore source archive files) + $(LI $(I path/package): create source archive file. + is where the root package of the files to be archived are. + All the modules in are written to the source archive file . + Do not use in combination with compiling, as that will be very slow.) + )` + ), Option("target=", "use as -[-][-[-= uint.max / Content.sizeof - srcArchiveHeader.contentsOffset || // overflow check + data.length < srcArchiveHeader.contentsOffset + srcArchiveHeader.contentsLength * Content.sizeof) + { + printf("corrupt .sar file %d\n", __LINE__); + return false; // corrupt file + } + + foreach (i; 0 .. srcArchiveHeader.contentsLength) + { + const Content* cp = cast(const(Content)*)(data.ptr + srcArchiveHeader.contentsOffset + i * Content.sizeof); + + if (data.length <= cp.nameOffset || + cp.nameOffset >= cp.nameOffset + cp.nameLength + 1 || + data.length <= cp.nameOffset + cp.nameLength + 1 || + data.length <= cp.importOffset || + cp.importOffset >= cp.importOffset + cp.importLength + 1 || + data.length <= cp.importOffset + cp.importLength + 1) + { + printf("corrupt .sar file %d\n", __LINE__); + return false; // corrupt file + } + + const(char)[] fileName = (data.ptr + cp.nameOffset)[0 .. cp.nameLength]; + const(ubyte)[] fileContents = (cast(const(ubyte)*)(data.ptr + cp.importOffset))[0 .. cp.importLength]; + + //printf("fileContents: %s %p[%lld]\n", fileName.ptr, fileContents.ptr, fileContents.length); + //printf("fileContents: %s\n", fileContents.ptr); + + // Cache file name and file contents (but don't read the file contents!) + const(char)[] fqn = FileName.combine(path, fileName); + fileManager.addFileNameAndContents(fqn, fileContents); + if (log) printf("fqn: %.*s\n", cast(int)fqn.length, fqn.ptr); + } + if (log) printf("done reading .sar file\n"); + + return true; +} + +/*************************** + * The source archive file starts with this header. 
+ */ +struct SrcArchiveHeader +{ + enum MagicNumber = 0x64FE_ED63; + uint magicNumber = MagicNumber; // don't collide with other file types + uint versionNumber = 1; // so we can change the format + uint contentsOffset; // file offset to start of contents section + uint contentsLength; // the number of contents in the contents section +} + +/* The contents section is an array of Contents + */ +struct Content +{ + uint nameOffset; // file offset to name of file + uint nameLength; // number of characters in name, excluding terminating 0 + uint importOffset; // file offset to start of imported file contents + uint importLength; // number of characters in the imported file contents, excluding terminating 0 +} diff --git a/compiler/src/dmd/frontend.h b/compiler/src/dmd/frontend.h index 22e02c382f29..7fc91e4dd748 100644 --- a/compiler/src/dmd/frontend.h +++ b/compiler/src/dmd/frontend.h @@ -7938,6 +7938,7 @@ struct Help final bool checkAction; bool revert; bool preview; + bool sar; bool externStd; bool hc; Help() : @@ -7949,11 +7950,12 @@ struct Help final checkAction(), revert(), preview(), + sar(), externStd(), hc() { } - Help(bool manual, bool usage = false, bool mcpu = false, bool transition = false, bool check = false, bool checkAction = false, bool revert = false, bool preview = false, bool externStd = false, bool hc = false) : + Help(bool manual, bool usage = false, bool mcpu = false, bool transition = false, bool check = false, bool checkAction = false, bool revert = false, bool preview = false, bool sar = false, bool externStd = false, bool hc = false) : manual(manual), usage(usage), mcpu(mcpu), @@ -7962,6 +7964,7 @@ struct Help final checkAction(checkAction), revert(revert), preview(preview), + sar(sar), externStd(externStd), hc(hc) {} @@ -8053,6 +8056,7 @@ struct Param final bool addMain; bool allInst; bool bitfields; + bool useSourceArchive; CppStdRevision cplusplus; Help help; Verbose v; @@ -8088,6 +8092,7 @@ struct Param final _d_dynamicArray< const char > 
objdir; _d_dynamicArray< const char > objname; _d_dynamicArray< const char > libname; + _d_dynamicArray< const char > pathPackage; Output ddoc; Output dihdr; Output cxxhdr; @@ -8136,6 +8141,7 @@ struct Param final addMain(), allInst(), bitfields(), + useSourceArchive(true), cplusplus((CppStdRevision)201103u), help(), v(), @@ -8165,6 +8171,7 @@ struct Param final objdir(), objname(), libname(), + pathPackage(), ddoc(), dihdr(), cxxhdr(), @@ -8189,7 +8196,7 @@ struct Param final mapfile() { } - Param(bool obj, bool multiobj = false, bool trace = false, bool tracegc = false, bool vcg_ast = false, DiagnosticReporting useDeprecated = (DiagnosticReporting)1u, bool useUnitTests = false, bool useInline = false, bool release = false, bool preservePaths = false, DiagnosticReporting warnings = (DiagnosticReporting)2u, bool cov = false, uint8_t covPercent = 0u, bool ctfe_cov = false, bool ignoreUnsupportedPragmas = true, bool useModuleInfo = true, bool useTypeInfo = true, bool useExceptions = true, bool useGC = true, bool betterC = false, bool addMain = false, bool allInst = false, bool bitfields = false, CppStdRevision cplusplus = (CppStdRevision)201103u, Help help = Help(), Verbose v = Verbose(), FeatureState useDIP25 = (FeatureState)2u, FeatureState useDIP1000 = (FeatureState)0u, bool ehnogc = false, bool useDIP1021 = false, FeatureState fieldwise = (FeatureState)0u, bool fixAliasThis = false, FeatureState rvalueRefParam = (FeatureState)0u, FeatureState noSharedAccess = (FeatureState)0u, bool previewIn = false, bool inclusiveInContracts = false, bool shortenedMethods = true, bool fixImmutableConv = false, bool fix16997 = true, FeatureState dtorFields = (FeatureState)0u, FeatureState systemVariables = (FeatureState)0u, CHECKENABLE useInvariants = (CHECKENABLE)0u, CHECKENABLE useIn = (CHECKENABLE)0u, CHECKENABLE useOut = (CHECKENABLE)0u, CHECKENABLE useArrayBounds = (CHECKENABLE)0u, CHECKENABLE useAssert = (CHECKENABLE)0u, CHECKENABLE useSwitchError = (CHECKENABLE)0u, 
CHECKENABLE boundscheck = (CHECKENABLE)0u, CHECKACTION checkAction = (CHECKACTION)0u, CLIIdentifierTable dIdentifierTable = (CLIIdentifierTable)0u, CLIIdentifierTable cIdentifierTable = (CLIIdentifierTable)0u, _d_dynamicArray< const char > argv0 = {}, Array modFileAliasStrings = Array(), Array imppath = Array(), Array fileImppath = Array(), _d_dynamicArray< const char > objdir = {}, _d_dynamicArray< const char > objname = {}, _d_dynamicArray< const char > libname = {}, Output ddoc = Output(), Output dihdr = Output(), Output cxxhdr = Output(), Output json = Output(), JsonFieldFlags jsonFieldFlags = (JsonFieldFlags)0u, Output makeDeps = Output(), Output mixinOut = Output(), Output moduleDeps = Output(), uint32_t debuglevel = 0u, uint32_t versionlevel = 0u, bool run = false, Array runargs = Array(), Array cppswitches = Array(), const char* cpp = nullptr, Array objfiles = Array(), Array linkswitches = Array(), Array linkswitchIsForCC = Array(), Array libfiles = Array(), Array dllfiles = Array(), _d_dynamicArray< const char > deffile = {}, _d_dynamicArray< const char > resfile = {}, _d_dynamicArray< const char > exefile = {}, _d_dynamicArray< const char > mapfile = {}) : + Param(bool obj, bool multiobj = false, bool trace = false, bool tracegc = false, bool vcg_ast = false, DiagnosticReporting useDeprecated = (DiagnosticReporting)1u, bool useUnitTests = false, bool useInline = false, bool release = false, bool preservePaths = false, DiagnosticReporting warnings = (DiagnosticReporting)2u, bool cov = false, uint8_t covPercent = 0u, bool ctfe_cov = false, bool ignoreUnsupportedPragmas = true, bool useModuleInfo = true, bool useTypeInfo = true, bool useExceptions = true, bool useGC = true, bool betterC = false, bool addMain = false, bool allInst = false, bool bitfields = false, bool useSourceArchive = true, CppStdRevision cplusplus = (CppStdRevision)201103u, Help help = Help(), Verbose v = Verbose(), FeatureState useDIP25 = (FeatureState)2u, FeatureState useDIP1000 = 
(FeatureState)0u, bool ehnogc = false, bool useDIP1021 = false, FeatureState fieldwise = (FeatureState)0u, bool fixAliasThis = false, FeatureState rvalueRefParam = (FeatureState)0u, FeatureState noSharedAccess = (FeatureState)0u, bool previewIn = false, bool inclusiveInContracts = false, bool shortenedMethods = true, bool fixImmutableConv = false, bool fix16997 = true, FeatureState dtorFields = (FeatureState)0u, FeatureState systemVariables = (FeatureState)0u, CHECKENABLE useInvariants = (CHECKENABLE)0u, CHECKENABLE useIn = (CHECKENABLE)0u, CHECKENABLE useOut = (CHECKENABLE)0u, CHECKENABLE useArrayBounds = (CHECKENABLE)0u, CHECKENABLE useAssert = (CHECKENABLE)0u, CHECKENABLE useSwitchError = (CHECKENABLE)0u, CHECKENABLE boundscheck = (CHECKENABLE)0u, CHECKACTION checkAction = (CHECKACTION)0u, CLIIdentifierTable dIdentifierTable = (CLIIdentifierTable)0u, CLIIdentifierTable cIdentifierTable = (CLIIdentifierTable)0u, _d_dynamicArray< const char > argv0 = {}, Array modFileAliasStrings = Array(), Array imppath = Array(), Array fileImppath = Array(), _d_dynamicArray< const char > objdir = {}, _d_dynamicArray< const char > objname = {}, _d_dynamicArray< const char > libname = {}, _d_dynamicArray< const char > pathPackage = {}, Output ddoc = Output(), Output dihdr = Output(), Output cxxhdr = Output(), Output json = Output(), JsonFieldFlags jsonFieldFlags = (JsonFieldFlags)0u, Output makeDeps = Output(), Output mixinOut = Output(), Output moduleDeps = Output(), uint32_t debuglevel = 0u, uint32_t versionlevel = 0u, bool run = false, Array runargs = Array(), Array cppswitches = Array(), const char* cpp = nullptr, Array objfiles = Array(), Array linkswitches = Array(), Array linkswitchIsForCC = Array(), Array libfiles = Array(), Array dllfiles = Array(), _d_dynamicArray< const char > deffile = {}, _d_dynamicArray< const char > resfile = {}, _d_dynamicArray< const char > exefile = {}, _d_dynamicArray< const char > mapfile = {}) : obj(obj), multiobj(multiobj), trace(trace), @@ 
-8213,6 +8220,7 @@ struct Param final addMain(addMain), allInst(allInst), bitfields(bitfields), + useSourceArchive(useSourceArchive), cplusplus(cplusplus), help(help), v(v), @@ -8248,6 +8256,7 @@ struct Param final objdir(objdir), objname(objname), libname(libname), + pathPackage(pathPackage), ddoc(ddoc), dihdr(dihdr), cxxhdr(cxxhdr), diff --git a/compiler/src/dmd/globals.d b/compiler/src/dmd/globals.d index f8291fa5ce7e..b1af4d220342 100644 --- a/compiler/src/dmd/globals.d +++ b/compiler/src/dmd/globals.d @@ -108,6 +108,7 @@ extern(C++) struct Help bool checkAction; // -checkaction bool revert; // -revert bool preview; // -preview + bool sar; // -sar bool externStd; // -extern-std bool hc; // -HC } @@ -170,6 +171,7 @@ extern (C++) struct Param bool addMain; // add a default main() function bool allInst; // generate code for all template instantiations bool bitfields; // support C style bit fields + bool useSourceArchive = true; // read source files from source archive library (.sar file) CppStdRevision cplusplus = CppStdRevision.cpp11; // version of C++ standard to support @@ -219,6 +221,7 @@ extern (C++) struct Param const(char)[] objdir; // .obj/.lib file output directory const(char)[] objname; // .obj file output name const(char)[] libname; // .lib file output name + const(char)[] pathPackage; // path/package for path/package.sar file Output ddoc; // Generate embedded documentation comments Output dihdr; // Generate `.di` 'header' files diff --git a/compiler/src/dmd/globals.h b/compiler/src/dmd/globals.h index ac2b2867fb7c..063005505842 100644 --- a/compiler/src/dmd/globals.h +++ b/compiler/src/dmd/globals.h @@ -119,6 +119,7 @@ struct Help d_bool checkAction; // -checkaction d_bool revert; // -revert d_bool preview; // -preview + d_bool sar; // -sar d_bool externStd; // -extern-std d_bool hc; // -HC }; @@ -173,6 +174,8 @@ struct Param d_bool addMain; // add a default main() function d_bool allInst; // generate code for all template instantiations d_bool 
bitfields; // support C style bit fields + d_bool useSourceArchive; // read source files from source archive library (.sar file) + CppStdRevision cplusplus; // version of C++ name mangling to support Help help; @@ -221,6 +224,7 @@ struct Param DString objdir; // .obj/.lib file output directory DString objname; // .obj file output name DString libname; // .lib file output name + DString pathPackage; // path to .sar source archive file Output ddoc; // Generate embedded documentation comments Output dihdr; // Generate `.di` 'header' files diff --git a/compiler/src/dmd/main.d b/compiler/src/dmd/main.d index 27f79379abae..08e5067953df 100644 --- a/compiler/src/dmd/main.d +++ b/compiler/src/dmd/main.d @@ -320,6 +320,13 @@ private int tryMain(size_t argc, const(char)** argv, ref Param params) { fatal(); } + if (params.pathPackage.length) + { + if (writeSourceArchive(params.pathPackage)) + fatal(); + return EXIT_SUCCESS; + } + if (files.length == 0) { if (params.jsonFieldFlags) @@ -827,6 +834,7 @@ bool parseCommandlineAndConfig(size_t argc, const(char)** argv, ref Param params envsection.ptr, cast(int)global.inifilename.length, global.inifilename.ptr); global.preprocess = &preprocess; + global.fileManager.setUseSourceArchive(global.params.useSourceArchive); return false; } diff --git a/compiler/src/dmd/mars.d b/compiler/src/dmd/mars.d index 06f694e9e472..363e532a2c4c 100644 --- a/compiler/src/dmd/mars.d +++ b/compiler/src/dmd/mars.d @@ -1205,6 +1205,28 @@ bool parseCommandLine(const ref Strings arguments, const size_t argc, ref Param return false; } } + else if (startsWith(p + 1, "sar")) // https://dlang.org/dmd.html#switch-sar + { + /* Parse: + * -sar=on|off|path/package.sar + */ + enum len = "-sar=".length; + mixin(checkOptionsMixin("sar", + "`-sar=` requires a behavior")); + switch (arg[len .. 
$]) + { + case "on": + params.useSourceArchive = true; + break; + case "off": + params.useSourceArchive = false; + break; + default: + params.useSourceArchive = false; + params.pathPackage = arg[len .. $]; + break; + } + } else if (arg == "-w") // https://dlang.org/dmd.html#switch-w params.warnings = DiagnosticReporting.error; else if (arg == "-wi") // https://dlang.org/dmd.html#switch-wi diff --git a/compiler/src/dmd/root/file.d b/compiler/src/dmd/root/file.d index 034dba61b13f..ee7170e230c8 100644 --- a/compiler/src/dmd/root/file.d +++ b/compiler/src/dmd/root/file.d @@ -92,7 +92,7 @@ nothrow: version (Posix) { - //printf("File::read('%s')\n",name); + //printf("File::read('%.*s')\n", cast(int)name.length, name.ptr); int fd = name.toCStringThen!(slice => open(slice.ptr, O_RDONLY)); if (fd == -1) { diff --git a/compiler/src/dmd/root/rmem.d b/compiler/src/dmd/root/rmem.d index 19652072376b..c6986c0b56fe 100644 --- a/compiler/src/dmd/root/rmem.d +++ b/compiler/src/dmd/root/rmem.d @@ -318,7 +318,7 @@ Params: Returns: A null-terminated copy of the input array. */ -extern (D) char[] xarraydup(const(char)[] s) pure nothrow +extern (D) char[] xarraydup(scope const(char)[] s) pure nothrow { if (!s) return null;