From 7b88bd1c5a89e7ba0b783bcb0665190c2e7459ec Mon Sep 17 00:00:00 2001 From: Walter Bright Date: Mon, 25 Mar 2024 23:33:47 -0700 Subject: [PATCH] Support library source file archives --- changelog/dmd.source-archive.dd | 79 +++ compiler/src/dmd/cli.d | 12 + compiler/src/dmd/common/file.d | 166 ++++++ compiler/src/dmd/file_manager.d | 515 ++++++++++++++++-- compiler/src/dmd/frontend.h | 13 +- compiler/src/dmd/globals.d | 4 + compiler/src/dmd/globals.h | 4 + compiler/src/dmd/main.d | 38 ++ compiler/src/dmd/mars.d | 24 +- .../runnable/extra-files/archive/sample.d | 7 + compiler/test/runnable/extra-files/testsar1.d | 9 + compiler/test/runnable/extra-files/testsar2.d | 11 + compiler/test/runnable/testsar.sh | 17 + 13 files changed, 838 insertions(+), 61 deletions(-) create mode 100644 changelog/dmd.source-archive.dd create mode 100644 compiler/test/runnable/extra-files/archive/sample.d create mode 100644 compiler/test/runnable/extra-files/testsar1.d create mode 100644 compiler/test/runnable/extra-files/testsar2.d create mode 100644 compiler/test/runnable/testsar.sh diff --git a/changelog/dmd.source-archive.dd b/changelog/dmd.source-archive.dd new file mode 100644 index 000000000000..e72ed81a7dd1 --- /dev/null +++ b/changelog/dmd.source-archive.dd @@ -0,0 +1,79 @@ +# Support Source Archive Files + +This is a compiler feature, not a D language feature. + +Similar to how libraries of object files are made available to the linker, +this adds source archive file support to the compiler. Any package (and all its +sub-files) can become a source archive file. The source archive file is then +supplied to the compiler rather than a directory with lots of files in it. + +This means, for example, that all of Phobos can be distributed as a single +file, std.sar. (The .sar extension stands for "source archive".) 
If std.sar +is in a path along the import path list supplied to the compiler, the +compiler will prefer std.sar to looking through the std directory tree for the +sub-modules. The std directory wouldn't even need to exist. + +All the source files in a project or library can be represented as a single +file, making it easy to deal with. + +The file format of the .sar file is very similar to that of object file libraries +and various other schemes. It does not adhere to those other schemes due to their +variances from platform to platform, all the code needed to support things that +are unneeded for .sar files, and special consideration for D's needs. The format +is meant to be friendly for memory-mapped file access, and does not have alignment +issues. + +A .sar file consists of the following sections: + +1. a header, to identify it as a .sar file with a magic number and a version + +2. a table of contents, one entry per source file. The entries consist of +an offset/length to the filename string, and an offset/length to the file +contents + +3. the filename strings, each string has a terminating 0 + +4. the file contents, each file has four 0 bytes appended, as the lexer wants +that as a sentinel + +All integers in the format are native-endian (which is little-endian for x86). + +To create a .sar file, such as one for Phobos' std: + +dmd -sar=/home/duser/dmd/src/phobos/std + +and the file: + +/home/duser/dmd/src/phobos/std.sar + +will be created and filled with all sub-files with one of the extensions ".di", ".d", +".c", or ".i". + +For Phobos, std.sar is approximately 11 megabytes in size. + +To use the std.sar file, nothing needs to be changed in the user's build system. +dmd will automatically prefer using any .sar file it finds. To disable using +.sar files, which would be necessary when doing development of the source files, +select one of the following: + +1. delete the .sar file + +2. use the -sar=off compiler switch. 
-sar=on turns it on, and is the default +setting + +Trying out .sar with simple programs like hello world yields a negligible difference +in compile speed. It's unlikely a larger program would show any particular trend +in performance. + +A standalone archiver program can be easily created from the implementation in DMD. + +Another way to use a .sar file is to simply add it to the command line: + +dmd foo.sar + +If foo.sar contains the files a.d, b.d and c.d, the command is equivalent to: + +dmd a.d b.d c.d + +I.e., .sar files are simply a way to "bundle" a bunch of source files into a single +file. diff --git a/compiler/src/dmd/cli.d b/compiler/src/dmd/cli.d index b69fed52ae57..ebd97c589af3 100644 --- a/compiler/src/dmd/cli.d +++ b/compiler/src/dmd/cli.d @@ -766,6 +766,18 @@ dmd -cov -unittest myprog.d `$(UNIX Generate shared library) $(WINDOWS Generate DLL library)`, ), + Option("sar=[on|off|]", + "turn reading source archive files on or off, or create source archive at ", + `Controls source archive files and usage. + $(UL + $(LI $(I on): use source archive files (default)) + $(LI $(I off): ignore source archive files) + $(LI $(I path/package): create source archive file. + is where the root package of the files to be archived are. + All the modules in are written to the source archive file . + Do not use in combination with compiling, as that will be very slow.) + )` + ), Option("target=", "use as -[-][-[- fullPathLength) + { + fullPathLength = count; + fullPath = cast(char*)Mem.xrealloc_noscan(fullPath == buf.ptr ? null : fullPath, fullPathLength + 1); + } + memcpy(fullPath, dir_path, dir_pathLength); + strcpy(fullPath + dir_pathLength + 1, "\\*.*".ptr); + + //printf("fullPath: %s\n", fullPath); + + WIN32_FIND_DATAW ffd = void; + HANDLE hFind = fullPath[0 .. 
strlen(fullPath)].extendedPathThen!(p => FindFirstFileW(p.ptr, &ffd)); + if (hFind == INVALID_HANDLE_VALUE) + return true; + + do + { + //fprintf(stderr, "cFileName: %s\n", ffd.cFileName.ptr); + if (ffd.cFileName[0] == 0) + continue; // ignore + + if (ffd.cFileName[0] == '.') + continue; // ignore files that start with a ., also ignore . and .. directories + + const(char)[] name = toNarrowStringz(ffd.cFileName[0 .. wcslen(ffd.cFileName.ptr)], null); + + // fullPath = dir_path \ name.ptr + count = dir_pathLength + 1 + name.length; + if (count > fullPathLength) + { + fullPathLength = count; + fullPath = cast(char*)Mem.xrealloc_noscan(fullPath == buf.ptr ? null : fullPath, fullPathLength + 1); + } + strcpy(fullPath + dir_pathLength + 1, name.ptr); + + if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + { + if (recurse) + findFiles(fullPath, exts, recurse, filenameSink); + } + else + { + const(char)[] nameExt = FileName.ext(name); + foreach (ext; exts[]) + { + if (nameExt == ext) + { + //printf("adding %s\n", fullPath); + filenameSink(fullPath[0 .. count]); + } + } + } + mem.xfree(cast(void*)name.ptr); + + } while (FindNextFileW(hFind, &ffd) != 0); + + if (fullPath != buf.ptr) + mem.xfree(fullPath); + FindClose(hFind); + return false; + } + else version (Posix) + { + DIR* dir = opendir(dir_path); + if (!dir) + return true; + + debug + enum BufLength = 10; // trigger any reallocation bugs + else + enum BufLength = 100; + char[BufLength + 1] buf = void; + char* fullPath = buf.ptr; + size_t fullPathLength = BufLength; + + dirent* entry; + while ((entry = readdir(dir)) != null) + { + //printf("entry: %s\n", entry.d_name.ptr); + if (entry.d_name[0] == '.') + continue; // ignore files that start with a . 
+ + // fullPath = dir_path / entry.d_name.ptr + const dir_pathLength = strlen(dir_path); + const count = dir_pathLength + 1 + strlen(entry.d_name.ptr); + if (count > fullPathLength) + { + fullPathLength = count; + fullPath = cast(char*)Mem.xrealloc_noscan(fullPath == buf.ptr ? null : fullPath, fullPathLength + 1); + } + memcpy(fullPath, dir_path, dir_pathLength); + fullPath[dir_pathLength] = '/'; + strcpy(fullPath + dir_pathLength + 1, entry.d_name.ptr); + + stat_t statbuf; + if (lstat(fullPath, &statbuf) == -1) + continue; + + const(char)[] name = entry.d_name.ptr[0 .. strlen(entry.d_name.ptr)]; // convert to D string + if (!name.length) + continue; // ignore + + if (S_ISDIR(statbuf.st_mode)) + { + if (recurse && !(name == "." || name == "..")) + findFiles(fullPath, exts, recurse, filenameSink); + } + else if (S_ISREG(statbuf.st_mode)) + { + foreach (ext; exts) + { + if (FileName.ext(name) == ext) + { + //printf("%s\n", fullPath); + filenameSink(fullPath[0 .. count]); + } + } + } + } + + if (fullPath != buf.ptr) + mem.xfree(fullPath); + closedir(dir); + return false; + } + else + static assert(0); +} diff --git a/compiler/src/dmd/file_manager.d b/compiler/src/dmd/file_manager.d index 6ad3d621708d..8e6cb43e316a 100644 --- a/compiler/src/dmd/file_manager.d +++ b/compiler/src/dmd/file_manager.d @@ -11,12 +11,18 @@ module dmd.file_manager; import core.stdc.stdio; +import core.stdc.string; + +import dmd.common.file; import dmd.common.outbuffer; +import dmd.root.array; import dmd.root.stringtable : StringTable; import dmd.root.file : File, Buffer; import dmd.root.filename : FileName, isDirSeparator; +import dmd.root.rmem; import dmd.root.string : toDString; import dmd.errors; +import dmd.errorsink; import dmd.globals; import dmd.identifier; import dmd.location; @@ -72,6 +78,13 @@ private struct PathStack } } +enum DIR : ubyte +{ + none, // path does not exist + disk, // path exists on the disk + archive, // path exists in the archive +} + /*************************** * 
Cache path lookups so the operating system * is only consulted once for each path. @@ -82,7 +95,7 @@ private struct PathCache * a b and c are directories, a, a/b, a/b/c are paths. */ - StringTable!(bool) pathStatus; // cached value of does a path exist or not + StringTable!(DIR) pathStatus; // cached result of does a path exist or not nothrow: @@ -91,18 +104,22 @@ private struct PathCache * Cache the results for the path and each left-justified subpath of the path. * Params: * filespec = path/filename + * fileManager = where the caches are * Returns: - * true if path exists, false if it does not + * DIR.none if path does not exist, DIR.disk if it exists on disk, DIR.archive if in archive */ - bool pathExists(const(char)[] filespec) nothrow + DIR pathExists(const(char)[] filespec, FileManager fileManager) nothrow { /* look for the longest leftmost parent path that is cached * by starting at the right and working to the left */ - bool exists = true; + DIR exists = DIR.disk; + bool inArchive = false; auto st = PathStack(filespec); while (st.up) { if (auto cached = pathStatus.lookup(st.cur)) { + if (cached.value == DIR.archive) + inArchive = true; exists = cached.value; break; } @@ -113,49 +130,114 @@ private struct PathCache * to the right of it do not exist */ while (st.down) { - if (!exists) - pathStatus.insert(st.cur, false); + if (exists == DIR.none) + pathStatus.insert(st.cur, DIR.none); else - exists = pathStatus.insert(st.cur, FileName.exists(st.cur) == 2).value; + { + //printf("pathStatus.insert %.*s\n", cast(int)st.cur.length, st.cur.ptr); + //printf("pathStatus.insert %s\n", st.cur.ptr); + //printf("FileName.exists() %d\n", FileName.exists(st.cur.ptr)); + if (inArchive) + { + auto cached = pathStatus.lookup(st.cur); + if (cached) + exists = cached.value; + else + { + pathStatus.insert(st.cur, DIR.none); + exists = DIR.none; + } + } + else + { + auto cached = pathStatus.insert(st.cur, FileName.exists(st.cur) == 2 ? 
DIR.disk : DIR.none); + if (cached) + exists = cached.value; + else + exists = DIR.disk; + } + } } return exists; } + /** + * Add each path in filespec to the pathStatus cache, if it is not already there + * The filename is skipped. + * Mark each path as either DIR.disk or DIR.archive. + * Params: + * filespec = path/filename + * dir = DIR.disk or DIR.archive + */ + void addPath(const(char)[] filespec, DIR dir) nothrow + { + /* look for the longest leftmost parent path + * by starting at the right and working to the left + */ + auto st = PathStack(filespec); + while (st.up) + { + if (auto cached = pathStatus.lookup(st.cur)) + { + if (cached.value) // subpath exists + break; // assume any more to the left also exist + } + pathStatus.insert(st.cur, dir); + } + } + /** * Ask if path ends in a directory. * Cache result for speed. * Params: * path = a path * Returns: - * true if it's a path, false if not + * DIR */ - bool isExistingPath(const char[] path) + DIR isExistingPath(const char[] path) { auto cached = pathStatus.lookup(path); if (!cached) - cached = pathStatus.insert(path, FileName.exists(path) == 2); + cached = pathStatus.insert(path, FileName.exists(path) == 2 ? 
DIR.disk : DIR.none); return cached.value; } } +/************************************************************** + */ + final class FileManager { private StringTable!(const(ubyte)[]) files; // contents of files indexed by file name private PathCache pathCache; + private bool useSourceArchive; // use .sar files if they exist + /// - public this () nothrow + public this() nothrow { this.files._init(); this.pathCache.pathStatus._init(); } nothrow: + + /********************************** + * Set useSourceArchive flag + * Params: + * useSourceArchive = value to set it to + */ + public void setUseSourceArchive(bool useSourceArchive) nothrow + { + //printf("setUseSourceArchive: %d\n", useSourceArchive); + this.useSourceArchive = useSourceArchive; + } + /******************************************** * Look for the source file if it's different from filename. - * Look for .di, .d, directory, and along global.path. + * Look for .di, .d, directory, along each path. * Does not open the file. * Params: * filename = as supplied by the user @@ -169,10 +251,27 @@ nothrow: //printf("lookForSourceFile(`%.*s`)\n", cast(int)filename.length, filename.ptr); /* Search along paths[] for .di file, then .d file. */ + + /****************** + * Check if file exists in the source archive or on disk. + * Params: + * filename = fqn of file + * dir = DIR + * Returns: + * true if it exists + */ + bool isFileExisting(const char[] filename, DIR dir) + { + if (dir == DIR.archive) + return files.lookup(filename) != null; + else + return files.lookup(filename) || FileName.exists(filename) == 1; + } + // see if we should check for the module locally. 
- bool checkLocal = pathCache.pathExists(filename); + DIR dirLocal = pathCache.pathExists(filename, this); const sdi = FileName.forceExt(filename, hdr_ext); - if (checkLocal && FileName.exists(sdi) == 1) + if (dirLocal != DIR.none && isFileExisting(sdi, dirLocal)) return sdi; scope(exit) FileName.free(sdi.ptr); @@ -180,11 +279,11 @@ nothrow: // Special file name representing `stdin`, always assume its presence if (sd == "__stdin.d") return sd; - if (checkLocal && FileName.exists(sd) == 1) + if (dirLocal && isFileExisting(sd, dirLocal)) return sd; scope(exit) FileName.free(sd.ptr); - if (checkLocal) + if (dirLocal) { if (pathCache.isExistingPath(filename)) { @@ -193,86 +292,100 @@ nothrow: * iff filename/package.d is a file */ const ni = FileName.combine(filename, package_di); - if (FileName.exists(ni) == 1) + if (isFileExisting(ni, dirLocal)) return ni; FileName.free(ni.ptr); const n = FileName.combine(filename, package_d); - if (FileName.exists(n) == 1) + if (isFileExisting(n, dirLocal)) return n; FileName.free(n.ptr); } } + // What about .c and .i files? if (FileName.absolute(filename)) return null; if (!paths.length) return null; - foreach (entry; paths) - { - const p = entry.toDString(); - - const(char)[] n = FileName.combine(p, sdi); - if (!pathCache.pathExists(n)) { - FileName.free(n.ptr); + foreach (path; paths) + { + //printf("path: %s\n", path); + const p = path.toDString(); + const(char)[] ndi = FileName.combine(p, sdi); + //printf("ndi: %.*s\n", cast(int)ndi.length, ndi.ptr); + + DIR dir = pathCache.pathExists(ndi, this); + //printf("dir: %d\n", dir); + if (dir == DIR.none) { + FileName.free(ndi.ptr); continue; // no need to check for anything else. 
} - if (FileName.exists(n) == 1) { - return n; + if (isFileExisting(ndi, dir)) { + return ndi; } - FileName.free(n.ptr); + FileName.free(ndi.ptr); - n = FileName.combine(p, sd); - if (FileName.exists(n) == 1) { - return n; + const(char)[] nd = FileName.combine(p, sd); + //printf("nd: %.*s\n", cast(int)nd.length, nd.ptr); + if (isFileExisting(nd, dir)) { + return nd; } - FileName.free(n.ptr); + FileName.free(nd.ptr); - n = FileName.combine(p, FileName.sansExt(filename)); - scope(exit) FileName.free(n.ptr); + /* Look for path/FileName/package.di, then path/FileName/package.d + */ + const(char)[] np = FileName.combine(p, FileName.sansExt(filename)); + scope(exit) FileName.free(np.ptr); - // also cache this if we are looking for package.d[i] - if (pathCache.isExistingPath(n)) + if (auto val = pathCache.isExistingPath(np)) { - const n2i = FileName.combine(n, package_di); - if (FileName.exists(n2i) == 1) - return n2i; - FileName.free(n2i.ptr); - const n2 = FileName.combine(n, package_d); - if (FileName.exists(n2) == 1) { - return n2; - } - FileName.free(n2.ptr); + const npdi = FileName.combine(np, package_di); + if (isFileExisting(npdi, val)) + return npdi; + FileName.free(npdi.ptr); + + const npd = FileName.combine(np, package_d); + if (isFileExisting(npd, val)) + return npd; + FileName.free(npd.ptr); } } - /* ImportC: No D modules found, now search along paths[] for .i file, then .c file. + /* ImportC: No D modules found, now repeat search for .i file, then .c file. + * Same as code above, sans the package search. 
*/ const si = FileName.forceExt(filename, i_ext); - if (FileName.exists(si) == 1) + if (dirLocal != DIR.none && isFileExisting(si, dirLocal)) return si; scope(exit) FileName.free(si.ptr); const sc = FileName.forceExt(filename, c_ext); - if (FileName.exists(sc) == 1) + if (dirLocal != DIR.none && isFileExisting(sc, dirLocal)) return sc; scope(exit) FileName.free(sc.ptr); - foreach (entry; paths) + + foreach (path; paths) { - const p = entry.toDString(); + const p = path.toDString(); - const(char)[] n = FileName.combine(p, si); - if (FileName.exists(n) == 1) { - return n; + const(char)[] ni = FileName.combine(p, si); + DIR dir = pathCache.pathExists(ni, this); + if (dir == DIR.none) { + FileName.free(ni.ptr); + continue; // no need to check for anything else. } - FileName.free(n.ptr); - n = FileName.combine(p, sc); - if (FileName.exists(n) == 1) { - return n; + if (isFileExisting(ni, dir)) { + return ni; } - FileName.free(n.ptr); + FileName.free(ni.ptr); + + const(char)[] nc = FileName.combine(p, sc); + if (isFileExisting(nc, dir)) + return nc; + FileName.free(nc.ptr); } return null; } @@ -290,7 +403,10 @@ nothrow: { const name = filename.toString; if (auto val = files.lookup(name)) // if `name` is cached + { + //printf("File.read() cached %.*s, %p[%d]\n", cast(int)name.length, name.ptr, val.value.ptr, cast(int)val.value.length); return val.value; // return its contents + } OutBuffer buf; if (name == "__stdin.d") // special name for reading from stdin @@ -303,6 +419,7 @@ nothrow: if (FileName.exists(name) != 1) // if not an ordinary file return null; + //printf("File.read() %.*s\n", cast(int)name.length, name.ptr); if (File.read(name, buf)) return null; // failed } @@ -330,6 +447,21 @@ nothrow: auto val = files.insert(filename.toString, buffer); return val == null ? null : val.value; } + + /** + * Add the file's name and contents to the cache. 
+ * Params: + * fileName = fqn of the file + * contents = the file contents + */ + void addSarFileNameAndContents(const(char)[] fileName, const(ubyte)[] contents) + { + //printf("addSarFileNameAndContents() %.*s\n", cast(int)fileName.length, fileName.ptr); + /* Unseen subpaths will be within a .sar archive + */ + pathCache.addPath(fileName, DIR.archive); + files.insert(fileName, contents); + } } private bool readFromStdin(ref OutBuffer sink) nothrow @@ -364,3 +496,270 @@ private bool readFromStdin(ref OutBuffer sink) nothrow assert(0); } + +/************************************************* + * Look along paths[] for archive files. + * For each one found, add its contents to the path and file caches. + */ +void findAllArchives(FileManager fileManager, const char*[] paths) +{ + if (!fileManager.useSourceArchive) + return; + + const(char)[][1] exts = [ sar_ext ]; + + foreach (path; paths) + { + //printf("path: %s\n", path); + const(char)[] spath = path[0 .. strlen(path)]; + + // Remove any trailing separator + if (spath.length) + { + const c = spath[spath.length - 1]; + if (c == '/' || c == '\\') + spath = spath[0 .. $ - 1]; + } + + void arSink(const(char)[] archiveFile) + { + enum log = false; + if (log) printf("arSink() %.*s\n", cast(int)archiveFile.length, archiveFile.ptr); + readSourceArchive(fileManager, spath, FileName.name(archiveFile), null, global.errorSink, global.params.v.verbose); + } + + findFiles(path, exts, false, &arSink); + } +} + +/******************************************** + * Read all the modules, and build one giant cache file out of them. 
+ * Params: + * pathPackage = the path/package to build a cache file for + * Returns: + * true = failed + * false = success + */ +bool writeSourceArchive(const(char)[] pathPackage) +{ + enum log = false; + if (log) printf("writeSourceArchive() %.*s\n", cast(int)pathPackage.length, pathPackage.ptr); + + const(char)[] name = FileName.name(pathPackage); + const size_t nameStart = pathPackage.length - name.length; + + Array!(const(char)*) fileNames; + + void accumulate(scope const(char)[] filename) nothrow + { + if (log) printf("%.*s\n", cast(int)filename.length, filename.ptr); + fileNames.push(xarraydup(filename).ptr); + } + + immutable string[4] exts = ["d", "di", "c", "i"]; // archive files with these extensions + + const(char)[] dir_path = xarraydup(pathPackage); + + if (findFiles(dir_path.ptr, exts, true, &accumulate)) + return true; + + OutBuffer ar; // ar will hold the contents of the archive file + + SrcArchiveHeader srcArchiveHeader; + srcArchiveHeader.contentsOffset = cast(uint)SrcArchiveHeader.sizeof; + srcArchiveHeader.contentsLength = cast(uint)fileNames.length; + + ar.write(&srcArchiveHeader, srcArchiveHeader.sizeof); + + /* Allocate in ar the array of Content + */ + size_t length = fileNames.length; + ar.allocate(length * Content.sizeof); + + /* Temporary array of Content + */ + Content[] contents = (cast(Content*)mem.xmalloc(length * Content.sizeof))[0 .. length]; + + /* Write the name strings to ar + */ + foreach (i, ref content; contents[]) + { + const(char)* fnp = fileNames[i]; + const(char)[] fn = fnp[nameStart .. 
strlen(fnp)]; // slice off path/ prefix + if (log) printf("fn: %.*s\n", cast(int)fn.length, fn.ptr); + + content.nameOffset = cast(uint)ar.length; + content.nameLength = cast(uint)fn.length; + //printf("1content.nameOffset[%lld]: %d nameLength: %d\n", i, content.nameOffset, content.nameLength); + + ar.writeStringz(fn); + } + + /* Read the files, and write their contents to ar, along with + * a terminating 4 bytes of 0 (to accommodate dchar source files) + */ + OutBuffer fb; + foreach (i, ref content; contents[]) + { + fb.reset(); // recycle read buffer + File.read(fileNames[i][0 .. strlen(fileNames[i])], fb); // read file into fb + content.importOffset = cast(uint)ar.length; + content.importLength = cast(uint)fb.length; // don't include terminating 0 + ar.write(fb.peekSlice()); // append file contents to ar + ar.write32(0); // append 4 bytes of 0 + } + + /* Copy contents[] into ar + */ + Content[] cfcontents = (cast(Content*)(ar.peekSlice().ptr + srcArchiveHeader.contentsOffset))[0 .. length]; + cfcontents[] = contents[]; + mem.xfree(contents.ptr); + + /* Create source archive file name by appending ".sar" to pathPackage + */ + auto sourceArchiveFileName = FileName.addExt(pathPackage, sar_ext); + + /* write the source archive file + */ + if (log) printf("writing sourceArchive file %.*s\n", cast(int)sourceArchiveFileName.length, sourceArchiveFileName.ptr); + if (!writeFile(sourceArchiveFileName.ptr, ar.peekSlice())) + return true; // failure + + return false; // success +} + +/********************************************** + * Read the source archive file, making it a memory mapped file. + * Fill the FileManager.pathCache.pathStatus for each of the paths in the source archive, + * and the contents of the files in the archive go in FileManager.files. + * Since it is a memory mapped file, the contents are only read when the module is + * actually imported. 
+ * Params: + * fileManager = manages files + * path = path to location of .sar file + * pkg = a package name the path leads to + * fnSink = send file names in .sar file to fnSink + * eSink = where messages go + * verbose = verbose output + * Returns: + * true = success + * false = failure + */ +nothrow +bool readSourceArchive(FileManager fileManager, const(char)[] path, const(char)[] pkg, void delegate(const(char)[]) nothrow fnSink, + ErrorSink eSink, bool verbose) +{ + //printf("reading .sar file path: '%.*s', pkg: '%.*s')\n", cast(int)path.length, path.ptr, cast(int)pkg.length, pkg.ptr); + enum log = false; + if (log) + eSink.message(Loc.initial, "reading .sar file path: '%.*s', pkg: '%.*s')", cast(int)path.length, path.ptr, cast(int)pkg.length, pkg.ptr); + + /* Combine path, pkg, and .sar into path/pkg.sar + */ + const(char)[] pathPackage = FileName.combine(path, pkg); + const(char)[] sourceArchiveFileName = FileName.defaultExt(pathPackage, sar_ext); + + /* add the path to the archive file as a disk directory + */ + fileManager.pathCache.addPath(sourceArchiveFileName, DIR.disk); + + /* Open memory mapped file on sourceArchiveFileName + */ + static if (1) // memory mapped file + { + //fprintf(stderr, "map file '%s'\n", sourceArchiveFileName.ptr); + auto mmFile = new FileMapping!(const char)(sourceArchiveFileName.ptr); + auto data = (*mmFile)[]; // all the data in the file + } + else // regular file read + { + OutBuffer buf; + if (File.read(sourceArchiveFileName, buf)) + { + if (log) + eSink.message(Loc.initial, " .sar file %s not found", sourceArchiveFileName.ptr); + return false; // empty files are ok + } + auto data = buf.extractSlice(true); // all the data in the file + } + if (data.length == 0) + { + if (log) + eSink.message(Loc.initial, " .sar file is empty"); + return false; // empty files are ok + } + + if (verbose) + eSink.message(Loc.initial, "reading source archive '%.*s%s%.*s.sar' with files:)", + cast(int)path.length, path.ptr, + (path.length ? 
"/".ptr : "".ptr), + cast(int)pkg.length, pkg.ptr); + + SrcArchiveHeader* srcArchiveHeader = cast(SrcArchiveHeader*)data.ptr; + if (data.length < SrcArchiveHeader.sizeof || + srcArchiveHeader.magicNumber != SrcArchiveHeader.MagicNumber || + srcArchiveHeader.versionNumber != 1 || + data.length < srcArchiveHeader.contentsOffset || + srcArchiveHeader.contentsLength >= uint.max / Content.sizeof - srcArchiveHeader.contentsOffset || // overflow check + data.length < srcArchiveHeader.contentsOffset + srcArchiveHeader.contentsLength * Content.sizeof) + { + eSink.error(Loc.initial, "corrupt .sar file header"); + return false; // corrupt file + } + + foreach (i; 0 .. srcArchiveHeader.contentsLength) + { + const Content* cp = cast(const(Content)*)(data.ptr + srcArchiveHeader.contentsOffset + i * Content.sizeof); + + if (data.length <= cp.nameOffset || + cp.nameOffset >= cp.nameOffset + cp.nameLength + 1 || + data.length <= cp.nameOffset + cp.nameLength + 1 || + data.length <= cp.importOffset || + cp.importOffset >= cp.importOffset + cp.importLength + 1 || + data.length <= cp.importOffset + cp.importLength + 1) + { + eSink.error(Loc.initial, "corrupt .sar file contents"); + return false; // corrupt file + } + + const(char)[] fileName = (data.ptr + cp.nameOffset)[0 .. cp.nameLength]; + const(ubyte)[] fileContents = (cast(const(ubyte)*)(data.ptr + cp.importOffset))[0 .. cp.importLength]; + + //fprintf(stderr, "fileContents: %s %p[%d]\n", fileName.ptr, fileContents.ptr, cast(int)fileContents.length); + //fprintf(stderr, "fileContents: %s\n", fileContents.ptr); + + // Cache file name and file contents (but don't read the file contents!) 
+ const(char)[] fqn = FileName.combine(path, fileName); + fileManager.addSarFileNameAndContents(fqn, fileContents); + if (fnSink) + fnSink(fqn); + if (verbose) + eSink.message(Loc.initial, " %.*s", cast(int)fqn.length, fqn.ptr); + } + if (verbose) + eSink.message(Loc.initial, "done reading source archive"); + + return true; +} + +/*************************** + * The source archive file starts with this header. + */ +struct SrcArchiveHeader +{ + enum MagicNumber = 0x64FE_ED63; + uint magicNumber = MagicNumber; // don't collide with other file types + uint versionNumber = 1; // so we can change the format + uint contentsOffset; // file offset to start of contents section + uint contentsLength; // the number of contents in the contents section +} + +/* The contents section is an array of Contents + */ +struct Content +{ + uint nameOffset; // file offset to name of file + uint nameLength; // number of characters in name, excluding terminating 0 + uint importOffset; // file offset to start of imported file contents + uint importLength; // number of characters in the imported file contents, excluding terminating 0 +} diff --git a/compiler/src/dmd/frontend.h b/compiler/src/dmd/frontend.h index c4394042077b..1440d3f4b4c3 100644 --- a/compiler/src/dmd/frontend.h +++ b/compiler/src/dmd/frontend.h @@ -7947,6 +7947,7 @@ struct Help final bool checkAction; bool revert; bool preview; + bool sar; bool externStd; bool hc; Help() : @@ -7958,11 +7959,12 @@ struct Help final checkAction(), revert(), preview(), + sar(), externStd(), hc() { } - Help(bool manual, bool usage = false, bool mcpu = false, bool transition = false, bool check = false, bool checkAction = false, bool revert = false, bool preview = false, bool externStd = false, bool hc = false) : + Help(bool manual, bool usage = false, bool mcpu = false, bool transition = false, bool check = false, bool checkAction = false, bool revert = false, bool preview = false, bool sar = false, bool externStd = false, bool hc = false) : 
manual(manual), usage(usage), mcpu(mcpu), @@ -7971,6 +7973,7 @@ struct Help final checkAction(checkAction), revert(revert), preview(preview), + sar(sar), externStd(externStd), hc(hc) {} @@ -8062,6 +8065,7 @@ struct Param final bool addMain; bool allInst; bool bitfields; + bool useSourceArchive; CppStdRevision cplusplus; Help help; Verbose v; @@ -8097,6 +8101,7 @@ struct Param final _d_dynamicArray< const char > objdir; _d_dynamicArray< const char > objname; _d_dynamicArray< const char > libname; + _d_dynamicArray< const char > pathPackage; Output ddoc; Output dihdr; Output cxxhdr; @@ -8145,6 +8150,7 @@ struct Param final addMain(), allInst(), bitfields(), + useSourceArchive(true), cplusplus((CppStdRevision)201103u), help(), v(), @@ -8174,6 +8180,7 @@ struct Param final objdir(), objname(), libname(), + pathPackage(), ddoc(), dihdr(), cxxhdr(), @@ -8198,7 +8205,7 @@ struct Param final mapfile() { } - Param(bool obj, bool multiobj = false, bool trace = false, bool tracegc = false, bool vcg_ast = false, DiagnosticReporting useDeprecated = (DiagnosticReporting)1u, bool useUnitTests = false, bool useInline = false, bool release = false, bool preservePaths = false, DiagnosticReporting warnings = (DiagnosticReporting)2u, bool cov = false, uint8_t covPercent = 0u, bool ctfe_cov = false, bool ignoreUnsupportedPragmas = true, bool useModuleInfo = true, bool useTypeInfo = true, bool useExceptions = true, bool useGC = true, bool betterC = false, bool addMain = false, bool allInst = false, bool bitfields = false, CppStdRevision cplusplus = (CppStdRevision)201103u, Help help = Help(), Verbose v = Verbose(), FeatureState useDIP25 = (FeatureState)2u, FeatureState useDIP1000 = (FeatureState)0u, bool ehnogc = false, bool useDIP1021 = false, FeatureState fieldwise = (FeatureState)0u, bool fixAliasThis = false, FeatureState rvalueRefParam = (FeatureState)0u, FeatureState noSharedAccess = (FeatureState)0u, bool previewIn = false, bool inclusiveInContracts = false, bool shortenedMethods 
= true, bool fixImmutableConv = false, bool fix16997 = true, FeatureState dtorFields = (FeatureState)0u, FeatureState systemVariables = (FeatureState)0u, CHECKENABLE useInvariants = (CHECKENABLE)0u, CHECKENABLE useIn = (CHECKENABLE)0u, CHECKENABLE useOut = (CHECKENABLE)0u, CHECKENABLE useArrayBounds = (CHECKENABLE)0u, CHECKENABLE useAssert = (CHECKENABLE)0u, CHECKENABLE useSwitchError = (CHECKENABLE)0u, CHECKENABLE boundscheck = (CHECKENABLE)0u, CHECKACTION checkAction = (CHECKACTION)0u, CLIIdentifierTable dIdentifierTable = (CLIIdentifierTable)0u, CLIIdentifierTable cIdentifierTable = (CLIIdentifierTable)0u, _d_dynamicArray< const char > argv0 = {}, Array modFileAliasStrings = Array(), Array imppath = Array(), Array fileImppath = Array(), _d_dynamicArray< const char > objdir = {}, _d_dynamicArray< const char > objname = {}, _d_dynamicArray< const char > libname = {}, Output ddoc = Output(), Output dihdr = Output(), Output cxxhdr = Output(), Output json = Output(), JsonFieldFlags jsonFieldFlags = (JsonFieldFlags)0u, Output makeDeps = Output(), Output mixinOut = Output(), Output moduleDeps = Output(), uint32_t debuglevel = 0u, uint32_t versionlevel = 0u, bool run = false, Array runargs = Array(), Array cppswitches = Array(), const char* cpp = nullptr, Array objfiles = Array(), Array linkswitches = Array(), Array linkswitchIsForCC = Array(), Array libfiles = Array(), Array dllfiles = Array(), _d_dynamicArray< const char > deffile = {}, _d_dynamicArray< const char > resfile = {}, _d_dynamicArray< const char > exefile = {}, _d_dynamicArray< const char > mapfile = {}) : + Param(bool obj, bool multiobj = false, bool trace = false, bool tracegc = false, bool vcg_ast = false, DiagnosticReporting useDeprecated = (DiagnosticReporting)1u, bool useUnitTests = false, bool useInline = false, bool release = false, bool preservePaths = false, DiagnosticReporting warnings = (DiagnosticReporting)2u, bool cov = false, uint8_t covPercent = 0u, bool ctfe_cov = false, bool 
ignoreUnsupportedPragmas = true, bool useModuleInfo = true, bool useTypeInfo = true, bool useExceptions = true, bool useGC = true, bool betterC = false, bool addMain = false, bool allInst = false, bool bitfields = false, bool useSourceArchive = true, CppStdRevision cplusplus = (CppStdRevision)201103u, Help help = Help(), Verbose v = Verbose(), FeatureState useDIP25 = (FeatureState)2u, FeatureState useDIP1000 = (FeatureState)0u, bool ehnogc = false, bool useDIP1021 = false, FeatureState fieldwise = (FeatureState)0u, bool fixAliasThis = false, FeatureState rvalueRefParam = (FeatureState)0u, FeatureState noSharedAccess = (FeatureState)0u, bool previewIn = false, bool inclusiveInContracts = false, bool shortenedMethods = true, bool fixImmutableConv = false, bool fix16997 = true, FeatureState dtorFields = (FeatureState)0u, FeatureState systemVariables = (FeatureState)0u, CHECKENABLE useInvariants = (CHECKENABLE)0u, CHECKENABLE useIn = (CHECKENABLE)0u, CHECKENABLE useOut = (CHECKENABLE)0u, CHECKENABLE useArrayBounds = (CHECKENABLE)0u, CHECKENABLE useAssert = (CHECKENABLE)0u, CHECKENABLE useSwitchError = (CHECKENABLE)0u, CHECKENABLE boundscheck = (CHECKENABLE)0u, CHECKACTION checkAction = (CHECKACTION)0u, CLIIdentifierTable dIdentifierTable = (CLIIdentifierTable)0u, CLIIdentifierTable cIdentifierTable = (CLIIdentifierTable)0u, _d_dynamicArray< const char > argv0 = {}, Array modFileAliasStrings = Array(), Array imppath = Array(), Array fileImppath = Array(), _d_dynamicArray< const char > objdir = {}, _d_dynamicArray< const char > objname = {}, _d_dynamicArray< const char > libname = {}, _d_dynamicArray< const char > pathPackage = {}, Output ddoc = Output(), Output dihdr = Output(), Output cxxhdr = Output(), Output json = Output(), JsonFieldFlags jsonFieldFlags = (JsonFieldFlags)0u, Output makeDeps = Output(), Output mixinOut = Output(), Output moduleDeps = Output(), uint32_t debuglevel = 0u, uint32_t versionlevel = 0u, bool run = false, Array runargs = Array(), Array 
cppswitches = Array(), const char* cpp = nullptr, Array objfiles = Array(), Array linkswitches = Array(), Array linkswitchIsForCC = Array(), Array libfiles = Array(), Array dllfiles = Array(), _d_dynamicArray< const char > deffile = {}, _d_dynamicArray< const char > resfile = {}, _d_dynamicArray< const char > exefile = {}, _d_dynamicArray< const char > mapfile = {}) : obj(obj), multiobj(multiobj), trace(trace), @@ -8222,6 +8229,7 @@ struct Param final addMain(addMain), allInst(allInst), bitfields(bitfields), + useSourceArchive(useSourceArchive), cplusplus(cplusplus), help(help), v(v), @@ -8257,6 +8265,7 @@ struct Param final objdir(objdir), objname(objname), libname(libname), + pathPackage(pathPackage), ddoc(ddoc), dihdr(dihdr), cxxhdr(cxxhdr), diff --git a/compiler/src/dmd/globals.d b/compiler/src/dmd/globals.d index f8291fa5ce7e..b1486ce1324e 100644 --- a/compiler/src/dmd/globals.d +++ b/compiler/src/dmd/globals.d @@ -108,6 +108,7 @@ extern(C++) struct Help bool checkAction; // -checkaction bool revert; // -revert bool preview; // -preview + bool sar; // -sar bool externStd; // -extern-std bool hc; // -HC } @@ -170,6 +171,7 @@ extern (C++) struct Param bool addMain; // add a default main() function bool allInst; // generate code for all template instantiations bool bitfields; // support C style bit fields + bool useSourceArchive = true; // read source files from source archive library (.sar file) CppStdRevision cplusplus = CppStdRevision.cpp11; // version of C++ standard to support @@ -219,6 +221,7 @@ extern (C++) struct Param const(char)[] objdir; // .obj/.lib file output directory const(char)[] objname; // .obj file output name const(char)[] libname; // .lib file output name + const(char)[] pathPackage; // path/package for path/package.sar file Output ddoc; // Generate embedded documentation comments Output dihdr; // Generate `.di` 'header' files @@ -264,6 +267,7 @@ enum json_ext = "json"; // for JSON files enum map_ext = "map"; // for .map files enum c_ext = 
"c"; // for C source files enum i_ext = "i"; // for preprocessed C source file +enum sar_ext = "sar"; // file extension for source archive /** * Collection of global compiler settings and global state used by the frontend diff --git a/compiler/src/dmd/globals.h b/compiler/src/dmd/globals.h index ac2b2867fb7c..063005505842 100644 --- a/compiler/src/dmd/globals.h +++ b/compiler/src/dmd/globals.h @@ -119,6 +119,7 @@ struct Help d_bool checkAction; // -checkaction d_bool revert; // -revert d_bool preview; // -preview + d_bool sar; // -sar d_bool externStd; // -extern-std d_bool hc; // -HC }; @@ -173,6 +174,8 @@ struct Param d_bool addMain; // add a default main() function d_bool allInst; // generate code for all template instantiations d_bool bitfields; // support C style bit fields + d_bool useSourceArchive; // read source files from source archive library (.sar file) + CppStdRevision cplusplus; // version of C++ name mangling to support Help help; @@ -221,6 +224,7 @@ struct Param DString objdir; // .obj/.lib file output directory DString objname; // .obj file output name DString libname; // .lib file output name + DString pathPackage; // path to .sar source archive file Output ddoc; // Generate embedded documentation comments Output dihdr; // Generate `.di` 'header' files diff --git a/compiler/src/dmd/main.d b/compiler/src/dmd/main.d index 27f79379abae..1b0e06122565 100644 --- a/compiler/src/dmd/main.d +++ b/compiler/src/dmd/main.d @@ -320,6 +320,39 @@ private int tryMain(size_t argc, const(char)** argv, ref Param params) { fatal(); } + if (params.pathPackage.length) + { + if (writeSourceArchive(params.pathPackage)) + fatal(); + return EXIT_SUCCESS; + } + + /* Look for file(s) with the extension .sar, and + expand them in-place in files[] + */ + for (size_t i = 0; i < files.length; ++i) // don't use foreach()! 
+    {
+        while (i < files.length && FileName.equalsExt(files[i], sar_ext)) // re-test slot i: an archive that expands to nothing leaves the next file (possibly another .sar) here
+        {
+            auto file = files[i];
+//fprintf(stderr, "found .sar file\n");
+            void fnSink(const(char)[] name) nothrow
+            {
+//fprintf(stderr, "fnSink %s\n", name.ptr);
+                assert(*(name.ptr + name.length) == 0); // name.ptr is stored in files[] as a C string, so it must be 0-terminated
+                files.insert(i, name.ptr); // NOTE(review): always inserting at i stores the names in reverse sink order - confirm intended
+            }
+
+            const(char)[] filename = file[0 .. strlen(file)];
+            const(char)[] name = FileName.sansExt(FileName.name(filename));
+            const(char)[] path = filename.length == name.length + 4 ? null : filename[0 .. $ - (name.length + 1 + 4)]; // 4 == ".sar".length; the extra 1 is presumably the path separator before the name
+            files.remove(i); // drop the .sar entry itself; fnSink refills slot i with the archive's members
+            readSourceArchive(global.fileManager, path, name, &fnSink, global.errorSink, global.params.v.verbose);
+        }
+    }
+    if (0 && global.params.v.verbose)
+        foreach (file; files[]) fprintf(stderr, "file: %s\n", file);
+
     if (files.length == 0)
     {
         if (params.jsonFieldFlags)
@@ -383,6 +416,10 @@ private int tryMain(size_t argc, const(char)** argv, ref Param params)
     buildPath(params.imppath, global.path);
     buildPath(params.fileImppath, global.filePath);

+    /* Look along the path for any archive files, and load them
+     */
+    findAllArchives(global.fileManager, global.path[]);
+
     // Create Modules
     Modules modules = createModules(files, libmodules, target);
     // Read files
@@ -827,6 +864,7 @@ bool parseCommandlineAndConfig(size_t argc, const(char)** argv, ref Param params
                                   envsection.ptr, cast(int)global.inifilename.length, global.inifilename.ptr);

     global.preprocess = &preprocess;
+    global.fileManager.setUseSourceArchive(global.params.useSourceArchive);
     return false;
 }

diff --git a/compiler/src/dmd/mars.d b/compiler/src/dmd/mars.d
index 6d4af13dda0c..bb22b8488794 100644
--- a/compiler/src/dmd/mars.d
+++ b/compiler/src/dmd/mars.d
@@ -588,7 +588,7 @@ bool parseCommandLine(const ref Strings arguments, const size_t argc, ref Param
     {
         foreach (i, arg; arguments[])
         {
-            printf("arguments[%d] = '%s'\n", cast(int)i, arguments[i]);
+            printf("arguments[%d] = '%s'\n", cast(int)i, arg);
         }
     }

@@ -1206,6 +1206,28 @@ bool parseCommandLine(const ref Strings arguments, const size_t argc,
ref Param
                 return false;
             }
         }
+        else if (startsWith(p + 1, "sar")) // https://dlang.org/dmd.html#switch-sar
+        {
+            /* Parse:
+             *      -sar=on|off|path/package
+             */
+            enum len = "-sar=".length;
+            mixin(checkOptionsMixin("sar",
+                "`-sar=` requires a behavior"));
+            switch (arg[len .. $])
+            {
+                case "on":
+                    params.useSourceArchive = true;
+                    break;
+                case "off":
+                    params.useSourceArchive = false;
+                    break;
+                default: // any other value is kept as the path/package that tryMain hands to writeSourceArchive
+                    params.useSourceArchive = false;
+                    params.pathPackage = arg[len .. $];
+                    break;
+            }
+        }
         else if (arg == "-w") // https://dlang.org/dmd.html#switch-w
             params.warnings = DiagnosticReporting.error;
         else if (arg == "-wi") // https://dlang.org/dmd.html#switch-wi
diff --git a/compiler/test/runnable/extra-files/archive/sample.d b/compiler/test/runnable/extra-files/archive/sample.d
new file mode 100644
index 000000000000..88d5370c009e
--- /dev/null
+++ b/compiler/test/runnable/extra-files/archive/sample.d
@@ -0,0 +1,7 @@
+// Sample for testing .sar files; packed into archive.sar by runnable/testsar.sh
+
+module archive.sample;
+
+enum X = 3;
+
+void func() { }
diff --git a/compiler/test/runnable/extra-files/testsar1.d b/compiler/test/runnable/extra-files/testsar1.d
new file mode 100644
index 000000000000..8536148b9bb5
--- /dev/null
+++ b/compiler/test/runnable/extra-files/testsar1.d
@@ -0,0 +1,9 @@
+// test ability to read imported .sar files (built by runnable/testsar.sh with -I pointing at extra-files)
+
+import archive.sample;
+
+int main()
+{
+    assert(X == 3);
+    return 0;
+}
diff --git a/compiler/test/runnable/extra-files/testsar2.d b/compiler/test/runnable/extra-files/testsar2.d
new file mode 100644
index 000000000000..a6317796632f
--- /dev/null
+++ b/compiler/test/runnable/extra-files/testsar2.d
@@ -0,0 +1,11 @@
+/* REQUIRED_ARGS: imports/archive.sar
+ */
+// test ability to read .sar files on the command line (built by runnable/testsar.sh; NOTE(review): REQUIRED_ARGS above names imports/ while the script passes extra-files/archive.sar - confirm)
+
+import archive.sample;
+
+int main()
+{
+    func();
+    return 0;
+}
diff --git a/compiler/test/runnable/testsar.sh b/compiler/test/runnable/testsar.sh
new file mode 100644
index 000000000000..6e172711bb1b
--- /dev/null
+++ b/compiler/test/runnable/testsar.sh
@@ -0,0
+1,21 @@
+#!/usr/bin/env bash
+
+# Abort on the first failing step: unless the harness already enables errexit,
+# the trailing `exit 0` would report success even after a failed compile or run.
+set -e
+
+# create extra-files/archive.sar from extra-files/archive/sample.d
+${DMD} -sar="${EXTRA_FILES}${SEP}archive"
+
+# ensure .sar file on command line works
+${DMD} -m${MODEL} "${EXTRA_FILES}${SEP}testsar2.d" "${EXTRA_FILES}${SEP}archive.sar" -of"${OUTPUT_BASE}${EXE}"
+"${OUTPUT_BASE}${EXE}"
+
+# ensure .sar file from import works
+${DMD} -m${MODEL} "${EXTRA_FILES}${SEP}testsar1.d" -I"${EXTRA_FILES}" -of"${OUTPUT_BASE}${EXE}"
+"${OUTPUT_BASE}${EXE}"
+
+# remove generated .obj, .exe, and archive.sar files (-f: a missing artifact is not a test failure)
+rm -f "${OUTPUT_BASE}${OBJ}" "${OUTPUT_BASE}${EXE}" "${EXTRA_FILES}${SEP}archive.sar"
+
+exit 0