Skip to content

Commit

Permalink
Support library source file archives
Browse files Browse the repository at this point in the history
  • Loading branch information
WalterBright committed Apr 14, 2024
1 parent 06db0bf commit 883de17
Show file tree
Hide file tree
Showing 13 changed files with 841 additions and 61 deletions.
79 changes: 79 additions & 0 deletions changelog/dmd.source-archive.dd
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Support Source Archive Files

This is a compiler feature, not a D language feature.

Similar to how libraries of object files are made available to the linker,
this adds source archive file support to the compiler. Any package (and all its
sub-files) can become a source archive file. The source archive file is then
supplied to the compiler rather than a directory with lots of files in it.

This means, for example, that all of Phobos can be distributed as a single
file, std.sar. (The .sar extension stands for "source archive".) If std.sar
is in a path along the import path list supplied to the compiler, the
compiler will prefer std.sar to looking through the std directory tree for the
sub-modules. The std directory wouldn't even need to exist.

All the source files in a project or library can be represented as a single
file, making it easy to deal with.

The file format of the .sar file is very similar to that of object file libraries
and various other schemes. It does not adhere to those other schemes due to their
variances from platform to platform, all the code needed to support things that
are unneeded for .sar files, and special consideration for D's needs. The format
is meant to be friendly for memory-mapped file access, and does not have alignment
issues.

A .sar file consists of the following sections:

1. a header, to identify it as a .sar file with a magic number and a version

2. a table of contents, one entry per source file. The entries consist of
an offset/length to the filename string, and an offset/length to the file
contents

3. the filename strings, each string has a terminating 0

4. the file contents, each file has four 0 bytes appended, as the lexer wants
that as a sentinel

All integers in the format are native-endian (which is little-endian for x86).

To create a .sar file, such as one for Phobos' std:

dmd -sar=/home/duser/dmd/src/phobos/std

and the file:

/home/duser/dmd/src/phobos/std.sar

will be created and filled with all sub-files with one of the extensions ".di", ".d",
".c", or ".i".

For Phobos, std.sar is approximately 11 megabytes in size.

To use the std.sar file, nothing needs to be changed in the user's build system.
dmd will automatically prefer using any .sar file it finds. To disable using
.sar files, which would be necessary when doing development of the source files,
select one of the following:

1. delete the .sar file

2. use the -sar=off compiler switch. -sar=on turns it on, and is the default
setting

Trying out .sar with simple programs like hello world yields a negligible difference
in compile speed. It's unlikely a larger program would show any particular trend
in performance.

A standalone archiver program can be easily created from the implementation in DMD.

Another way to use a .sar file is to simply add it to the command line:

dmd foo.sar

If foo.sar contains the files a.d, b.d and c.d, the command is equivalent to:

dmd a.d b.d c.d

I.e., .sar files are simply a way to "bundle" a bunch of source files into a single
file.
12 changes: 12 additions & 0 deletions compiler/src/dmd/cli.d
Original file line number Diff line number Diff line change
Expand Up @@ -766,6 +766,18 @@ dmd -cov -unittest myprog.d
`$(UNIX Generate shared library)
$(WINDOWS Generate DLL library)`,
),
Option("sar=[on|off|<path/package>]",
"turn reading source archive files on or off, or create source archive at <path/package.sar>",
`Controls source archive files and usage.
$(UL
$(LI $(I on): use source archive files (default))
$(LI $(I off): ignore source archive files)
$(LI $(I path/package): create source archive file.
<path> is where the root package of the files to be archived are.
All the modules in <package> are written to the source archive file <path/package.sar>.
Do not use in combination with compiling, as that will be very slow.)
)`
),
Option("target=<triple>",
"use <triple> as <arch>-[<vendor>-]<os>[-<cenv>[-<cppenv]]",
"$(I arch) is the architecture: either `x86`, `x64`, `x86_64` or `x32`,
Expand Down
169 changes: 169 additions & 0 deletions compiler/src/dmd/common/file.d
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,23 @@

module dmd.common.file;

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;
import core.stdc.limits;

import core.stdc.errno : errno;
import core.stdc.stdio : fprintf, remove, rename, stderr;
import core.stdc.stdlib;
import core.stdc.string : strerror, strlen, memcpy;

import dmd.common.smallbuffer;
import dmd.root.filename;
import dmd.root.rmem;

version (Windows)
{
import core.stdc.wchar_;
import core.sys.windows.winbase;
import core.sys.windows.winnls : CP_ACP;
import core.sys.windows.winnt;
Expand All @@ -32,6 +40,7 @@ version (Windows)
}
else version (Posix)
{
import core.sys.posix.dirent;
import core.sys.posix.fcntl;
import core.sys.posix.sys.mman;
import core.sys.posix.sys.stat;
Expand Down Expand Up @@ -587,3 +596,163 @@ private auto ref fakePure(F)(scope F fun) pure
mixin("alias PureFun = " ~ F.stringof ~ " pure;");
return (cast(PureFun) fun)();
}

/***********************************
 * Search the directory dir_path, and optionally every directory below it,
 * for files whose extension matches one of the extensions in exts[].
 * Each match is passed to filenameSink as dir_path, a path separator,
 * and the entry name.
 *
 * Entries whose name starts with '.' are skipped; this covers "." and ".."
 * as well as dot-files and dot-directories.
 * On Posix the entry type is determined with lstat(), so a symbolic link is
 * treated as neither a directory nor a regular file and is not reported.
 *
 * The slice handed to filenameSink points into a scratch buffer that is
 * overwritten for the next entry and released before returning, so the sink
 * must copy the name if it needs to keep it.
 *
 * Params:
 *      dir_path = root of directories to search, 0 terminated
 *      exts = array of filename extensions to match, compared against the result of FileName.ext()
 *      recurse = go into subdirectories
 *      filenameSink = accepts the resulting matches
 * Returns:
 *      true if dir_path itself could not be opened. The result of searching
 *      a subdirectory is not propagated.
 */
bool findFiles(const char* dir_path, const char[][] exts, bool recurse, void delegate(const(char)[]) nothrow filenameSink)
{
    enum log = false;
    if (log) printf("findFiles() dir_path: %s\n", dir_path);
    version (Windows)
    {
        debug
            enum BufLength = 10;        // trigger any reallocation bugs
        else
            enum BufLength = 100;
        char[BufLength + 1] buf = void;
        char* fullPath = buf.ptr;               // starts on the stack, moves to the heap when too small
        size_t fullPathLength = BufLength;      // capacity of fullPath, not counting the terminating 0

        // fullPath = dir_path \ *.*
        const dir_pathLength = strlen(dir_path);
        auto count = dir_pathLength + 1 + 3;
        if (count > fullPathLength)
        {
            fullPathLength = count;
            fullPath = cast(char*)Mem.xrealloc_noscan(fullPath == buf.ptr ? null : fullPath, fullPathLength + 1);
        }
        memcpy(fullPath, dir_path, dir_pathLength);
        strcpy(fullPath + dir_pathLength, "\\*.*".ptr);

        if (log) printf("fullPath: %s\n", fullPath);

        WIN32_FIND_DATAW ffd = void;
        HANDLE hFind = fullPath[0 .. strlen(fullPath)].extendedPathThen!(p => FindFirstFileW(p.ptr, &ffd));
        if (hFind == INVALID_HANDLE_VALUE)
            return true;

        do
        {
            if (log) wprintf("ffd.cFileName: %s\n", ffd.cFileName.ptr);
            if (ffd.cFileName[0] == 0)
                continue;                       // ignore

            if (ffd.cFileName[0] == '.')
                continue;                       // ignore files that start with a ., also ignore . and .. directories

            // name is allocated by toNarrowStringz and released at the bottom of the loop
            const(char)[] name = toNarrowStringz(ffd.cFileName[0 .. wcslen(ffd.cFileName.ptr)], null);
            if (log) printf("name: %s\n", name.ptr);

            // fullPath = dir_path \ name.ptr
            count = dir_pathLength + 1 + name.length;
            if (count > fullPathLength)
            {
                fullPathLength = count;
                fullPath = cast(char*)Mem.xrealloc_noscan(fullPath == buf.ptr ? null : fullPath, fullPathLength + 1);

                /* When the buffer moves from buf[] to the heap, null is passed to
                 * the reallocation, so nothing is carried over into the new block.
                 * Write the "dir_path\" prefix again so it is never lost.
                 */
                memcpy(fullPath, dir_path, dir_pathLength);
                fullPath[dir_pathLength] = '\\';
            }
            strcpy(fullPath + dir_pathLength + 1, name.ptr);

            if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
            {
                if (recurse)
                    findFiles(fullPath, exts, recurse, filenameSink);
            }
            else
            {
                const(char)[] nameExt = FileName.ext(name);
                foreach (ext; exts[])
                {
                    if (nameExt == ext)
                    {
                        if (log) printf("adding %s\n", fullPath);
                        filenameSink(fullPath[0 .. count]);
                        break;                  // report each file once, even if exts[] has duplicates
                    }
                }
            }
            mem.xfree(cast(void*)name.ptr);

        } while (FindNextFileW(hFind, &ffd) != 0);

        if (fullPath != buf.ptr)
            mem.xfree(fullPath);
        FindClose(hFind);
        if (log) printf("findFiles() exit\n");
        return false;
    }
    else version (Posix)
    {
        DIR* dir = opendir(dir_path);
        if (!dir)
            return true;

        debug
            enum BufLength = 10;        // trigger any reallocation bugs
        else
            enum BufLength = 100;
        char[BufLength + 1] buf = void;
        char* fullPath = buf.ptr;               // starts on the stack, moves to the heap when too small
        size_t fullPathLength = BufLength;      // capacity of fullPath, not counting the terminating 0

        const dir_pathLength = strlen(dir_path);

        dirent* entry;
        while ((entry = readdir(dir)) != null)
        {
            //printf("entry: %s\n", entry.d_name.ptr);
            if (entry.d_name[0] == '.')
                continue;                       // ignore files that start with a ., also ignore . and .. directories

            const(char)[] name = entry.d_name.ptr[0 .. strlen(entry.d_name.ptr)]; // convert to D string
            if (!name.length)
                continue;                       // ignore

            // fullPath = dir_path / name
            const count = dir_pathLength + 1 + name.length;
            if (count > fullPathLength)
            {
                fullPathLength = count;
                fullPath = cast(char*)Mem.xrealloc_noscan(fullPath == buf.ptr ? null : fullPath, fullPathLength + 1);
            }
            // the whole path is rewritten for every entry, so a reallocation cannot lose the prefix
            memcpy(fullPath, dir_path, dir_pathLength);
            fullPath[dir_pathLength] = '/';
            strcpy(fullPath + dir_pathLength + 1, entry.d_name.ptr);

            stat_t statbuf;
            if (lstat(fullPath, &statbuf) == -1)
                continue;                       // cannot determine what the entry is, skip it

            if (S_ISDIR(statbuf.st_mode))
            {
                if (recurse)
                    findFiles(fullPath, exts, recurse, filenameSink);
            }
            else if (S_ISREG(statbuf.st_mode))
            {
                const(char)[] nameExt = FileName.ext(name);
                foreach (ext; exts)
                {
                    if (nameExt == ext)
                    {
                        //printf("%s\n", fullPath);
                        filenameSink(fullPath[0 .. count]);
                        break;                  // report each file once, even if exts[] has duplicates
                    }
                }
            }
        }

        if (fullPath != buf.ptr)
            mem.xfree(fullPath);
        closedir(dir);
        return false;
    }
    else
        static assert(0);
}

0 comments on commit 883de17

Please sign in to comment.