diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig
index c18a748de265..c4145a9f3f75 100644
--- a/lib/std/Build/Cache.zig
+++ b/lib/std/Build/Cache.zig
@@ -2,77 +2,6 @@
 //! This is not a general-purpose cache. It is designed to be fast and simple,
 //! not to withstand attacks using specially-crafted input.
 
-pub const Directory = struct {
-    /// This field is redundant for operations that can act on the open directory handle
-    /// directly, but it is needed when passing the directory to a child process.
-    /// `null` means cwd.
-    path: ?[]const u8,
-    handle: fs.Dir,
-
-    pub fn clone(d: Directory, arena: Allocator) Allocator.Error!Directory {
-        return .{
-            .path = if (d.path) |p| try arena.dupe(u8, p) else null,
-            .handle = d.handle,
-        };
-    }
-
-    pub fn cwd() Directory {
-        return .{
-            .path = null,
-            .handle = fs.cwd(),
-        };
-    }
-
-    pub fn join(self: Directory, allocator: Allocator, paths: []const []const u8) ![]u8 {
-        if (self.path) |p| {
-            // TODO clean way to do this with only 1 allocation
-            const part2 = try fs.path.join(allocator, paths);
-            defer allocator.free(part2);
-            return fs.path.join(allocator, &[_][]const u8{ p, part2 });
-        } else {
-            return fs.path.join(allocator, paths);
-        }
-    }
-
-    pub fn joinZ(self: Directory, allocator: Allocator, paths: []const []const u8) ![:0]u8 {
-        if (self.path) |p| {
-            // TODO clean way to do this with only 1 allocation
-            const part2 = try fs.path.join(allocator, paths);
-            defer allocator.free(part2);
-            return fs.path.joinZ(allocator, &[_][]const u8{ p, part2 });
-        } else {
-            return fs.path.joinZ(allocator, paths);
-        }
-    }
-
-    /// Whether or not the handle should be closed, or the path should be freed
-    /// is determined by usage, however this function is provided for convenience
-    /// if it happens to be what the caller needs.
-    pub fn closeAndFree(self: *Directory, gpa: Allocator) void {
-        self.handle.close();
-        if (self.path) |p| gpa.free(p);
-        self.* = undefined;
-    }
-
-    pub fn format(
-        self: Directory,
-        comptime fmt_string: []const u8,
-        options: fmt.FormatOptions,
-        writer: anytype,
-    ) !void {
-        _ = options;
-        if (fmt_string.len != 0) fmt.invalidFmtError(fmt_string, self);
-        if (self.path) |p| {
-            try writer.writeAll(p);
-            try writer.writeAll(fs.path.sep_str);
-        }
-    }
-
-    pub fn eql(self: Directory, other: Directory) bool {
-        return self.handle.fd == other.handle.fd;
-    }
-};
-
 gpa: Allocator,
 manifest_dir: fs.Dir,
 hash: HashHelper = .{},
@@ -88,6 +17,8 @@ mutex: std.Thread.Mutex = .{},
 prefixes_buffer: [4]Directory = undefined,
 prefixes_len: usize = 0,
 
+pub const Path = @import("Cache/Path.zig");
+pub const Directory = @import("Cache/Directory.zig");
 pub const DepTokenizer = @import("Cache/DepTokenizer.zig");
 
 const Cache = @This();
@@ -124,7 +55,15 @@ pub fn prefixes(cache: *const Cache) []const Directory {
 
 const PrefixedPath = struct {
     prefix: u8,
-    sub_path: []u8,
+    sub_path: []const u8,
+
+    fn eql(a: PrefixedPath, b: PrefixedPath) bool {
+        return a.prefix == b.prefix and std.mem.eql(u8, a.sub_path, b.sub_path);
+    }
+
+    fn hash(pp: PrefixedPath) u32 {
+        return @truncate(std.hash.Wyhash.hash(pp.prefix, pp.sub_path));
+    }
 };
 
 fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath {
@@ -183,7 +122,7 @@ pub const HexDigest = [hex_digest_len]u8;
 
 /// This is currently just an arbitrary non-empty string that can't match another manifest line.
 const manifest_header = "0";
-const manifest_file_size_max = 50 * 1024 * 1024;
+const manifest_file_size_max = 100 * 1024 * 1024;
 
 /// The type used for hashing file contents. Currently, this is SipHash128(1, 3), because it
 /// provides enough collision resistance for the Manifest use cases, while being one of our
@@ -201,7 +140,7 @@ pub const hasher_init: Hasher = Hasher.init(&[_]u8{
 });
 
 pub const File = struct {
-    prefixed_path: ?PrefixedPath,
+    prefixed_path: PrefixedPath,
     max_file_size: ?usize,
     stat: Stat,
     bin_digest: BinDigest,
@@ -214,16 +153,18 @@ pub const File = struct {
     };
 
     pub fn deinit(self: *File, gpa: Allocator) void {
-        if (self.prefixed_path) |pp| {
-            gpa.free(pp.sub_path);
-            self.prefixed_path = null;
-        }
+        gpa.free(self.prefixed_path.sub_path);
         if (self.contents) |contents| {
            gpa.free(contents);
            self.contents = null;
        }
        self.* = undefined;
    }
+
+    pub fn updateMaxSize(file: *File, new_max_size: ?usize) void {
+        const new = new_max_size orelse return;
+        file.max_file_size = if (file.max_file_size) |old| @max(old, new) else new;
+    }
 };
 
 pub const HashHelper = struct {
@@ -365,7 +306,7 @@ pub const Manifest = struct {
     // order to obtain a problematic timestamp for the next call. Calls after that
     // will then use the same timestamp, to avoid unnecessary filesystem writes.
     want_refresh_timestamp: bool = true,
-    files: std.ArrayListUnmanaged(File) = .{},
+    files: Files = .{},
     hex_digest: HexDigest,
     /// Populated when hit() returns an error because of one
     /// of the files listed in the manifest.
@@ -374,6 +315,34 @@
     /// what time the file system thinks it is, according to its own granularity.
     recent_problematic_timestamp: i128 = 0,
 
+    pub const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false);
+
+    pub const FilesContext = struct {
+        pub fn hash(fc: FilesContext, file: File) u32 {
+            _ = fc;
+            return file.prefixed_path.hash();
+        }
+
+        pub fn eql(fc: FilesContext, a: File, b: File, b_index: usize) bool {
+            _ = fc;
+            _ = b_index;
+            return a.prefixed_path.eql(b.prefixed_path);
+        }
+    };
+
+    const FilesAdapter = struct {
+        pub fn eql(context: @This(), a: PrefixedPath, b: File, b_index: usize) bool {
+            _ = context;
+            _ = b_index;
+            return a.eql(b.prefixed_path);
+        }
+
+        pub fn hash(context: @This(), key: PrefixedPath) u32 {
+            _ = context;
+            return key.hash();
+        }
+    };
+
     /// Add a file as a dependency of process being cached. When `hit` is
     /// called, the file's contents will be checked to ensure that it matches
     /// the contents from previous times.
@@ -386,7 +355,7 @@
     /// to access the contents of the file after calling `hit()` like so:
     ///
     /// ```
-    /// var file_contents = cache_hash.files.items[file_index].contents.?;
+    /// var file_contents = cache_hash.files.keys()[file_index].contents.?;
     /// ```
     pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize {
         assert(self.manifest_file == null);
@@ -396,7 +365,12 @@
         const prefixed_path = try self.cache.findPrefix(file_path);
         errdefer gpa.free(prefixed_path.sub_path);
 
-        self.files.addOneAssumeCapacity().* = .{
+        const gop = self.files.getOrPutAssumeCapacityAdapted(prefixed_path, FilesAdapter{});
+        if (gop.found_existing) {
+            gop.key_ptr.updateMaxSize(max_file_size);
+            return gop.index;
+        }
+        gop.key_ptr.* = .{
             .prefixed_path = prefixed_path,
             .contents = null,
             .max_file_size = max_file_size,
@@ -407,7 +381,7 @@
         self.hash.add(prefixed_path.prefix);
         self.hash.addBytes(prefixed_path.sub_path);
 
-        return self.files.items.len - 1;
+        return gop.index;
     }
 
     pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void {
@@ -487,7 +461,7 @@
 
         self.want_refresh_timestamp = true;
 
-        const input_file_count = self.files.items.len;
+        const input_file_count = self.files.entries.len;
         while (true) : (self.unhit(bin_digest, input_file_count)) {
             const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max);
             defer gpa.free(file_contents);
@@ -499,7 +473,7 @@
             if (try self.upgradeToExclusiveLock()) continue;
             self.manifest_dirty = true;
             while (idx < input_file_count) : (idx += 1) {
-                const ch_file = &self.files.items[idx];
+                const ch_file = &self.files.keys()[idx];
                 self.populateFileHash(ch_file) catch |err| {
                     self.failed_file_index = idx;
                     return err;
@@ -510,18 +484,6 @@
         while (line_iter.next()) |line| {
             defer idx += 1;
 
-            const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: {
-                const new = try self.files.addOne(gpa);
-                new.* = .{
-                    .prefixed_path = null,
-                    .contents = null,
-                    .max_file_size = null,
-                    .stat = undefined,
-                    .bin_digest = undefined,
-                };
-                break :blk new;
-            };
-
             var iter = mem.tokenizeScalar(u8, line, ' ');
             const size = iter.next() orelse return error.InvalidFormat;
             const inode = iter.next() orelse return error.InvalidFormat;
@@ -530,30 +492,61 @@
             const prefix_str = iter.next() orelse return error.InvalidFormat;
             const file_path = iter.rest();
 
-            cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
-            cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
-            cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
-            _ = fmt.hexToBytes(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;
+            const stat_size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
+            const stat_inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
+            const stat_mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
+            const file_bin_digest = b: {
+                if (digest_str.len != hex_digest_len) return error.InvalidFormat;
+                var bd: BinDigest = undefined;
+                _ = fmt.hexToBytes(&bd, digest_str) catch return error.InvalidFormat;
+                break :b bd;
+            };
+
             const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat;
             if (prefix >= self.cache.prefixes_len) return error.InvalidFormat;
 
-            if (file_path.len == 0) {
-                return error.InvalidFormat;
-            }
-            if (cache_hash_file.prefixed_path) |pp| {
-                if (pp.prefix != prefix or !mem.eql(u8, file_path, pp.sub_path)) {
-                    return error.InvalidFormat;
-                }
-            }
+            if (file_path.len == 0) return error.InvalidFormat;
 
-            if (cache_hash_file.prefixed_path == null) {
-                cache_hash_file.prefixed_path = .{
+            const cache_hash_file = f: {
+                const prefixed_path: PrefixedPath = .{
                     .prefix = prefix,
-                    .sub_path = try gpa.dupe(u8, file_path),
+                    .sub_path = file_path, // expires with file_contents
                 };
-            }
+                if (idx < input_file_count) {
+                    const file = &self.files.keys()[idx];
+                    if (!file.prefixed_path.eql(prefixed_path))
+                        return error.InvalidFormat;
+
+                    file.stat = .{
+                        .size = stat_size,
+                        .inode = stat_inode,
+                        .mtime = stat_mtime,
+                    };
+                    file.bin_digest = file_bin_digest;
+                    break :f file;
+                }
+                const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
+                errdefer assert(self.files.popOrNull() != null);
+                if (!gop.found_existing) {
+                    gop.key_ptr.* = .{
+                        .prefixed_path = .{
+                            .prefix = prefix,
+                            .sub_path = try gpa.dupe(u8, file_path),
+                        },
+                        .contents = null,
+                        .max_file_size = null,
+                        .stat = .{
+                            .size = stat_size,
+                            .inode = stat_inode,
+                            .mtime = stat_mtime,
+                        },
+                        .bin_digest = file_bin_digest,
+                    };
+                }
+                break :f gop.key_ptr;
+            };
 
-            const pp = cache_hash_file.prefixed_path.?;
+            const pp = cache_hash_file.prefixed_path;
             const dir = self.cache.prefixes()[pp.prefix].handle;
             const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) {
                 error.FileNotFound => {
@@ -617,7 +610,7 @@
             if (try self.upgradeToExclusiveLock()) continue;
             self.manifest_dirty = true;
             while (idx < input_file_count) : (idx += 1) {
-                const ch_file = &self.files.items[idx];
+                const ch_file = &self.files.keys()[idx];
                 self.populateFileHash(ch_file) catch |err| {
                     self.failed_file_index = idx;
                     return err;
@@ -640,12 +633,12 @@
         self.hash.hasher.update(&bin_digest);
 
         // Remove files not in the initial hash.
-        for (self.files.items[input_file_count..]) |*file| {
+        for (self.files.keys()[input_file_count..]) |*file| {
             file.deinit(self.cache.gpa);
         }
         self.files.shrinkRetainingCapacity(input_file_count);
 
-        for (self.files.items) |file| {
+        for (self.files.keys()) |file| {
             self.hash.hasher.update(&file.bin_digest);
         }
     }
@@ -685,7 +678,7 @@
     }
 
     fn populateFileHash(self: *Manifest, ch_file: *File) !void {
-        const pp = ch_file.prefixed_path.?;
+        const pp = ch_file.prefixed_path;
         const dir = self.cache.prefixes()[pp.prefix].handle;
         const file = try dir.openFile(pp.sub_path, .{});
         defer file.close();
@@ -751,7 +744,7 @@
             .bin_digest = undefined,
             .contents = null,
         };
-        errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
+        errdefer self.files.shrinkRetainingCapacity(self.files.entries.len - 1);
 
         try self.populateFileHash(new_ch_file);
@@ -759,9 +752,11 @@
     }
 
     /// Add a file as a dependency of process being cached, after the initial hash has been
-    /// calculated. This is useful for processes that don't know the all the files that
-    /// are depended on ahead of time. For example, a source file that can import other files
-    /// will need to be recompiled if the imported file is changed.
+    /// calculated.
+    ///
+    /// This is useful for processes that don't know all the files that are
+    /// depended on ahead of time. For example, a source file that can import
+    /// other files will need to be recompiled if the imported file is changed.
     pub fn addFilePost(self: *Manifest, file_path: []const u8) !void {
         assert(self.manifest_file != null);
 
@@ -769,17 +764,26 @@
         const prefixed_path = try self.cache.findPrefix(file_path);
         errdefer gpa.free(prefixed_path.sub_path);
 
-        const new_ch_file = try self.files.addOne(gpa);
-        new_ch_file.* = .{
+        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
+        errdefer assert(self.files.popOrNull() != null);
+
+        if (gop.found_existing) {
+            gpa.free(prefixed_path.sub_path);
+            return;
+        }
+
+        gop.key_ptr.* = .{
             .prefixed_path = prefixed_path,
             .max_file_size = null,
             .stat = undefined,
             .bin_digest = undefined,
             .contents = null,
         };
-        errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
 
-        try self.populateFileHash(new_ch_file);
+        self.files.lockPointers();
+        defer self.files.unlockPointers();
+
+        try self.populateFileHash(gop.key_ptr);
     }
 
     /// Like `addFilePost` but when the file contents have already been loaded from disk.
@@ -793,13 +797,20 @@
         assert(self.manifest_file != null);
         const gpa = self.cache.gpa;
 
-        const ch_file = try self.files.addOne(gpa);
-        errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
-
         const prefixed_path = try self.cache.findPrefixResolved(resolved_path);
         errdefer gpa.free(prefixed_path.sub_path);
 
-        ch_file.* = .{
+        const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
+        errdefer assert(self.files.popOrNull() != null);
+
+        if (gop.found_existing) {
+            gpa.free(prefixed_path.sub_path);
+            return;
+        }
+
+        const new_file = gop.key_ptr;
+
+        new_file.* = .{
             .prefixed_path = prefixed_path,
             .max_file_size = null,
             .stat = stat,
@@ -807,19 +818,19 @@
             .contents = null,
         };
 
-        if (self.isProblematicTimestamp(ch_file.stat.mtime)) {
+        if (self.isProblematicTimestamp(new_file.stat.mtime)) {
             // The actual file has an unreliable timestamp, force it to be hashed
-            ch_file.stat.mtime = 0;
-            ch_file.stat.inode = 0;
+            new_file.stat.mtime = 0;
+            new_file.stat.inode = 0;
         }
 
         {
             var hasher = hasher_init;
             hasher.update(bytes);
-            hasher.final(&ch_file.bin_digest);
+            hasher.final(&new_file.bin_digest);
         }
 
-        self.hash.hasher.update(&ch_file.bin_digest);
+        self.hash.hasher.update(&new_file.bin_digest);
     }
 
     pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_basename: []const u8) !void {
@@ -885,14 +896,14 @@
 
         const writer = contents.writer();
         try writer.writeAll(manifest_header ++ "\n");
-        for (self.files.items) |file| {
+        for (self.files.keys()) |file| {
             try writer.print("{d} {d} {d} {} {d} {s}\n", .{
                 file.stat.size,
                 file.stat.inode,
                 file.stat.mtime,
                 fmt.fmtSliceHexLower(&file.bin_digest),
-                file.prefixed_path.?.prefix,
-                file.prefixed_path.?.sub_path,
+                file.prefixed_path.prefix,
+                file.prefixed_path.sub_path,
             });
         }
 
@@ -961,7 +972,7 @@
             file.close();
         }
 
-        for (self.files.items) |*file| {
+        for (self.files.keys()) |*file| {
            file.deinit(self.cache.gpa);
        }
        self.files.deinit(self.cache.gpa);
@@ -1130,7 +1141,7 @@ test "check that changing a file makes cache fail" {
        // There should be nothing in the cache
        try testing.expectEqual(false, try ch.hit());
 
-        try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?));
+        try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.keys()[temp_file_idx].contents.?));
 
        digest1 = ch.final();
@@ -1150,7 +1161,7 @@
        try testing.expectEqual(false, try ch.hit());
 
        // The cache system does not keep the contents of re-hashed input files.
-        try testing.expect(ch.files.items[temp_file_idx].contents == null);
+        try testing.expect(ch.files.keys()[temp_file_idx].contents == null);
 
        digest2 = ch.final();
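The switch from `std.ArrayListUnmanaged(File)` to an adapted array hash map makes repeated registrations of one file idempotent: `addFile` returns the existing index and widens `max_file_size` via `updateMaxSize` instead of appending a duplicate entry. A minimal sketch of that call-site behavior, assuming `man` is an initialized `Cache.Manifest` (the path is hypothetical):

    const std = @import("std");
    const Cache = std.Build.Cache;

    // Sketch, not part of the patch: duplicate registrations collapse
    // into one entry instead of growing the file list.
    fn example(man: *Cache.Manifest) !void {
        const idx_a = try man.addFile("src/foo.zig", null);
        const idx_b = try man.addFile("src/foo.zig", 10 * 1024);
        // Same entry, same index.
        std.debug.assert(idx_a == idx_b);
        // updateMaxSize keeps the larger of the two size limits.
        std.debug.assert(man.files.keys()[idx_a].max_file_size.? == 10 * 1024);
    }

Because the map is insertion-ordered, indices handed out earlier remain stable as more files are added.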
ch.files.items[temp_file_idx].contents.?)); + try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.keys()[temp_file_idx].contents.?)); digest1 = ch.final(); @@ -1150,7 +1161,7 @@ test "check that changing a file makes cache fail" { try testing.expectEqual(false, try ch.hit()); // The cache system does not keep the contents of re-hashed input files. - try testing.expect(ch.files.items[temp_file_idx].contents == null); + try testing.expect(ch.files.keys()[temp_file_idx].contents == null); digest2 = ch.final(); diff --git a/lib/std/Build/Cache/Directory.zig b/lib/std/Build/Cache/Directory.zig new file mode 100644 index 000000000000..4de1cc18f1bb --- /dev/null +++ b/lib/std/Build/Cache/Directory.zig @@ -0,0 +1,74 @@ +const Directory = @This(); +const std = @import("../../std.zig"); +const fs = std.fs; +const fmt = std.fmt; +const Allocator = std.mem.Allocator; + +/// This field is redundant for operations that can act on the open directory handle +/// directly, but it is needed when passing the directory to a child process. +/// `null` means cwd. +path: ?[]const u8, +handle: fs.Dir, + +pub fn clone(d: Directory, arena: Allocator) Allocator.Error!Directory { + return .{ + .path = if (d.path) |p| try arena.dupe(u8, p) else null, + .handle = d.handle, + }; +} + +pub fn cwd() Directory { + return .{ + .path = null, + .handle = fs.cwd(), + }; +} + +pub fn join(self: Directory, allocator: Allocator, paths: []const []const u8) ![]u8 { + if (self.path) |p| { + // TODO clean way to do this with only 1 allocation + const part2 = try fs.path.join(allocator, paths); + defer allocator.free(part2); + return fs.path.join(allocator, &[_][]const u8{ p, part2 }); + } else { + return fs.path.join(allocator, paths); + } +} + +pub fn joinZ(self: Directory, allocator: Allocator, paths: []const []const u8) ![:0]u8 { + if (self.path) |p| { + // TODO clean way to do this with only 1 allocation + const part2 = try fs.path.join(allocator, paths); + defer allocator.free(part2); + return fs.path.joinZ(allocator, &[_][]const u8{ p, part2 }); + } else { + return fs.path.joinZ(allocator, paths); + } +} + +/// Whether or not the handle should be closed, or the path should be freed +/// is determined by usage, however this function is provided for convenience +/// if it happens to be what the caller needs. +pub fn closeAndFree(self: *Directory, gpa: Allocator) void { + self.handle.close(); + if (self.path) |p| gpa.free(p); + self.* = undefined; +} + +pub fn format( + self: Directory, + comptime fmt_string: []const u8, + options: fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + if (fmt_string.len != 0) fmt.invalidFmtError(fmt_string, self); + if (self.path) |p| { + try writer.writeAll(p); + try writer.writeAll(fs.path.sep_str); + } +} + +pub fn eql(self: Directory, other: Directory) bool { + return self.handle.fd == other.handle.fd; +} diff --git a/lib/std/Build/Cache/Path.zig b/lib/std/Build/Cache/Path.zig new file mode 100644 index 000000000000..99ce2e12eee4 --- /dev/null +++ b/lib/std/Build/Cache/Path.zig @@ -0,0 +1,154 @@ +root_dir: Cache.Directory, +/// The path, relative to the root dir, that this `Path` represents. +/// Empty string means the root_dir is the path. 
diff --git a/lib/std/Build/Cache/Path.zig b/lib/std/Build/Cache/Path.zig
new file mode 100644
index 000000000000..99ce2e12eee4
--- /dev/null
+++ b/lib/std/Build/Cache/Path.zig
@@ -0,0 +1,154 @@
+root_dir: Cache.Directory,
+/// The path, relative to the root dir, that this `Path` represents.
+/// Empty string means the root_dir is the path.
+sub_path: []const u8 = "",
+
+pub fn clone(p: Path, arena: Allocator) Allocator.Error!Path {
+    return .{
+        .root_dir = try p.root_dir.clone(arena),
+        .sub_path = try arena.dupe(u8, p.sub_path),
+    };
+}
+
+pub fn cwd() Path {
+    return .{ .root_dir = Cache.Directory.cwd() };
+}
+
+pub fn join(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
+    if (sub_path.len == 0) return p;
+    const parts: []const []const u8 =
+        if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
+    return .{
+        .root_dir = p.root_dir,
+        .sub_path = try fs.path.join(arena, parts),
+    };
+}
+
+pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
+    if (sub_path.len == 0) return p;
+    return .{
+        .root_dir = p.root_dir,
+        .sub_path = try fs.path.resolvePosix(arena, &.{ p.sub_path, sub_path }),
+    };
+}
+
+pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 {
+    const parts: []const []const u8 =
+        if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
+    return p.root_dir.join(allocator, parts);
+}
+
+pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 {
+    const parts: []const []const u8 =
+        if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
+    return p.root_dir.joinZ(allocator, parts);
+}
+
+pub fn openFile(
+    p: Path,
+    sub_path: []const u8,
+    flags: fs.File.OpenFlags,
+) !fs.File {
+    var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+    const joined_path = if (p.sub_path.len == 0) sub_path else p: {
+        break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
+            p.sub_path, sub_path,
+        }) catch return error.NameTooLong;
+    };
+    return p.root_dir.handle.openFile(joined_path, flags);
+}
+
+pub fn makeOpenPath(p: Path, sub_path: []const u8, opts: fs.OpenDirOptions) !fs.Dir {
+    var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+    const joined_path = if (p.sub_path.len == 0) sub_path else p: {
+        break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
+            p.sub_path, sub_path,
+        }) catch return error.NameTooLong;
+    };
+    return p.root_dir.handle.makeOpenPath(joined_path, opts);
+}
+
+pub fn statFile(p: Path, sub_path: []const u8) !fs.Dir.Stat {
+    var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+    const joined_path = if (p.sub_path.len == 0) sub_path else p: {
+        break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
+            p.sub_path, sub_path,
+        }) catch return error.NameTooLong;
+    };
+    return p.root_dir.handle.statFile(joined_path);
+}
+
+pub fn atomicFile(
+    p: Path,
+    sub_path: []const u8,
+    options: fs.Dir.AtomicFileOptions,
+    buf: *[fs.MAX_PATH_BYTES]u8,
+) !fs.AtomicFile {
+    const joined_path = if (p.sub_path.len == 0) sub_path else p: {
+        break :p std.fmt.bufPrint(buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
+            p.sub_path, sub_path,
+        }) catch return error.NameTooLong;
+    };
+    return p.root_dir.handle.atomicFile(joined_path, options);
+}
+
+pub fn access(p: Path, sub_path: []const u8, flags: fs.File.OpenFlags) !void {
+    var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+    const joined_path = if (p.sub_path.len == 0) sub_path else p: {
+        break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
+            p.sub_path, sub_path,
+        }) catch return error.NameTooLong;
+    };
+    return p.root_dir.handle.access(joined_path, flags);
+}
+
+pub fn makePath(p: Path, sub_path: []const u8) !void {
+    var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
+    const joined_path = if (p.sub_path.len == 0) sub_path else p: {
+        break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
+            p.sub_path, sub_path,
+        }) catch return error.NameTooLong;
+    };
+    return p.root_dir.handle.makePath(joined_path);
+}
+
+pub fn format(
+    self: Path,
+    comptime fmt_string: []const u8,
+    options: std.fmt.FormatOptions,
+    writer: anytype,
+) !void {
+    if (fmt_string.len == 1) {
+        // Quote-escape the string.
+        const stringEscape = std.zig.stringEscape;
+        const f = switch (fmt_string[0]) {
+            'q' => "",
+            '\'' => '\'',
+            else => @compileError("unsupported format string: " ++ fmt_string),
+        };
+        if (self.root_dir.path) |p| {
+            try stringEscape(p, f, options, writer);
+            if (self.sub_path.len > 0) try stringEscape(fs.path.sep_str, f, options, writer);
+        }
+        if (self.sub_path.len > 0) {
+            try stringEscape(self.sub_path, f, options, writer);
+        }
+        return;
+    }
+    if (fmt_string.len > 0)
+        std.fmt.invalidFmtError(fmt_string, self);
+    if (self.root_dir.path) |p| {
+        try writer.writeAll(p);
+        try writer.writeAll(fs.path.sep_str);
+    }
+    if (self.sub_path.len > 0) {
+        try writer.writeAll(self.sub_path);
+        try writer.writeAll(fs.path.sep_str);
+    }
+}
+
+const Path = @This();
+const std = @import("../../std.zig");
+const fs = std.fs;
+const Allocator = std.mem.Allocator;
+const Cache = std.Build.Cache;
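A `Path` is a `Directory` plus a sub-path relative to it, so deriving child paths shares the already-open handle instead of reopening directories. A rough usage sketch (cache layout and file names are invented):

    const std = @import("std");
    const Cache = std.Build.Cache;

    // Sketch: derived paths reuse root_dir's open handle; only sub_path grows.
    fn example(arena: std.mem.Allocator) !void {
        const root: Cache.Path = .{
            .root_dir = Cache.Directory.cwd(),
            .sub_path = "zig-cache",
        };
        const out_dir = try root.join(arena, "o" ++ std.fs.path.sep_str ++ "1a2b3c");
        // out_dir.sub_path is now "zig-cache" ++ sep ++ "o" ++ sep ++ "1a2b3c".
        const file = try out_dir.openFile("output.o", .{});
        defer file.close();
    }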
diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig
index f67cba6c3fb8..48af9e54d6fa 100644
--- a/lib/std/Build/Step.zig
+++ b/lib/std/Build/Step.zig
@@ -544,7 +544,7 @@ pub fn cacheHit(s: *Step, man: *std.Build.Cache.Manifest) !bool {
 
 fn failWithCacheError(s: *Step, man: *const std.Build.Cache.Manifest, err: anyerror) anyerror {
     const i = man.failed_file_index orelse return err;
-    const pp = man.files.items[i].prefixed_path orelse return err;
+    const pp = man.files.keys()[i].prefixed_path;
     const prefix = man.cache.prefixes()[pp.prefix].path orelse "";
     return s.fail("{s}: {s}/{s}", .{ @errorName(err), prefix, pp.sub_path });
 }
diff --git a/lib/std/array_hash_map.zig b/lib/std/array_hash_map.zig
index d3bd1b2a0467..ba086f876446 100644
--- a/lib/std/array_hash_map.zig
+++ b/lib/std/array_hash_map.zig
@@ -9,23 +9,26 @@ const Wyhash = std.hash.Wyhash;
 const Allocator = mem.Allocator;
 const hash_map = @This();
 
-/// An ArrayHashMap with default hash and equal functions.
-/// See AutoContext for a description of the hash and equal implementations.
+/// An `ArrayHashMap` with default hash and equal functions.
+///
+/// See `AutoContext` for a description of the hash and equal implementations.
 pub fn AutoArrayHashMap(comptime K: type, comptime V: type) type {
     return ArrayHashMap(K, V, AutoContext(K), !autoEqlIsCheap(K));
 }
 
-/// An ArrayHashMapUnmanaged with default hash and equal functions.
-/// See AutoContext for a description of the hash and equal implementations.
+/// An `ArrayHashMapUnmanaged` with default hash and equal functions.
+///
+/// See `AutoContext` for a description of the hash and equal implementations.
 pub fn AutoArrayHashMapUnmanaged(comptime K: type, comptime V: type) type {
     return ArrayHashMapUnmanaged(K, V, AutoContext(K), !autoEqlIsCheap(K));
 }
 
-/// Builtin hashmap for strings as keys.
+/// An `ArrayHashMap` with strings as keys.
 pub fn StringArrayHashMap(comptime V: type) type {
     return ArrayHashMap([]const u8, V, StringContext, true);
 }
 
+/// An `ArrayHashMapUnmanaged` with strings as keys.
 pub fn StringArrayHashMapUnmanaged(comptime V: type) type {
     return ArrayHashMapUnmanaged([]const u8, V, StringContext, true);
 }
@@ -50,29 +53,33 @@ pub fn hashString(s: []const u8) u32 {
     return @as(u32, @truncate(std.hash.Wyhash.hash(0, s)));
 }
 
-/// Insertion order is preserved.
-/// Deletions perform a "swap removal" on the entries list.
+/// A hash table of keys and values, each stored sequentially.
+///
+/// Insertion order is preserved. In general, this data structure supports the same
+/// operations as `std.ArrayList`.
+///
+/// Deletion operations:
+/// * `swapRemove` - O(1)
+/// * `orderedRemove` - O(N)
+///
 /// Modifying the hash map while iterating is allowed, however, one must understand
 /// the (well defined) behavior when mixing insertions and deletions with iteration.
-/// For a hash map that can be initialized directly that does not store an Allocator
-/// field, see `ArrayHashMapUnmanaged`.
-/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
-/// functions. It does not store each item's hash in the table. Setting `store_hash`
-/// to `true` incurs slightly more memory cost by storing each key's hash in the table
-/// but only has to call `eql` for hash collisions.
-/// If typical operations (except iteration over entries) need to be faster, prefer
-/// the alternative `std.HashMap`.
-/// Context must be a struct type with two member functions:
-///   hash(self, K) u32
-///   eql(self, K, K, usize) bool
-/// Adapted variants of many functions are provided. These variants
-/// take a pseudo key instead of a key. Their context must have the functions:
-///   hash(self, PseudoKey) u32
-///   eql(self, PseudoKey, K, usize) bool
+///
+/// See `ArrayHashMapUnmanaged` for a variant of this data structure that accepts an
+/// `Allocator` as a parameter when needed rather than storing it.
 pub fn ArrayHashMap(
     comptime K: type,
     comptime V: type,
+    /// A namespace that provides these two functions:
+    /// * `pub fn hash(self, K) u32`
+    /// * `pub fn eql(self, K, K, usize) bool`
+    ///
     comptime Context: type,
+    /// When `false`, this data structure is biased towards cheap `eql`
+    /// functions and avoids storing each key's hash in the table. Setting
+    /// `store_hash` to `true` incurs more memory cost but limits `eql` to
+    /// being called only once per insertion/deletion (provided there are no
+    /// hash collisions).
    comptime store_hash: bool,
 ) type {
     return struct {
@@ -472,34 +479,40 @@ pub fn ArrayHashMap(
     };
 }
 
-/// General purpose hash table.
-/// Insertion order is preserved.
-/// Deletions perform a "swap removal" on the entries list.
+/// A hash table of keys and values, each stored sequentially.
+///
+/// Insertion order is preserved. In general, this data structure supports the same
+/// operations as `std.ArrayListUnmanaged`.
+///
+/// Deletion operations:
+/// * `swapRemove` - O(1)
+/// * `orderedRemove` - O(N)
+///
 /// Modifying the hash map while iterating is allowed, however, one must understand
 /// the (well defined) behavior when mixing insertions and deletions with iteration.
-/// This type does not store an Allocator field - the Allocator must be passed in
+///
+/// This type does not store an `Allocator` field - the `Allocator` must be passed in
 /// with each function call that requires it. See `ArrayHashMap` for a type that stores
-/// an Allocator field for convenience.
+/// an `Allocator` field for convenience.
+///
 /// Can be initialized directly using the default field values.
+///
 /// This type is designed to have low overhead for small numbers of entries. When
 /// `store_hash` is `false` and the number of entries in the map is less than 9,
 /// the overhead cost of using `ArrayHashMapUnmanaged` rather than `std.ArrayList` is
 /// only a single pointer-sized integer.
-/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
-/// functions. It does not store each item's hash in the table. Setting `store_hash`
-/// to `true` incurs slightly more memory cost by storing each key's hash in the table
-/// but guarantees only one call to `eql` per insertion/deletion.
-/// Context must be a struct type with two member functions:
-///   hash(self, K) u32
-///   eql(self, K, K) bool
-/// Adapted variants of many functions are provided. These variants
-/// take a pseudo key instead of a key. Their context must have the functions:
-///   hash(self, PseudoKey) u32
-///   eql(self, PseudoKey, K) bool
pub fn ArrayHashMapUnmanaged(
     comptime K: type,
     comptime V: type,
+    /// A namespace that provides these two functions:
+    /// * `pub fn hash(self, K) u32`
+    /// * `pub fn eql(self, K, K, usize) bool`
     comptime Context: type,
+    /// When `false`, this data structure is biased towards cheap `eql`
+    /// functions and avoids storing each key's hash in the table. Setting
+    /// `store_hash` to `true` incurs more memory cost but limits `eql` to
+    /// being called only once per insertion/deletion (provided there are no
+    /// hash collisions).
     comptime store_hash: bool,
 ) type {
     return struct {
@@ -516,10 +529,6 @@ pub fn ArrayHashMapUnmanaged(
         /// Used to detect memory safety violations.
         pointer_stability: std.debug.SafetyLock = .{},
 
-        comptime {
-            std.hash_map.verifyContext(Context, K, K, u32, true);
-        }
-
         /// Modifying the key is allowed only if it does not change the hash.
         /// Modifying the value is allowed.
         /// Entry pointers become invalid whenever this ArrayHashMap is modified,
@@ -1834,27 +1843,16 @@ pub fn ArrayHashMapUnmanaged(
             }
         }
 
-        inline fn checkedHash(ctx: anytype, key: anytype) u32 {
-            comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(key), K, u32, true);
+        fn checkedHash(ctx: anytype, key: anytype) u32 {
             // If you get a compile error on the next line, it means that your
             // generic hash function doesn't accept your key.
-            const hash = ctx.hash(key);
-            if (@TypeOf(hash) != u32) {
-                @compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic hash function that returns the wrong type!\n" ++
-                    @typeName(u32) ++ " was expected, but found " ++ @typeName(@TypeOf(hash)));
-            }
-            return hash;
+            return ctx.hash(key);
         }
-        inline fn checkedEql(ctx: anytype, a: anytype, b: K, b_index: usize) bool {
-            comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(a), K, u32, true);
+
+        fn checkedEql(ctx: anytype, a: anytype, b: K, b_index: usize) bool {
             // If you get a compile error on the next line, it means that your
             // generic eql function doesn't accept (self, adapt key, K, index).
-            const eql = ctx.eql(a, b, b_index);
-            if (@TypeOf(eql) != bool) {
-                @compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic eql function that returns the wrong type!\n" ++
-                    @typeName(bool) ++ " was expected, but found " ++ @typeName(@TypeOf(eql)));
-            }
-            return eql;
+            return ctx.eql(a, b, b_index);
         }
 
         fn dumpState(self: Self, comptime keyFmt: []const u8, comptime valueFmt: []const u8) void {
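These Context requirements are the same contract the Cache diff satisfies with `FilesContext`. A minimal standalone context might look like the following sketch; the case-insensitive string map is purely illustrative, and note the extra `b_index` parameter that array hash maps pass to `eql`:

    const std = @import("std");

    // Illustrative context: string keys compared case-insensitively.
    const CaseInsensitiveContext = struct {
        pub fn hash(_: @This(), key: []const u8) u32 {
            var h = std.hash.Wyhash.init(0);
            for (key) |c| h.update(&.{std.ascii.toLower(c)});
            return @truncate(h.final());
        }
        pub fn eql(_: @This(), a: []const u8, b: []const u8, b_index: usize) bool {
            _ = b_index; // Unused here; adapters may use it to reach sibling arrays.
            return std.ascii.eqlIgnoreCase(a, b);
        }
    };

    // "GET" and "get" would land in the same slot of this map.
    const Headers = std.ArrayHashMapUnmanaged([]const u8, []const u8, CaseInsensitiveContext, false);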
diff --git a/src/Compilation.zig b/src/Compilation.zig
index 7a84848b80b2..2c047504e9f8 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -1999,7 +1999,7 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void
 
            const is_hit = man.hit() catch |err| {
                const i = man.failed_file_index orelse return err;
-                const pp = man.files.items[i].prefixed_path orelse return err;
+                const pp = man.files.keys()[i].prefixed_path;
                const prefix = man.cache.prefixes()[pp.prefix];
                return comp.setMiscFailure(
                    .check_whole_cache,
@@ -4147,7 +4147,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8, owner_mod: *Package.Module
    const prev_hash_state = man.hash.peekBin();
    const actual_hit = hit: {
        _ = try man.hit();
-        if (man.files.items.len == 0) {
+        if (man.files.entries.len == 0) {
            man.unhit(prev_hash_state, 0);
            break :hit false;
        }
diff --git a/src/Package.zig b/src/Package.zig
index 1bb02c5a5afc..e173665e1192 100644
--- a/src/Package.zig
+++ b/src/Package.zig
@@ -2,162 +2,3 @@ pub const Module = @import("Package/Module.zig");
 pub const Fetch = @import("Package/Fetch.zig");
 pub const build_zig_basename = "build.zig";
 pub const Manifest = @import("Package/Manifest.zig");
-
-pub const Path = struct {
-    root_dir: Cache.Directory,
-    /// The path, relative to the root dir, that this `Path` represents.
-    /// Empty string means the root_dir is the path.
-    sub_path: []const u8 = "",
-
-    pub fn clone(p: Path, arena: Allocator) Allocator.Error!Path {
-        return .{
-            .root_dir = try p.root_dir.clone(arena),
-            .sub_path = try arena.dupe(u8, p.sub_path),
-        };
-    }
-
-    pub fn cwd() Path {
-        return .{ .root_dir = Cache.Directory.cwd() };
-    }
-
-    pub fn join(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
-        if (sub_path.len == 0) return p;
-        const parts: []const []const u8 =
-            if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
-        return .{
-            .root_dir = p.root_dir,
-            .sub_path = try fs.path.join(arena, parts),
-        };
-    }
-
-    pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
-        if (sub_path.len == 0) return p;
-        return .{
-            .root_dir = p.root_dir,
-            .sub_path = try fs.path.resolvePosix(arena, &.{ p.sub_path, sub_path }),
-        };
-    }
-
-    pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 {
-        const parts: []const []const u8 =
-            if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
-        return p.root_dir.join(allocator, parts);
-    }
-
-    pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 {
-        const parts: []const []const u8 =
-            if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
-        return p.root_dir.joinZ(allocator, parts);
-    }
-
-    pub fn openFile(
-        p: Path,
-        sub_path: []const u8,
-        flags: fs.File.OpenFlags,
-    ) !fs.File {
-        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
-        const joined_path = if (p.sub_path.len == 0) sub_path else p: {
-            break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
-                p.sub_path, sub_path,
-            }) catch return error.NameTooLong;
-        };
-        return p.root_dir.handle.openFile(joined_path, flags);
-    }
-
-    pub fn makeOpenPath(p: Path, sub_path: []const u8, opts: fs.OpenDirOptions) !fs.Dir {
-        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
-        const joined_path = if (p.sub_path.len == 0) sub_path else p: {
-            break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
-                p.sub_path, sub_path,
-            }) catch return error.NameTooLong;
-        };
-        return p.root_dir.handle.makeOpenPath(joined_path, opts);
-    }
-
-    pub fn statFile(p: Path, sub_path: []const u8) !fs.Dir.Stat {
-        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
-        const joined_path = if (p.sub_path.len == 0) sub_path else p: {
-            break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
-                p.sub_path, sub_path,
-            }) catch return error.NameTooLong;
-        };
-        return p.root_dir.handle.statFile(joined_path);
-    }
-
-    pub fn atomicFile(
-        p: Path,
-        sub_path: []const u8,
-        options: fs.Dir.AtomicFileOptions,
-        buf: *[fs.MAX_PATH_BYTES]u8,
-    ) !fs.AtomicFile {
-        const joined_path = if (p.sub_path.len == 0) sub_path else p: {
-            break :p std.fmt.bufPrint(buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
-                p.sub_path, sub_path,
-            }) catch return error.NameTooLong;
-        };
-        return p.root_dir.handle.atomicFile(joined_path, options);
-    }
-
-    pub fn access(p: Path, sub_path: []const u8, flags: fs.File.OpenFlags) !void {
-        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
-        const joined_path = if (p.sub_path.len == 0) sub_path else p: {
-            break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
-                p.sub_path, sub_path,
-            }) catch return error.NameTooLong;
-        };
-        return p.root_dir.handle.access(joined_path, flags);
-    }
-
-    pub fn makePath(p: Path, sub_path: []const u8) !void {
-        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
-        const joined_path = if (p.sub_path.len == 0) sub_path else p: {
-            break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
-                p.sub_path, sub_path,
-            }) catch return error.NameTooLong;
-        };
-        return p.root_dir.handle.makePath(joined_path);
-    }
-
-    pub fn format(
-        self: Path,
-        comptime fmt_string: []const u8,
-        options: std.fmt.FormatOptions,
-        writer: anytype,
-    ) !void {
-        if (fmt_string.len == 1) {
-            // Quote-escape the string.
-            const stringEscape = std.zig.stringEscape;
-            const f = switch (fmt_string[0]) {
-                'q' => "",
-                '\'' => '\'',
-                else => @compileError("unsupported format string: " ++ fmt_string),
-            };
-            if (self.root_dir.path) |p| {
-                try stringEscape(p, f, options, writer);
-                if (self.sub_path.len > 0) try stringEscape(fs.path.sep_str, f, options, writer);
-            }
-            if (self.sub_path.len > 0) {
-                try stringEscape(self.sub_path, f, options, writer);
-            }
-            return;
-        }
-        if (fmt_string.len > 0)
-            std.fmt.invalidFmtError(fmt_string, self);
-        if (self.root_dir.path) |p| {
-            try writer.writeAll(p);
-            try writer.writeAll(fs.path.sep_str);
-        }
-        if (self.sub_path.len > 0) {
-            try writer.writeAll(self.sub_path);
-            try writer.writeAll(fs.path.sep_str);
-        }
-    }
-};
-
-const Package = @This();
-const builtin = @import("builtin");
-const std = @import("std");
-const fs = std.fs;
-const Allocator = std.mem.Allocator;
-const assert = std.debug.assert;
-const Cache = std.Build.Cache;
diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig
index a40bb539f78b..d0cfd5ab9491 100644
--- a/src/Package/Fetch.zig
+++ b/src/Package/Fetch.zig
@@ -33,7 +33,7 @@ location_tok: std.zig.Ast.TokenIndex,
 hash_tok: std.zig.Ast.TokenIndex,
 name_tok: std.zig.Ast.TokenIndex,
 lazy_status: LazyStatus,
-parent_package_root: Package.Path,
+parent_package_root: Cache.Path,
 parent_manifest_ast: ?*const std.zig.Ast,
 prog_node: *std.Progress.Node,
 job_queue: *JobQueue,
@@ -50,7 +50,7 @@ allow_missing_paths_field: bool,
 
 /// This will either be relative to `global_cache`, or to the build root of
 /// the root package.
-package_root: Package.Path,
+package_root: Cache.Path,
 error_bundle: ErrorBundle.Wip,
 manifest: ?Manifest,
 manifest_ast: std.zig.Ast,
@@ -263,7 +263,7 @@ pub const JobQueue = struct {
 
 pub const Location = union(enum) {
     remote: Remote,
     /// A directory found inside the parent package.
-    relative_path: Package.Path,
+    relative_path: Cache.Path,
     /// Recursive Fetch tasks will never use this Location, but it may be
     /// passed in by the CLI. Indicates the file contents here should be copied
     /// into the global package cache. It may be a file relative to the cwd or
@@ -564,7 +564,7 @@ fn checkBuildFileExistence(f: *Fetch) RunError!void {
 }
 
 /// This function populates `f.manifest` or leaves it `null`.
-fn loadManifest(f: *Fetch, pkg_root: Package.Path) RunError!void {
+fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void {
     const eb = &f.error_bundle;
     const arena = f.arena.allocator();
     const manifest_bytes = pkg_root.root_dir.handle.readFileAllocOptions(
@@ -722,7 +722,7 @@ fn queueJobsForDeps(f: *Fetch) RunError!void {
 }
 
 pub fn relativePathDigest(
-    pkg_root: Package.Path,
+    pkg_root: Cache.Path,
     cache_root: Cache.Directory,
 ) Manifest.MultiHashHexDigest {
     var hasher = Manifest.Hash.init(.{});
@@ -1658,7 +1658,7 @@ const Filter = struct {
 };
 
 pub fn depDigest(
-    pkg_root: Package.Path,
+    pkg_root: Cache.Path,
     cache_root: Cache.Directory,
     dep: Manifest.Dependency,
 ) ?Manifest.MultiHashHexDigest {
diff --git a/src/Package/Module.zig b/src/Package/Module.zig
index d6b89efb41b2..f9cb4475ad50 100644
--- a/src/Package/Module.zig
+++ b/src/Package/Module.zig
@@ -3,7 +3,7 @@
 //! to Zcu. https://github.com/ziglang/zig/issues/14307
 
 /// Only files inside this directory can be imported.
-root: Package.Path,
+root: Cache.Path,
 /// Relative to `root`. May contain path separators.
 root_src_path: []const u8,
 /// Name used in compile errors. Looks like "root.foo.bar".
@@ -69,7 +69,7 @@ pub const CreateOptions = struct {
     builtin_modules: ?*std.StringHashMapUnmanaged(*Module),
 
     pub const Paths = struct {
-        root: Package.Path,
+        root: Cache.Path,
         /// Relative to `root`. May contain path separators.
         root_src_path: []const u8,
     };
@@ -463,7 +463,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
 
 /// All fields correspond to `CreateOptions`.
 pub const LimitedOptions = struct {
-    root: Package.Path,
+    root: Cache.Path,
     root_src_path: []const u8,
     fully_qualified_name: []const u8,
 };
diff --git a/src/glibc.zig b/src/glibc.zig
index 9765e0ad78bb..365f5cab2b43 100644
--- a/src/glibc.zig
+++ b/src/glibc.zig
@@ -713,7 +713,7 @@ pub fn buildSharedObjects(comp: *Compilation, prog_node: *std.Progress.Node) !vo
     };
     defer o_directory.handle.close();
 
-    const abilists_contents = man.files.items[abilists_index].contents.?;
+    const abilists_contents = man.files.keys()[abilists_index].contents.?;
     const metadata = try loadMetaData(comp.gpa, abilists_contents);
     defer metadata.destroy(comp.gpa);
diff --git a/src/main.zig b/src/main.zig
index 4407ca72ced7..c60cc100ab40 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -6143,7 +6143,7 @@ fn cmdAstCheck(
     }
 
     file.mod = try Package.Module.createLimited(arena, .{
-        .root = Package.Path.cwd(),
+        .root = Cache.Path.cwd(),
         .root_src_path = file.sub_file_path,
         .fully_qualified_name = "root",
     });
@@ -6316,7 +6316,7 @@ fn cmdChangelist(
     };
 
     file.mod = try Package.Module.createLimited(arena, .{
-        .root = Package.Path.cwd(),
+        .root = Cache.Path.cwd(),
         .root_src_path = file.sub_file_path,
         .fully_qualified_name = "root",
     });
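The `getOrPutAdapted(gpa, prefixed_path, FilesAdapter{})` calls in the Cache diff probe the table with a bare `PrefixedPath` rather than constructing a full `File` first. The same adapter pattern in miniature, with all types invented for illustration:

    const std = @import("std");

    const Entry = struct { name: []const u8, data: u32 };

    const EntryContext = struct {
        pub fn hash(_: @This(), e: Entry) u32 {
            return std.array_hash_map.hashString(e.name);
        }
        pub fn eql(_: @This(), a: Entry, b: Entry, b_index: usize) bool {
            _ = b_index;
            return std.mem.eql(u8, a.name, b.name);
        }
    };

    // The adapter hashes and compares a cheap pseudo-key (the name alone)
    // against stored Entry keys, mirroring FilesAdapter above.
    const NameAdapter = struct {
        pub fn hash(_: @This(), name: []const u8) u32 {
            return std.array_hash_map.hashString(name);
        }
        pub fn eql(_: @This(), name: []const u8, e: Entry, b_index: usize) bool {
            _ = b_index;
            return std.mem.eql(u8, name, e.name);
        }
    };

    fn getOrAdd(
        gpa: std.mem.Allocator,
        map: *std.ArrayHashMapUnmanaged(Entry, void, EntryContext, false),
        name: []const u8,
    ) !*Entry {
        // Probe by name alone; only build the full Entry on a miss.
        const gop = try map.getOrPutAdapted(gpa, name, NameAdapter{});
        if (!gop.found_existing) gop.key_ptr.* = .{ .name = name, .data = 0 };
        return gop.key_ptr;
    }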