Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 82 additions & 4 deletions tar/tar_stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,38 @@ export interface TarStreamDir {
options?: TarStreamOptions;
}

/**
* The interface required to provide a symbolic link.
*
* A symbolic link contributes only a header to the archive; it has no content
* of its own. When `options.mode` is not set, it defaults to `0o777`.
*
* @experimental **UNSTABLE**: New API, yet to be vetted.
*/
export interface TarStreamSymlink {
/**
* The type of the input. Always `"symlink"` for a symbolic link; this is the
* value used to tell it apart from file and directory inputs.
*/
type: "symlink";
/**
* The path of the symbolic link, relative to the archive's root directory.
*/
path: string;
/**
* The target path that the symbolic link points to.
* Must not be empty and must be at most 100 bytes (not characters) once
* encoded, per the ustar format spec. A `TypeError` is thrown otherwise.
*/
linkname: string;
/**
* The metadata of the symbolic link.
*/
options?: TarStreamOptions;
}

/**
* A union type merging all the TarStream interfaces that can be piped into the
* TarStream class. The `type` property of each input identifies which kind of
* entry it describes.
*
* @experimental **UNSTABLE**: New API, yet to be vetted.
*/
export type TarStreamInput = TarStreamFile | TarStreamDir;
export type TarStreamInput = TarStreamFile | TarStreamDir | TarStreamSymlink;

const SLASH_CODE_POINT = "/".charCodeAt(0);

Expand All @@ -121,7 +146,8 @@ const SLASH_CODE_POINT = "/".charCodeAt(0);
* The ustar file format is used for creating the tar archive. While this
* format is compatible with most tar readers, the format has several
* limitations, including:
* - Paths must be at most 256 characters.
* - Paths must be at most 256 bytes.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Corrected this text because what the validation logic actually checks is the number of bytes, not the number of characters.
The ustar spec mentions that it only supports ASCII characters (in which case the number of characters == the number of bytes), but it seems like the existing validation for paths doesn't check if a path consists only of ASCII or not. Also, it appears that most tar utility tools (like gnu tar) can handle non-ASCII characters without any issue. So, although it's not strictly compliant with the ustar spec, TarStream can process non-ASCII characters.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ustar spec, the one linked in the file, makes no mention of requiring any specific encoding for name, linkname, and prefix. It specifies that many of the other fields are to be written in ASCII, but makes no such requirement for these three. As long as they are within the byte limit they are compliant.

I have no problem with this change in wording though.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's right, thanks for the correction. I might have been confused with something else.

* - Symlink target paths (linkname) must be at most 100 bytes.
* - Files must be at most 8 GiBs in size, or 64 GiBs if `sizeExtension` is set
* to true.
* - Sparse files are not supported.
Expand Down Expand Up @@ -229,7 +255,11 @@ export class TarStream implements TransformStream<TarStreamInput, Uint8Array_> {
buffer: Uint8Array_,
): void {
input.options ??= {};
input.options.mode ??= input.type === "file" ? 0o644 : 0o755;
input.options.mode ??= input.type === "file"
? 0o644
: input.type === "symlink"
? 0o777
: 0o755;
input.options.uid ??= 0o0;
input.options.gid ??= 0o0;
input.options.mtime ??= Math.floor(Date.now() / 1000);
Expand Down Expand Up @@ -271,9 +301,18 @@ export class TarStream implements TransformStream<TarStreamInput, Uint8Array_> {
// checksum (8)
buffer.fill(32, 148, 156);
// typeflag (1)
buffer[156] = input.type === "file" ? 48 : 53;
buffer[156] = input.type === "file"
? 48
: input.type === "symlink"
? 50
: 53;
// linkname (100)
buffer.fill(0, 157, 257);
if (input.type === "symlink") {
checkLinkname(
this.#encoder.encodeInto(input.linkname, buffer.subarray(157)).written,
);
}
// magic (6)
buffer[257] = 117;
buffer[258] = 115;
Expand Down Expand Up @@ -319,6 +358,7 @@ export class TarStream implements TransformStream<TarStreamInput, Uint8Array_> {
buffer = yield buffer.subarray(0, 512);

if (input.type === "directory") continue;
if (input.type === "symlink") continue;

let size = 0;
const reader = toByteStream(input.readable).getReader({ mode: "byob" });
Expand Down Expand Up @@ -641,6 +681,44 @@ export function assertValidPath(path: string): void {
parsePath(new TextEncoder().encodeInto(path, buffer).written, buffer);
}

/**
 * Checks that a symbolic link target can be stored by a
 * {@linkcode TarStream}.
 *
 * The linkname is encoded as UTF-8 and the resulting byte count is validated:
 * it must be at least 1 byte and at most 100 bytes.
 *
 * @experimental **UNSTABLE**: New API, yet to be vetted.
 *
 * @param linkname The target path of the symbolic link, as a string.
 * @throws {TypeError} If the encoded linkname is empty or exceeds 100 bytes.
 *
 * @example Usage
 * ```ts no-assert ignore
 * import { assertValidLinkname, TarStream, type TarStreamInput } from "@std/tar";
 *
 * const linkname = "./target";
 * assertValidLinkname(linkname);
 * await ReadableStream.from<TarStreamInput>([
 *   { type: "symlink", path: "./link", linkname },
 * ])
 *   .pipeThrough(new TarStream())
 *   .pipeTo((await Deno.create("./out.tar")).writable);
 * ```
 */
export function assertValidLinkname(linkname: string): void {
  // Scratch space larger than the 100-byte limit, so an over-long linkname is
  // always detected. The byte count reported on failure cannot exceed this
  // buffer's size.
  const scratch = new Uint8Array(355);
  const { written } = new TextEncoder().encodeInto(linkname, scratch);
  checkLinkname(written);
}

/**
 * Validates the encoded size of a symbolic link target.
 *
 * @param bytes The number of bytes the linkname occupies once encoded.
 * @throws {TypeError} If `bytes` is 0 or greater than 100.
 */
function checkLinkname(bytes: number): void {
  const isEmpty = bytes === 0;
  const isTooLong = bytes > 100;
  if (!(isEmpty || isTooLong)) return;

  const message = isEmpty
    ? "Cannot add to the tar archive: Invalid Linkname provided"
    : `Cannot add to the tar archive: Linkname cannot exceed 100 bytes: The linkname length is ${bytes}`;
  throw new TypeError(message);
}

function parseOctalInto(x: number, buffer: Uint8Array_): void {
for (let i = buffer.length - 1; i >= 0; --i) {
buffer[i] = x % 8 + 48;
Expand Down
181 changes: 181 additions & 0 deletions tar/tar_stream_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@
import { assert, assertEquals } from "@std/assert";
import { concat } from "@std/bytes";
import {
assertValidLinkname,
assertValidPath,
assertValidTarStreamOptions,
TarStream,
type TarStreamInput,
type Uint8Array_,
} from "./tar_stream.ts";
import { UntarStream } from "./untar_stream.ts";
import { assertThrows } from "@std/assert/throws";
import { assertRejects } from "@std/assert/rejects";

Expand Down Expand Up @@ -374,3 +376,182 @@ Deno.test("TarStream() decoding header", async () => {
new Uint8Array(12),
);
});

// A lone symlink entry should produce a header-only archive.
Deno.test("TarStream() with symlink", async () => {
  const inputs: TarStreamInput[] = [
    { type: "symlink", path: "./link", linkname: "./target" },
  ];
  const archive = ReadableStream
    .from<TarStreamInput>(inputs)
    .pipeThrough(new TarStream());
  const bytes = await new Response(archive).bytes();

  // 512 (header) + 1024 (end padding)
  assertEquals(bytes.length, 512 + 1024);
});

// The typeflag byte of a symlink header must identify the entry as a symlink.
Deno.test("TarStream() symlink has correct typeflag", async () => {
  const archive = ReadableStream
    .from<TarStreamInput>([
      { type: "symlink", path: "./link", linkname: "./target" },
    ])
    .pipeThrough(new TarStream());
  const bytes = await new Response(archive).bytes();

  // The typeflag lives at byte 156; a symlink is marked with ASCII '2' (50).
  const typeflag = bytes[156];
  assertEquals(typeflag, 50);
});

// The link target must be written into the header's linkname field.
Deno.test("TarStream() symlink writes linkname to header", async () => {
  const archive = ReadableStream
    .from<TarStreamInput>([
      { type: "symlink", path: "./link", linkname: "./target" },
    ])
    .pipeThrough(new TarStream());
  const bytes = await new Response(archive).bytes();

  // The linkname field spans bytes 157..256 and is NUL padded, so keep only
  // the text before the first NUL.
  const field = new TextDecoder().decode(bytes.subarray(157, 257));
  const [linkname] = field.split("\0");
  assertEquals(linkname, "./target");
});

// A symlink written by TarStream must be read back unchanged by UntarStream.
Deno.test("TarStream() symlink round-trip", async () => {
  const readable = ReadableStream.from<TarStreamInput>([
    {
      type: "symlink",
      path: "./mylink",
      linkname: "./target/file.txt",
    },
  ])
    .pipeThrough(new TarStream())
    .pipeThrough(new UntarStream());

  // Count the entries: the assertions below live inside the loop, so without
  // this the test would pass vacuously if no entry was emitted at all.
  let entryCount = 0;
  for await (const entry of readable) {
    ++entryCount;
    assertEquals(entry.path, "./mylink");
    assertEquals(entry.header.typeflag, "2");
    assertEquals(entry.header.linkname, "./target/file.txt");
    assertEquals(entry.header.size, 0);
    // A symlink carries no content, so no readable is exposed for it.
    assertEquals(entry.readable, undefined);
  }
  assertEquals(entryCount, 1);
});

// An empty link target is invalid and must make the stream fail.
Deno.test("TarStream() rejects empty linkname", async () => {
  const readArchive = async () => {
    const archive = ReadableStream
      .from<TarStreamInput>([
        { type: "symlink", path: "./link", linkname: "" },
      ])
      .pipeThrough(new TarStream());
    return await new Response(archive).bytes();
  };

  await assertRejects(
    readArchive,
    TypeError,
    "Cannot add to the tar archive: Invalid Linkname provided",
  );
});

// A link target one byte over the 100-byte limit must make the stream fail.
Deno.test("TarStream() rejects linkname exceeding 100 bytes", async () => {
  const readArchive = async () => {
    const archive = ReadableStream
      .from<TarStreamInput>([
        { type: "symlink", path: "./link", linkname: "a".repeat(101) },
      ])
      .pipeThrough(new TarStream());
    return await new Response(archive).bytes();
  };

  await assertRejects(
    readArchive,
    TypeError,
    "Cannot add to the tar archive: Linkname cannot exceed 100 bytes",
  );
});

// A non-ASCII link target must survive a TarStream -> UntarStream round-trip.
Deno.test("TarStream() symlink with non-ASCII linkname round-trip", async () => {
  const linkname = "target/\u3042";

  const readable = ReadableStream.from<TarStreamInput>([
    { type: "symlink", path: "./link", linkname },
  ])
    .pipeThrough(new TarStream())
    .pipeThrough(new UntarStream());

  // Count the entries: the assertion below lives inside the loop, so without
  // this the test would pass vacuously if no entry was emitted at all.
  let entryCount = 0;
  for await (const entry of readable) {
    ++entryCount;
    assertEquals(entry.header.linkname, linkname);
  }
  assertEquals(entryCount, 1);
});

// A link target of exactly 100 bytes sits on the limit and must be accepted.
Deno.test("TarStream() accepts exactly 100 byte linkname", async () => {
  const linkname = "a".repeat(100);

  const readable = ReadableStream.from<TarStreamInput>([
    { type: "symlink", path: "./link", linkname },
  ])
    .pipeThrough(new TarStream())
    .pipeThrough(new UntarStream());

  // Count the entries: the assertion below lives inside the loop, so without
  // this the test would pass vacuously if no entry was emitted at all.
  let entryCount = 0;
  for await (const entry of readable) {
    ++entryCount;
    assertEquals(entry.header.linkname, linkname);
  }
  assertEquals(entryCount, 1);
});

// Directories, files and symlinks in one archive must come back in order with
// the right typeflags.
Deno.test("TarStream() mixed archive with files, dirs, and symlinks", async () => {
  const text = new TextEncoder().encode("Hello World!");

  const inputs: TarStreamInput[] = [
    { type: "directory", path: "./dir" },
    {
      type: "file",
      path: "./dir/file.txt",
      size: text.length,
      readable: ReadableStream.from([text.slice()]),
    },
    { type: "symlink", path: "./link-to-file", linkname: "./dir/file.txt" },
  ];
  const untarred = ReadableStream
    .from<TarStreamInput>(inputs)
    .pipeThrough(new TarStream())
    .pipeThrough(new UntarStream());

  const seen: { path: string; typeflag: string }[] = [];
  for await (const entry of untarred) {
    const { typeflag } = entry.header;
    seen.push({ path: entry.path, typeflag });
    // Entry contents are not under test; discard them where present.
    await entry.readable?.cancel();
  }

  const expected = [
    { path: "./dir", typeflag: "5" },
    { path: "./dir/file.txt", typeflag: "0" },
    { path: "./link-to-file", typeflag: "2" },
  ];
  assertEquals(seen, expected);
});

// assertValidLinkname() limits the encoded byte length, not the character
// count, so multi-byte characters reach the limit sooner.
Deno.test("assertValidLinkname()", () => {
  const accepted = [
    "./target",
    "a".repeat(100),
    "target/\u3042",
    "\u00e4".repeat(50), // 2 bytes each -> 100 bytes
    "あ".repeat(33), // 3 bytes each -> 99 bytes
  ];
  for (const linkname of accepted) {
    assertValidLinkname(linkname);
  }

  assertThrows(
    () => assertValidLinkname(""),
    TypeError,
    "Cannot add to the tar archive: Invalid Linkname provided",
  );

  const tooLong = [
    "a".repeat(101),
    "\u00e4".repeat(51), // 2 bytes each -> 102 bytes
    "あ".repeat(34), // 3 bytes each -> 102 bytes
  ];
  for (const linkname of tooLong) {
    assertThrows(
      () => assertValidLinkname(linkname),
      TypeError,
      "Cannot add to the tar archive: Linkname cannot exceed 100 bytes",
    );
  }
});
Loading