From d18ebfd82ee3fd1f7032a6f543f078d950bf12f7 Mon Sep 17 00:00:00 2001 From: Irakli Gozalishvili Date: Mon, 31 Jan 2022 15:12:50 -0800 Subject: [PATCH 1/8] Define less ambiguous UnixFS spec in TS types --- unixfs.ts | 489 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 489 insertions(+) create mode 100644 unixfs.ts diff --git a/unixfs.ts b/unixfs.ts new file mode 100644 index 000000000..ec2b939fc --- /dev/null +++ b/unixfs.ts @@ -0,0 +1,489 @@ +/** + * Logical representation of a file chunk. + * + * TODO: Clarify when this represenation is used as opposed to `FileChunk`. + */ +export interface RawChunk extends PBNode { + Data: ByteView<{ + Type: DataType.Raw, + /** + * Raw bytes of the content + */ + Data: Bytes + }> + /** + * Raw nodes MUST not have any links, yet empty `Links` list is expected. + * At the type level it is expressed as `never[]` which guarantees that + * no instatiation other than empty will satisfy this constraint + */ + Links: never[] +} + +/** + * Logical representation of a file chunk. When large file is added to IPFS + * it gets chunked into smaller pieces (according to specified `--chunker`) + * and each chunk is encoded into this representation (and linked from file + * DAG). + * + * Note: While technically it is possible to add `mode` and `mtime` to + * `FileChunk` node it is logical nonsense and therefor to be ignore. + */ +export interface FileChunk extends PBNode { + Data: ByteView<{ + Type: DataType.File, + /** + * Raw bytes corresponding to this file chunk + */ + Data: Bytes + /** + * Number of bytes in Data field + */ + filesize: uint64 + }> + /** + * Raw nodes MUST not have any links, yet empty `Links` list is expected. + * At the type level it is expressed as `never[]` which guarantees that + * no instatiation other than empty will satisfy this constraint + */ + Links: never[] +} + +export type FileLeaf = + /** + * May link to raw block (Not to confuse with UnixFS Raw nodes). 
Happens + * when `--raw-leaves` option is used. + */ + | RawNode + /** + * @TODO - I have no idea when this happens please help! + */ + | RawChunk + /** + * Node links to actual chunks when it's a level above leaf nodes. + */ + | FileChunk + +/** + * Logical representation of a file shard. When large files are chunked + * slices that span multiple chunks may be represented via file shards. + * + * Please note that some file layouts may create shallow DAGs where file root + * node links to `FileChunk|Raw` nodes and in other layouts DAGs may be + * several levels deep, in those cases file root link to `FileShard` nodes + * that link to either other shards or leaf `FileChunk|Raw` nodes. + * + * Note: While technically it is possible to add `mode` and `mtime` to + * `FileShard` nodes it is logical nonsense and therefor to be ignore. + */ +export interface FileShard extends PBNode { + Data: ByteView<{ + Type: DataType.File, + /** + * The total number of bytes of a file slize represented by this shard. + */ + filesize: uint64 + /** + * List of `filesize`s for each linked node (in exact same order). + */ + blocksizes: uint64[] + }> + /** + * Links to the file slices this shard is comprised of. + * Note: That this is heterogeneous list as e.g. in trickle DAG layout + * shards may link to both leaves and other shards. + */ + Links: FileLink[] +} + +/** + * Logical representation of a file that fits a single block. Note is + * semantically different from a `FileChunk`, even though structurally it is + * compatible. Unlike `FileChunk` it may contain `mode`, `mtime` file metadata. + */ +export interface SimpleFileLayout extends PBNode { + Data: ByteView<{ + Type: DataType.File, + /** + * Raw bytes of the file content + */ + Data: Bytes + /** + * Number of bytes in above `Data` field + */ + filesize: uint64, + + /** + * If omitted to be interprented as default `0644`. It is RECOMMENDED + * to omit if mode matches default. 
+ */ + mode?: Mode + mtime?: UnixTime + }> + + /** + * MUST not have any links, yet empty `Links` list is expected. + * At the type level it is expressed as `never[]` which guarantees that + * no instatiation other than empty will satisfy this constraint + */ + Links: never[] +} + +/** + * Logical represenatation of a file that consists of multiple blocks. Note is + * semantically different from a `FileShard`, even though structurally it is + * compatible. Unlike `FileShard` it may contain `mode`, `mtime` file metadata. + */ + +export interface AdvancedFileLayout extends PBNode { + Data: ByteView<{ + Type: DataType.File, + // Total number of bytes in the file (not the graph structure). + filesize: uint64, + + /** + * List of `filesize`s for each linked node (in exact same order). + */ + blocksizes: uint64[] + + /** + * If omitted to be interprented as default `0644`. It is RECOMMENDED + * to omit if mode matches default. + */ + mode?: Mode + mtime?: UnixTime + }> + /** + * Links to the file slices this file is comprised of. + * Note: That this is heterogeneous list as e.g. in trickle DAG layout + * shards may link to both leaves and other shards. + */ + Links: FileLink[] +} + +/** + * In IPFS large files are chucked into several blocks for a more effective + * replication. Such files in UnixFS are represented via `AdvancedFileLayout`. + * And files that fit into a single block are represented via `SimpleFileLayout`. + * + * Please note: In some configurations files that fit a single block are not + * even encoded as UnixFS but rather as a `RawNode` blocks. However this type + * describes UinxFS File representation and not IPFS file representation which + * is why `RawNode` variant is not part of it. + */ +export type FileLayout = + | SimpleFileLayout + | AdvancedFileLayout + + +/** + * Logacal representation of a directory that fits single block. 
+ */ +export interface FlatDirectoryLayout extends PBNode { + Data: ByteView<{ + Type: DataType.Directory + /** + * Directories MUST have `filesize` set to `0`. + */ + filesize: 0 + /** + * If omitted to be interpreted as default `0755`. It is RECOMMENDED + * to omit if mode matches default. + */ + mode?: Mode + mtime?: UnixTime + }> + /** + * Links are directory entries. + */ + Links: DirectoryLink[] +} + + +/** + * Logical representation of directory encoded in multiple blocks (usually when + * it contains large number of entries). Such directories are represented via + * Hash Array Map Tries (HAMT). + * + * @see https://en.wikipedia.org/wiki/Hash_array_mapped_trie + */ +export interface AdvancedDirectoryLayout { + Data: ByteView<{ + // TODO: Is root still a directory type or is it going to be HAMT instead ? + Type: DataType.Directory, + Data: ByteView + /* + * HAMT table width (In IPFS it's usually 256) + */ + fanout: uint64, + /** + * Multihash code for the hashing function used (In IPFS it's [murmur3-64][]) + * + * [murmur3-64]:https://github.com/multiformats/multicodec/blob/master/table.csv#L24 + */ + hashType: uint64, + + /** + * If omitted to be interpreted as default `0755`. It is RECOMMENDED + * to omit if mode matches default. 
+ */ + mode?: Mode + mtime?: UnixTime + }> + Links: ShardLink[] +} + +export interface ShardLink extends PBLink { + /** + * + */ + Name: string + Tsize: number +} + +/** + * + */ +export interface DirectoryShard extends PBNode { + Data: ByteView<{ + Type: DataType.HAMTShard, + Data: ByteView + /* + * HAMT table width (In IPFS it's usually 256) + */ + fanout: uint64, + /** + * Multihash code for the hashing function used (In IPFS it's [murmur3-64][]) + * + * [murmur3-64]:https://github.com/multiformats/multicodec/blob/master/table.csv#L24 + */ + hashType: uint64, + }> + /** + * Either links to other shards or actual directory entries + */ + Links: ShardLink[]|DirectoryLink[] +} + +/** + * Type for either UnixFS directory representation. + */ +export type DirectoryLayout = + | FlatDirectoryLayout + | AdvancedDirectoryLayout + +/** + * All nodes that have `file` type. + */ +export type FileNode = + | FileChunk + | FileLayout + +/** + * All nodes that have `directory` type. + */ +export type DirectoryNode = + | FlatDirectoryLayout + | AdvancedDirectoryLayout + +/** + * @TODO + */ +export type MetadataNode = never + +/** + * @TODO + */ +export type SymlinkNode = never + +/** + * Type representing any UnixFS node. + */ +export type UnixFS = + | RawChunk + | DirectoryNode + | FileNode + | MetadataNode + | SymlinkNode + | DirectoryShard + +export enum DataType { + Raw = 0, + Directory = 1, + File = 2, + /** + * TODO: Have not came across this one would be nice to either mark + * or entype it's represenation deprecated + */ + Metadata = 3, + /** + * TODO: Have not came across this one either, I'm not sure how it supposed + * to be represented. If not used in practice maybe it sholud be marked + * deprecated. + */ + Symlink = 4, + HAMTShard = 5, +} + +/** + * representing the modification time in seconds relative to the unix epoch + * 1970-01-01T00:00:00Z. 
+ */ +export interface UnixTime { + /** + * (signed 64bit integer): represents the amount of seconds after or before + * the epoch. + */ + readonly Seconds: int64; + + /** + * (optional, 32bit unsigned integer ): when specified represents the + * fractional part of the mtime as the amount of nanoseconds. The valid + * range for this value are the integers [1, 999999999]. + */ + readonly FractionalNanoseconds?: fixed32 +} + +/** + * The mode is for persisting the file permissions in [numeric notation]. + * If unspecified this defaults to + * - `0755` for directories/HAMT shards + * - `0644` for all other types where applicable + * + * The nine least significant bits represent `ugo-rwx` + * The next three least significant bits represent setuid, setgid and the sticky bit. + * The remaining 20 bits are reserved for future use, and are subject to change. + * Spec implementations MUST handle bits they do not expect as follows: + * - For future-proofing the (de)serialization layer must preserve the entire + * `uint32` value during clone/copy operations, modifying only bit values that + * have a well defined meaning: + * `clonedValue = ( modifiedBits & 07777 ) | ( originalValue & 0xFFFFF000 )` + * - Implementations of this spec MUST proactively mask off bits without a + * defined meaning in the implemented version of the spec: + * `interpretedValue = originalValue & 07777` + + * + * [numeric notation]:https://en.wikipedia.org/wiki/File-system_permissions#Numeric_notation + * + * @see https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_stat.h.html + */ +export type Mode = uint32; + + +/** + * Less loosely defined PB Link used by files. + */ +export interface FileLink extends PBLink { + Hash: CID + /** + * File links MUST specify `TSize` for the linked slice. + */ + + Tsize: uint64 + /** + * File links SHOULD NOT specify `Name` for the links. + * TODO: JS actually uses `''` so maybe that is what type should say. 
+ */ + Name?: never +} + +/** + * Less loosely dhefined PB Link used by directories. + */ +export interface DirectoryLink extends PBLink { + Hash: CID + /** + * Directory link SHOULD specify size of the entry. + */ + Tsize: uint64 + /** + * Directory link MUST specify link name which is a name for the directory + * entry. + */ + Name: string +} + + +/** + * Logical representation of DAG-PB Node + * @see https://ipld.io/specs/codecs/dag-pb/spec/ + */ + +export interface PBNode { + Data: ByteView + Links: PBLink[] +} + +/** + * Logical representation of DAG-PB link + * @see https://ipld.io/specs/codecs/dag-pb/spec/ + */ + +export interface PBLink { + Hash: CID + Tsize?: number + Name?: string +} + + +/** + * Logical representation of raw binary nodes as raw IPLD codec + * @see https://github.com/multiformats/multicodec/blob/master/table.csv#L39 + */ +export interface RawNode extends Bytes { +} + +/** + * Logical representation of *C*ontent *Id*entifier, where `C` is a logical + * representation of the content it identifies. + * + * Note: This is not an actual definition used by (JS) IPFS but a more + * appropriate definition to convey desired semantics. + */ +export interface CID extends Phantom {} + +/** + * Represents byte encoded representation of the `Data`. It uses type parameter + * to capture the structure of the data it encodes. + */ +export interface ByteView extends Phantom {} + + + + +// JS/TS specific type definitions that are not really relevant and +// could be ignored. Mostly present so file type checks, although +// may also be useful for JS/TS reader. + + +/** + * Type representing raw bytes, in JS it's usually Uint8Array. Use type + * alias so it's less JS sepcific. 
+ */ +export type Bytes = Uint8Array + +/** + * @see https://github.com/ipfs/go-bitfield + */ +export type Bitfield = Uint8Array + +// TS does not really have these, create aliases so it's aligned closer +// to protobuf spec +export type int64 = number +export type fixed32 = number +export type uint64 = number + +export type uint32 = number + + +/** + * This is an utility type to retain unused type parameter `T`. It can be used + * as nominal type e.g. to capture semantics not represented in actual type strucutre. + */ +export interface Phantom { + // This field can not be represented because field name is non-existings + // unique symbol. But given that field is optional any object will valid + // type contstraint. + [PhantomKey]?: T +} + +declare const PhantomKey: unique symbol From 445ec9a3bda60aa405bf40b762957041311c4f0d Mon Sep 17 00:00:00 2001 From: Irakli Gozalishvili Date: Mon, 31 Jan 2022 23:06:04 -0800 Subject: [PATCH 2/8] Apply suggestions from code review --- unixfs.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/unixfs.ts b/unixfs.ts index ec2b939fc..e8bef7e07 100644 --- a/unixfs.ts +++ b/unixfs.ts @@ -208,8 +208,7 @@ export interface FlatDirectoryLayout extends PBNode { */ export interface AdvancedDirectoryLayout { Data: ByteView<{ - // TODO: Is root still a directory type or is it going to be HAMT instead ? - Type: DataType.Directory, + Type: DataType.HAMTShard, Data: ByteView /* * HAMT table width (In IPFS it's usually 256) From be9c0e808efa047fb30959e60b3f5aef01e032e9 Mon Sep 17 00:00:00 2001 From: Irakli Gozalishvili Date: Mon, 31 Jan 2022 23:12:24 -0800 Subject: [PATCH 3/8] Simplify link defs --- unixfs.ts | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/unixfs.ts b/unixfs.ts index e8bef7e07..5c9eff751 100644 --- a/unixfs.ts +++ b/unixfs.ts @@ -92,7 +92,7 @@ export interface FileShard extends PBNode { * Note: That this is heterogeneous list as e.g. 
in trickle DAG layout * shards may link to both leaves and other shards. */ - Links: FileLink[] + Links: AnonymousLink[] } /** @@ -157,7 +157,7 @@ export interface AdvancedFileLayout extends PBNode { * Note: That this is heterogeneous list as e.g. in trickle DAG layout * shards may link to both leaves and other shards. */ - Links: FileLink[] + Links: AnonymousLink[] } /** @@ -195,7 +195,7 @@ export interface FlatDirectoryLayout extends PBNode { /** * Links are directory entries. */ - Links: DirectoryLink[] + Links: NamedLink[] } @@ -228,15 +228,7 @@ export interface AdvancedDirectoryLayout { mode?: Mode mtime?: UnixTime }> - Links: ShardLink[] -} - -export interface ShardLink extends PBLink { - /** - * - */ - Name: string - Tsize: number + Links: NamedLink[] } /** @@ -260,7 +252,7 @@ export interface DirectoryShard extends PBNode { /** * Either links to other shards or actual directory entries */ - Links: ShardLink[]|DirectoryLink[] + Links: NamedLink[] } /** @@ -369,9 +361,10 @@ export type Mode = uint32; /** - * Less loosely defined PB Link used by files. + * Less loosely defined PB Link which requires TSize and does not + * require `Name`. */ -export interface FileLink extends PBLink { +export interface AnonymousLink extends PBLink { Hash: CID /** * File links MUST specify `TSize` for the linked slice. @@ -386,9 +379,9 @@ export interface FileLink extends PBLink { } /** - * Less loosely dhefined PB Link used by directories. + * Less loosely defined PB Link that requires `Name` field. */ -export interface DirectoryLink extends PBLink { +export interface NamedLink extends PBLink { Hash: CID /** * Directory link SHOULD specify size of the entry. 
From 97548e2276d26cb061fb547d9717ed4f454f2604 Mon Sep 17 00:00:00 2001 From: Irakli Gozalishvili Date: Tue, 1 Feb 2022 11:18:50 -0800 Subject: [PATCH 4/8] Apply suggestions from code review Co-authored-by: Alan Shaw --- unixfs.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unixfs.ts b/unixfs.ts index 5c9eff751..6df310a8d 100644 --- a/unixfs.ts +++ b/unixfs.ts @@ -412,7 +412,7 @@ export interface PBNode { export interface PBLink { Hash: CID - Tsize?: number + Tsize?: uint64 Name?: string } @@ -469,7 +469,7 @@ export type uint32 = number /** * This is an utility type to retain unused type parameter `T`. It can be used - * as nominal type e.g. to capture semantics not represented in actual type strucutre. + * as nominal type e.g. to capture semantics not represented in actual type structure. */ export interface Phantom { // This field can not be represented because field name is non-existings From 96b46903193b9b0ca7d8b95c1b0b9f0e37881599 Mon Sep 17 00:00:00 2001 From: Irakli Gozalishvili Date: Tue, 1 Feb 2022 11:25:55 -0800 Subject: [PATCH 5/8] Remove redundant types and define symlink --- unixfs.ts | 47 +++++++++++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/unixfs.ts b/unixfs.ts index 6df310a8d..3e3d4303d 100644 --- a/unixfs.ts +++ b/unixfs.ts @@ -262,19 +262,6 @@ export type DirectoryLayout = | FlatDirectoryLayout | AdvancedDirectoryLayout -/** - * All nodes that have `file` type. - */ -export type FileNode = - | FileChunk - | FileLayout - -/** - * All nodes that have `directory` type. - */ -export type DirectoryNode = - | FlatDirectoryLayout - | AdvancedDirectoryLayout /** * @TODO @@ -284,17 +271,38 @@ export type MetadataNode = never /** * @TODO */ -export type SymlinkNode = never +export interface Symlink extends PBNode { + Data: ByteView<{ + Type: DataType.Symlink, + /** + * UTF-8 encoded path to the symlink target. 
+ */ + Data: ByteView + /** + * Number of bytes in Data field + */ + filesize: uint64 + }> + /** + * Symlink nodes MUST not have any links, yet empty `Links` list is expected. + * At the type level it is expressed as `never[]` which guarantees that + * no instatiation other than empty will satisfy this constraint. + * + * Decoder implementation SHOULD ignore links even if present. + */ + Links: never[] +} /** * Type representing any UnixFS node. */ export type UnixFS = | RawChunk - | DirectoryNode - | FileNode + | DirectoryLayout + | FileChunk + | FileLayout | MetadataNode - | SymlinkNode + | Symlink | DirectoryShard export enum DataType { @@ -306,11 +314,6 @@ export enum DataType { * or entype it's represenation deprecated */ Metadata = 3, - /** - * TODO: Have not came across this one either, I'm not sure how it supposed - * to be represented. If not used in practice maybe it sholud be marked - * deprecated. - */ Symlink = 4, HAMTShard = 5, } From a7fdbeb1095627240c4e9fa53214b83d28101401 Mon Sep 17 00:00:00 2001 From: Irakli Gozalishvili Date: Tue, 1 Feb 2022 11:30:14 -0800 Subject: [PATCH 6/8] Update Symlink description --- unixfs.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/unixfs.ts b/unixfs.ts index 3e3d4303d..5a7a20fb0 100644 --- a/unixfs.ts +++ b/unixfs.ts @@ -269,7 +269,11 @@ export type DirectoryLayout = export type MetadataNode = never /** - * @TODO +/** + * Logical representation of a [symbolic link][] . 
+ * + * [symbolic link]:https://en.wikipedia.org/wiki/Symbolic_link + */ */ export interface Symlink extends PBNode { Data: ByteView<{ From 8d241486073a4956ac1f1289756417a9df4f2970 Mon Sep 17 00:00:00 2001 From: Irakli Gozalishvili Date: Tue, 1 Feb 2022 15:29:03 -0800 Subject: [PATCH 7/8] Incorporate feedback & corrections from @ribasushi --- unixfs.ts | 271 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 213 insertions(+), 58 deletions(-) diff --git a/unixfs.ts b/unixfs.ts index 5a7a20fb0..9854d020b 100644 --- a/unixfs.ts +++ b/unixfs.ts @@ -1,9 +1,25 @@ /** - * Logical representation of a file chunk. + * Logical representation of a file chunk (a leaf node of the file DAG layout). + * This representation had been subsumed by `FileChunk` representation and + * therefore is marked as deprecated. + * + * UnixFS consumers are very likely to encounter nodes of this type, as of this + * writing JS & Go implementations can be configured to produce these nodes, in + * trickle DAG use this configuration. + * + * UnixFS producers are RECOMMENDED to either use `FileChunk` representation or + * better yet raw binary nodes (That is 0x55 multicodec) which will likely + * replace them in the future. + * + * @see https://github.com/multiformats/multicodec/blob/master/table.csv#L39 * - * TODO: Clarify when this represenation is used as opposed to `FileChunk`. + * @deprecated */ export interface RawChunk extends PBNode { + /** + * While actual protobuf may include other fields consumers are recommended + * to ignore them. + */ Data: ByteView<{ Type: DataType.Raw, /** @@ -14,19 +30,39 @@ export interface RawChunk extends PBNode { /** * Raw nodes MUST not have any links, yet empty `Links` list is expected. * At the type level it is expressed as `never[]` which guarantees that - * no instatiation other than empty will satisfy this constraint + * no instantiation other than empty will satisfy this constraint.
+ * + * Consumer of `Raw` nodes SHOULD ignore all links even if they are present + * in the block. + * + * @TODO Verify this is accurate maybe they are treated just as files with + * `ComplexFileLayout`. */ Links: never[] } /** - * Logical representation of a file chunk. When large file is added to IPFS - * it gets chunked into smaller pieces (according to specified `--chunker`) - * and each chunk is encoded into this representation (and linked from file - * DAG). + * Logical representation of a file chunk (a leaf node of the file DAG layout). * - * Note: While technically it is possible to add `mode` and `mtime` to - * `FileChunk` node it is logical nonsense and therefor to be ignore. + * When large file is added to IPFS it gets chunked into smaller pieces + * (according to the `--chunker` specified) and each chunk is encoded into this + * representation (and linked from file DAG). Please note that in practice there + * are many other representations for file chunks (leaf nodes) like `RawChunk`s + * (deprecated in favor of this representation) and raw binary nodes (That is + * 0x55 multicodec) which are on a way to surpass this representation. + * + * Please note that in reality there is only one `file` node with many optional + * fields, however different combination of fields corresponds to a different + * semantics and we represent each by a different type. + * + * Also note that some file nodes may also have `mode` and `mtime` fields, + * which we represent via `SimpleFileLayout` type however that is not completely + * accurate, e.g because one could take two `SimpleFileLayout`s and represent + * their concatination via `AdvancedDirectoryLayout` by linking to them. In such + * a scenario consumer should treat leaves as `FileChunk`s and SHOULD ignore + * `mode` and `mtime` fileds on them. However if those leves are accessed as + * files consumer SHOULD treat them as `SimpleFileLayout` and SHOULD NOT ignore + * `mode` and `mtime` fields.
*/ export interface FileChunk extends PBNode { Data: ByteView<{ @@ -41,9 +77,7 @@ export interface FileChunk extends PBNode { filesize: uint64 }> /** - * Raw nodes MUST not have any links, yet empty `Links` list is expected. - * At the type level it is expressed as `never[]` which guarantees that - * no instatiation other than empty will satisfy this constraint + * File chunks are leaf nodes and therefor are not supposed to have any links. */ Links: never[] } @@ -65,15 +99,21 @@ export type FileLeaf = /** * Logical representation of a file shard. When large files are chunked - * slices that span multiple chunks may be represented via file shards. + * slices that span multiple blocks may be represented via file shards in + * certain DAG layouts (e.g. balanced & trickle DAGs). * - * Please note that some file layouts may create shallow DAGs where file root - * node links to `FileChunk|Raw` nodes and in other layouts DAGs may be - * several levels deep, in those cases file root link to `FileShard` nodes - * that link to either other shards or leaf `FileChunk|Raw` nodes. + * Please note in practice there is only one `file` node type with many optional + * fields. Different combination of fields corresponds to a different semntaics + * and combination of fields in this type represent a branch nodes in the file + * DAGs where nodes beside leaves and root exist. * - * Note: While technically it is possible to add `mode` and `mtime` to - * `FileShard` nodes it is logical nonsense and therefor to be ignore. 
+ * Also note that you may encounter `FileShard`s with `mode` and `mtime` fields + * which according to our categorization would fall under `AdvancedFileLayout` + * category, however just as with `FileChunk` / `SimpleFileLayout` here as well + * you should treat node as `AdvancedFileLayout` if you encounter it in the + * root position (that is to say regard `mode`, `mtime` field) and treat it as + * `FileShard` node if encountered further down the DAG (that is ignore `mode`, + * `mtime` fileds). */ export interface FileShard extends PBNode { Data: ByteView<{ @@ -88,17 +128,17 @@ export interface FileShard extends PBNode { blocksizes: uint64[] }> /** - * Links to the file slices this shard is comprised of. - * Note: That this is heterogeneous list as e.g. in trickle DAG layout - * shards may link to both leaves and other shards. + * Links to the file slices this shard is comprised of. Please note that in + * some layouts e.g. trickle DAG shards may link to both leaf nodes and other + * shards, which is why this list heterogeneous. */ Links: AnonymousLink[] } /** - * Logical representation of a file that fits a single block. Note is - * semantically different from a `FileChunk`, even though structurally it is - * compatible. Unlike `FileChunk` it may contain `mode`, `mtime` file metadata. + * Logical representation of a file that fits a single block. Note this is only + * semantically different from a `FileChunk` and your interpretation SHOULD vary + * depending on where you encounter the node (In root of the DAG or not). */ export interface SimpleFileLayout extends PBNode { Data: ByteView<{ @@ -121,9 +161,8 @@ export interface SimpleFileLayout extends PBNode { }> /** - * MUST not have any links, yet empty `Links` list is expected. - * At the type level it is expressed as `never[]` which guarantees that - * no instatiation other than empty will satisfy this constraint + * Simple files SHOULD NOT have any links as they are represented by single + * chunk. 
+ */ Links: never[] } @@ -160,6 +199,52 @@ export interface AdvancedFileLayout extends PBNode { Links: AnonymousLink[] } +/** + * This type of node is not produced by reference IPFS implementations, yet + * such file nodes could be represented and therefore defined with this type. + * + * In this file representation first chunk of the file is represented by a + * `data` field while rest of the file is represented by links. + * + * It is NOT RECOMMENDED to use this representation (which is why it's marked + * deprecated), however it is still valid representation and UnixFS consumers + * SHOULD recognize it and interpret as described. + * + * @deprecated + */ +export interface ComplexFileLayout extends PBNode { + Data: ByteView<{ + Type: DataType.File, + /** + * Total number of bytes in the file (not the graph structure). Which is + * `data` size + sum of `blocksizes`. + */ + filesize: uint64, + + /** + * Represents content of the first chunk of the file. + */ + data: Bytes + + /** + * List of `filesize`s for each linked node (in exact same order). + */ + blocksizes: uint64[] + + /** + * If omitted to be interpreted as default `0644`. It is RECOMMENDED + * to omit if mode matches default. + */ + mode?: Mode + mtime?: UnixTime + }> + /** + * Links to the rest of the file slices, besides one in `data` field, this + * file is comprised of. + */ + Links: AnonymousLink[] +} + /** * In IPFS large files are chucked into several blocks for a more effective * replication. Such files in UnixFS are represented via `AdvancedFileLayout`. @@ -173,6 +258,7 @@ export interface AdvancedFileLayout extends PBNode { export type FileLayout = | SimpleFileLayout | AdvancedFileLayout + | ComplexFileLayout /** @@ -232,7 +318,12 @@ export interface AdvancedDirectoryLayout { } /** - * + * Logical representation of the shard of the sharded directory.
Please note + * that it is only semantically different from `AdvancedDirectoryLayout`, in + * practice they are the same and interpretation should vary based on view. If + * viewed from root position it is `AdvancedDirectoryLayout` and its `mtime` + * `mode` field to be respected, otherwise it is `DirectoryShard` and its + * `mtime` and `mode` field to be ignored. */ export interface DirectoryShard extends PBNode { Data: ByteView<{ @@ -264,17 +355,65 @@ export type DirectoryLayout = /** - * @TODO + * Metadata as a separate node type has been considered in various forms but + * ultimately had been decided against in favor of optional `mode`, `mtime` + * fields on the file & directory nodes. + * + * Consumers are RECOMMENDED to treat `Metadata` nodes same as `file` nodes, + * that is: + * + * - If node has both `Links` and `Data` treat it as `ComplexFileLayout` + * - If node has `Data` but no `Links` treat it as `SimpleFileLayout` if + * encountered in root position and as `FileChunk` in any other position. + * - If node has `Links` and no `Data` treat it as `AdvancedFileLayout` if + * encountered in root position and as `FileShard` in any other position. + * - If node has neither `Data` nor `Links` treat it as `EmptyFile` in root + * position and as empty `FileShard` otherwise. + * + * + * Producers SHOULD NOT produce `Metadata` nodes and use appropriate `file` + * node instead. + * + * @deprecated */ -export type MetadataNode = never +export interface Metadata extends PBNode { + Data: ByteView<{ + Type: DataType.Metadata + /** + * Raw bytes of the file content + */ + Data?: Bytes + /** + * Number of bytes in above `Data` field + */ + filesize?: uint64, + + /** + * List of `filesize`s for each linked node (in exact same order). + */ + blocksizes: uint64[] + + /** + * If omitted to be interpreted as default `0644`. It is RECOMMENDED + * to omit if mode matches default.
+ */ + mode?: Mode + mtime?: UnixTime + }> + /** + * Links to the file slices this shard is comprised of. Please note that in + * some layouts e.g. trickle DAG shards may link to both leaf nodes and other + * shards, which is why this list heterogeneous. + */ + Links: AnonymousLink[] +} + /** -/** - * Logical representation of a [symbolic link][] . + * Logical representation of a [symbolic link][]. * * [symbolic link]:https://en.wikipedia.org/wiki/Symbolic_link */ - */ export interface Symlink extends PBNode { Data: ByteView<{ Type: DataType.Symlink, @@ -283,16 +422,23 @@ export interface Symlink extends PBNode { */ Data: ByteView /** - * Number of bytes in Data field + * In practice it may be present, in those cases it SHOULD be ignored. + * Producers should leave this field out. + * @deprecated */ - filesize: uint64 + filesize?: uint64 + + /** + * If omitted to be interprented as default `0644`. It is RECOMMENDED + * to omit if mode matches default. + */ + mode?: Mode + mtime?: UnixTime }> /** - * Symlink nodes MUST not have any links, yet empty `Links` list is expected. - * At the type level it is expressed as `never[]` which guarantees that - * no instatiation other than empty will satisfy this constraint. + * Symlink nodes MUST not have any links. * - * Decoder implementation SHOULD ignore links even if present. + * Consumers SHOULD ignore links if they are present. 
+ */ Links: never[] } @@ -302,21 +448,17 @@ export interface Symlink extends PBNode { */ export type UnixFS = | RawChunk - | DirectoryLayout | FileChunk | FileLayout - | MetadataNode - | Symlink | DirectoryShard + | DirectoryLayout + | Metadata + | Symlink export enum DataType { Raw = 0, Directory = 1, File = 2, - /** - * TODO: Have not came across this one would be nice to either mark - * or entype it's represenation deprecated - */ Metadata = 3, Symlink = 4, HAMTShard = 5, @@ -374,31 +516,34 @@ export type Mode = uint32; export interface AnonymousLink extends PBLink { Hash: CID /** - * File links MUST specify `TSize` for the linked slice. + * UnixFS links MUST specify size of the linked block. */ Tsize: uint64 /** - * File links SHOULD NOT specify `Name` for the links. - * TODO: JS actually uses `''` so maybe that is what type should say. + * Anonymous links (e.g. links from files) SHOULD NOT specify `Name` on + * links. + * + * Consumers SHOULD ignore `Name` even if present. */ Name?: never } /** - * Less loosely defined PB Link that requires `Name` field. + * Less loosely defined PB Link that requires `Name` and `Tsize` fields. */ export interface NamedLink extends PBLink { Hash: CID /** - * Directory link SHOULD specify size of the entry. + * Named links (e.g. directory links) MUST specify link name. In case of + * directories those are interpreted as entry names. */ - Tsize: uint64 + Name: string + /** - * Directory link MUST specify link name which is a name for the directory - * entry. + * UnixFS links MUST specify size of the linked block. */ - Name: string + Tsize: uint64 } @@ -408,7 +553,7 @@ export interface NamedLink extends PBLink { */ export interface PBNode { - Data: ByteView + Data?: ByteView Links: PBLink[] } @@ -418,9 +563,19 @@ export interface PBNode { */ export interface PBLink { + /** + * Binary CID of the target node. + */ Hash: CID - Tsize?: uint64 + /** + * UTF-8 string name + */ Name?: string + + /** + * Cumulative size of target node.
+ */ + Tsize?: uint64 } From b36951302c601dda3554f02837146c247d9a05d4 Mon Sep 17 00:00:00 2001 From: Irakli Gozalishvili Date: Tue, 1 Feb 2022 23:15:34 -0800 Subject: [PATCH 8/8] Apply suggestions from code review --- unixfs.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/unixfs.ts b/unixfs.ts index 9854d020b..0a3367404 100644 --- a/unixfs.ts +++ b/unixfs.ts @@ -58,7 +58,7 @@ export interface RawChunk extends PBNode { * Also note that some file nodes may also have `mode` and `mtime` fields, * which we represent via `SimpleFileLayout` type however that is not completely * accurate, e.g because one could take two `SimpleFileLayout`s and represent - * their concatination via `AdvancedDirectoryLayout` by linking to them. In such + * their concatination via `AdvancedFileLayout` by linking to them. In such * a scenario consumer should treat leaves as `FileChunk`s and SHOULD ignore * `mode` and `mtime` fileds on them. However if those leves are accessed as * files consumer SHOULD treat them as `SimpleFileLayout` and SHOULD NOT ignore @@ -516,7 +516,8 @@ export type Mode = uint32; export interface AnonymousLink extends PBLink { Hash: CID /** - * UnixFS links MUST specify size of the linked block. + * UnixFS links SHOULD specify size of the linked subgraph + * (cumulative size of linked block and the blocks under it) */ Tsize: uint64 @@ -541,7 +542,8 @@ export interface NamedLink extends PBLink { Name: string /** - * UnixFS links MUST specify size of the linked block. + * UnixFS links SHOULD specify size of the linked subgraph + * (cumulative size of linked block and the blocks under it) */ Tsize: uint64 }