Skip to content

Commit

Permalink
fastwalk: add new dirent package for parsing Unix dirents
Browse files Browse the repository at this point in the history
This commit updates the Dirent parsing logic to match what the go1.22
stdlib uses.

This is an attempt to fix FZF issue:
junegunn/fzf#3706
  • Loading branch information
charlievieth committed Apr 4, 2024
1 parent 596420e commit 9cba5f4
Show file tree
Hide file tree
Showing 16 changed files with 515 additions and 107 deletions.
62 changes: 6 additions & 56 deletions fastwalk_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@
package fastwalk

import (
"fmt"
"io/fs"
"os"
"syscall"
"unsafe"

"github.com/charlievieth/fastwalk/internal/dirent"
)

const blockSize = 8 << 10
// More than 5760 to work around https://golang.org/issue/24015.
const blockSize = 8192

// unknownFileMode is a sentinel (and bogus) os.FileMode
// value used to represent a syscall.DT_UNKNOWN Dirent.Type.
Expand Down Expand Up @@ -45,8 +46,9 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er
return nil
}
}
consumed, name, typ := parseDirEnt(buf[bufp:nbuf])
consumed, name, typ := dirent.Parse(buf[bufp:nbuf])
bufp += consumed

if name == "" || name == "." || name == ".." {
continue
}
Expand Down Expand Up @@ -78,58 +80,6 @@ func readDir(dirName string, fn func(dirName, entName string, de fs.DirEntry) er
}
}

// parseDirEnt decodes the directory entry at the start of buf.
//
// consumed is the entry's record length. name is left empty when the entry
// has a zero inode or a type this function does not recognise. typ is the
// os.FileMode type bits for the entry, or unknownFileMode for DT_UNKNOWN.
//
// It panics if buf is too short to contain the Reclen field, or shorter
// than the record length stored in that field.
func parseDirEnt(buf []byte) (consumed int, name string, typ os.FileMode) {
	// Work on a copy of the bytes in a real syscall.Dirent instead of
	// pointing into buf; see golang.org/issue/37269.
	var ent syscall.Dirent
	raw := (*[unsafe.Sizeof(syscall.Dirent{})]byte)(unsafe.Pointer(&ent))
	copy(raw[:], buf)

	hdrEnd := unsafe.Offsetof(ent.Reclen) + unsafe.Sizeof(ent.Reclen)
	if uintptr(len(buf)) < hdrEnd {
		panic(fmt.Sprintf("buf size of %d smaller than dirent header size %d", len(buf), hdrEnd))
	}
	recLen := int(ent.Reclen)
	if len(buf) < recLen {
		panic(fmt.Sprintf("buf size %d < record length %d", len(buf), ent.Reclen))
	}
	consumed = recLen

	// A zero inode means the file is absent from the directory.
	if direntInode(&ent) == 0 {
		return consumed, "", 0
	}

	switch ent.Type {
	case syscall.DT_DIR:
		typ = os.ModeDir
	case syscall.DT_REG:
		typ = 0
	case syscall.DT_LNK:
		typ = os.ModeSymlink
	case syscall.DT_FIFO:
		typ = os.ModeNamedPipe
	case syscall.DT_SOCK:
		typ = os.ModeSocket
	case syscall.DT_BLK:
		typ = os.ModeDevice
	case syscall.DT_UNKNOWN:
		typ = unknownFileMode
	default:
		// Skip weird things.
		// It's probably a DT_WHT (http://lwn.net/Articles/325369/)
		// or something. Revisit if/when this package is moved outside
		// of goimports. goimports only cares about regular files,
		// symlinks, and directories.
		return consumed, "", 0
	}

	nameBytes := (*[unsafe.Sizeof(ent.Name)]byte)(unsafe.Pointer(&ent.Name[0]))
	n := direntNamlen(&ent)

	// "." and ".." are special-cased so they become constant strings.
	switch {
	case n == 1 && nameBytes[0] == '.':
		name = "."
	case n == 2 && nameBytes[0] == '.' && nameBytes[1] == '.':
		name = ".."
	default:
		name = string(nameBytes[:n])
	}
	return consumed, name, typ
}

// According to https://golang.org/doc/go1.14#runtime
// A consequence of the implementation of preemption is that on Unix systems, including Linux and macOS
// systems, programs built with Go 1.14 will receive more signals than programs built with earlier releases.
Expand Down
18 changes: 0 additions & 18 deletions fastwalk_unix_bsd.go

This file was deleted.

33 changes: 0 additions & 33 deletions fastwalk_unix_linux.go

This file was deleted.

109 changes: 109 additions & 0 deletions internal/dirent/dirent.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
//go:build aix || dragonfly || freebsd || (js && wasm) || wasip1 || linux || netbsd || openbsd || solaris

package dirent

import (
"os"
"runtime"
"syscall"
"unsafe"
)

// readInt returns the size-bytes unsigned integer in native byte order at offset off.
// readInt decodes the size-byte unsigned integer stored at offset off in b,
// using the byte order selected by isBigEndian. ok is false, and u is 0,
// when b is too short to hold size bytes starting at off.
func readInt(b []byte, off, size uintptr) (u uint64, ok bool) {
	if end := int(off + size); end > len(b) {
		return 0, false
	}
	field := b[off:]
	if !isBigEndian {
		return readIntLE(field, size), true
	}
	return readIntBE(field, size), true
}

// readIntBE decodes the first size bytes of b as a big-endian unsigned
// integer. size must be 1, 2, 4 or 8; any other value panics. Indexing
// panics if b holds fewer than size bytes.
func readIntBE(b []byte, size uintptr) uint64 {
	switch size {
	case 1:
		return uint64(b[0])
	case 2:
		_ = b[1] // bounds check hint to compiler; see golang.org/issue/14808
		return uint64(b[0])<<8 | uint64(b[1])
	case 4:
		_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
		return uint64(b[0])<<24 | uint64(b[1])<<16 | uint64(b[2])<<8 | uint64(b[3])
	case 8:
		_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
		hi := uint64(b[0])<<24 | uint64(b[1])<<16 | uint64(b[2])<<8 | uint64(b[3])
		lo := uint64(b[4])<<24 | uint64(b[5])<<16 | uint64(b[6])<<8 | uint64(b[7])
		return hi<<32 | lo
	default:
		panic("syscall: readInt with unsupported size")
	}
}

// readIntLE decodes the first size bytes of b as a little-endian unsigned
// integer. size must be 1, 2, 4 or 8; any other value panics. Indexing
// panics if b holds fewer than size bytes.
func readIntLE(b []byte, size uintptr) uint64 {
	switch size {
	case 1:
		return uint64(b[0])
	case 2:
		_ = b[1] // bounds check hint to compiler; see golang.org/issue/14808
		return uint64(b[1])<<8 | uint64(b[0])
	case 4:
		_ = b[3] // bounds check hint to compiler; see golang.org/issue/14808
		return uint64(b[3])<<24 | uint64(b[2])<<16 | uint64(b[1])<<8 | uint64(b[0])
	case 8:
		_ = b[7] // bounds check hint to compiler; see golang.org/issue/14808
		lo := uint64(b[3])<<24 | uint64(b[2])<<16 | uint64(b[1])<<8 | uint64(b[0])
		hi := uint64(b[7])<<24 | uint64(b[6])<<16 | uint64(b[5])<<8 | uint64(b[4])
		return hi<<32 | lo
	default:
		panic("syscall: readInt with unsupported size")
	}
}

// InvalidMode is the os.FileMode with all 32 bits set (1<<32 - 1). Parse
// returns it, together with an empty name, for any entry it could not
// decode or decided to skip.
//
// NOTE(review): this is the same value as the ^os.FileMode(0) that the
// direntType helpers return for an unknown type, so typ alone does not
// distinguish "skipped entry" from "valid entry of unknown type"; the
// empty name is what marks a skipped entry.
const InvalidMode = os.FileMode(1<<32 - 1)

// Parse decodes the directory entry at the start of buf.
//
// consumed is the number of bytes the entry occupies; the caller advances
// its buffer position by that amount. consumed is 0 when the record length
// cannot be read or exceeds len(buf), in which case the caller must stop
// parsing buf.
//
// name is the entry name truncated at the first NUL byte. It is empty, and
// typ is InvalidMode, when the entry cannot be decoded or has a zero inode
// (zero inodes are accepted on wasip1). Otherwise typ is whatever
// direntType reports for the record.
func Parse(buf []byte) (consumed int, name string, typ os.FileMode) {
	reclen, ok := direntReclen(buf)
	if !ok || reclen > uint64(len(buf)) {
		// WARN: this is a hard error because we consumed 0 bytes
		// and not stopping here could lead to an infinite loop.
		return 0, "", InvalidMode
	}
	consumed = int(reclen)
	rec := buf[:reclen]

	ino, ok := direntIno(rec)
	if !ok {
		return consumed, "", InvalidMode
	}
	// When building to wasip1, the host runtime might be running on Windows
	// or might expose a remote file system which does not have the concept
	// of inodes. Therefore, we cannot make the assumption that it is safe
	// to skip entries with zero inodes.
	if ino == 0 && runtime.GOOS != "wasip1" {
		return consumed, "", InvalidMode
	}

	// Read the type from rec, not buf, so a short record can never pick up
	// a byte that belongs to the following entry.
	typ = direntType(rec)

	const namoff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))
	namlen, ok := direntNamlen(rec)
	// Compare without computing namoff+namlen: some direntNamlen
	// implementations derive the length as reclen-namoff, which wraps around
	// for a record shorter than namoff. The wrapped sum would pass a naive
	// "namoff+namlen > len(rec)" check and make the slice below panic.
	recLen := uint64(len(rec))
	if !ok || namoff > recLen || namlen > recLen-namoff {
		return consumed, "", InvalidMode
	}
	namebuf := rec[namoff : namoff+namlen]
	for i, c := range namebuf {
		if c == 0 {
			namebuf = namebuf[:i]
			break
		}
	}

	// Check for useless names before allocating a string.
	switch string(namebuf) {
	case ".":
		name = "."
	case "..":
		name = ".."
	default:
		name = string(namebuf)
	}
	return consumed, name, typ
}
29 changes: 29 additions & 0 deletions internal/dirent/dirent_aix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//go:build aix

package dirent

import (
"os"
"syscall"
"unsafe"
)

// direntIno reads the Ino field of the raw directory entry in buf.
// The boolean is false when buf is too short to contain the field.
func direntIno(buf []byte) (uint64, bool) {
	const (
		off  = unsafe.Offsetof(syscall.Dirent{}.Ino)
		size = unsafe.Sizeof(syscall.Dirent{}.Ino)
	)
	return readInt(buf, off, size)
}

// direntReclen reads the Reclen field of the raw directory entry in buf.
// The boolean is false when buf is too short to contain the field.
func direntReclen(buf []byte) (uint64, bool) {
	const (
		off  = unsafe.Offsetof(syscall.Dirent{}.Reclen)
		size = unsafe.Sizeof(syscall.Dirent{}.Reclen)
	)
	return readInt(buf, off, size)
}

// direntNamlen returns the number of bytes available for the name in the
// raw directory entry in buf, computed as the record length minus the
// offset of the Name field.
//
// The boolean is false when the record length cannot be read, or when it is
// smaller than the Name offset. The second case used to underflow the
// unsigned subtraction and report a huge bogus length.
func direntNamlen(buf []byte) (uint64, bool) {
	const namoff = uint64(unsafe.Offsetof(syscall.Dirent{}.Name))
	reclen, ok := direntReclen(buf)
	if !ok || reclen < namoff {
		return 0, false
	}
	return reclen - namoff, true
}

// direntType always reports an unknown file type (all mode bits set);
// buf is not examined.
func direntType(buf []byte) os.FileMode {
	const unknown = ^os.FileMode(0)
	return unknown
}
54 changes: 54 additions & 0 deletions internal/dirent/dirent_dragonfly.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
//go:build dragonfly

package dirent

import (
"os"
"syscall"
"unsafe"
)

func direntIno(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Fileno), unsafe.Sizeof(syscall.Dirent{}.Fileno))
}

// direntReclen derives the record length of the raw directory entry in buf
// from its name length: 16 + namlen + 1, rounded up to a multiple of 8.
// The boolean is false when the name length cannot be read.
//
// NOTE(review): 16 is presumably the size of the fixed header and the +1
// the name's NUL terminator; confirm against the DragonFly dirent layout.
func direntReclen(buf []byte) (uint64, bool) {
	namlen, ok := direntNamlen(buf)
	if !ok {
		return 0, false
	}
	unpadded := 16 + namlen + 1
	return (unpadded + 7) &^ 7, true
}

func direntNamlen(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen))
}

// direntType maps the Type byte of the raw directory entry in buf to the
// matching os.FileMode type bits. It returns ^os.FileMode(0) ("unknown")
// when buf is too short to contain the Type byte or the value is not one
// of the DT_* constants handled below.
func direntType(buf []byte) os.FileMode {
	const unknown = ^os.FileMode(0)

	typeOff := unsafe.Offsetof(syscall.Dirent{}.Type)
	if uintptr(len(buf)) <= typeOff {
		return unknown
	}
	switch buf[typeOff] {
	case syscall.DT_REG, syscall.DT_DBF:
		// DT_DBF is "database record file".
		// fillFileStatFromSys treats as regular file.
		return 0
	case syscall.DT_DIR:
		return os.ModeDir
	case syscall.DT_LNK:
		return os.ModeSymlink
	case syscall.DT_FIFO:
		return os.ModeNamedPipe
	case syscall.DT_SOCK:
		return os.ModeSocket
	case syscall.DT_CHR:
		return os.ModeDevice | os.ModeCharDevice
	case syscall.DT_BLK:
		return os.ModeDevice
	default:
		return unknown
	}
}
46 changes: 46 additions & 0 deletions internal/dirent/dirent_freebsd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//go:build freebsd

package dirent

import (
"os"
"syscall"
"unsafe"
)

func direntIno(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Fileno), unsafe.Sizeof(syscall.Dirent{}.Fileno))
}

// direntReclen reads the Reclen field of the raw directory entry in buf.
// The boolean is false when buf is too short to contain the field.
func direntReclen(buf []byte) (uint64, bool) {
	const (
		off  = unsafe.Offsetof(syscall.Dirent{}.Reclen)
		size = unsafe.Sizeof(syscall.Dirent{}.Reclen)
	)
	return readInt(buf, off, size)
}

func direntNamlen(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(syscall.Dirent{}.Namlen), unsafe.Sizeof(syscall.Dirent{}.Namlen))
}

// direntType maps the Type byte of the raw directory entry in buf to the
// matching os.FileMode type bits. It returns ^os.FileMode(0) ("unknown")
// when buf is too short to contain the Type byte or the value is not one
// of the DT_* constants handled below.
func direntType(buf []byte) os.FileMode {
	const unknown = ^os.FileMode(0)

	typeOff := unsafe.Offsetof(syscall.Dirent{}.Type)
	if uintptr(len(buf)) <= typeOff {
		return unknown
	}
	switch buf[typeOff] {
	case syscall.DT_REG:
		return 0
	case syscall.DT_DIR:
		return os.ModeDir
	case syscall.DT_LNK:
		return os.ModeSymlink
	case syscall.DT_FIFO:
		return os.ModeNamedPipe
	case syscall.DT_SOCK:
		return os.ModeSocket
	case syscall.DT_CHR:
		return os.ModeDevice | os.ModeCharDevice
	case syscall.DT_BLK:
		return os.ModeDevice
	default:
		return unknown
	}
}

0 comments on commit 9cba5f4

Please sign in to comment.