dvyukov · disconnect3d · Jan 7, 2022 · Jan 14, 2022 · Jan 28, 2022 · Jan 28, 2022
diff --git a/README.md b/README.md
@@ -148,6 +148,13 @@ Vendoring with modules is not yet supported. A `vendor` directory will be ignore
 Note that while modules are used to prepare the build, the final instrumented build is still done in GOPATH mode.
 For most modules, this should not matter.
 
+## Fuzzing dictionaries
+
+go-fuzz supports user-defined dictionaries of tokens and other interesting byte sequences. When a dictionary is
+given, its high-signal tokens replace the low-signal string-literal token list that go-fuzz generates automatically.
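+Use `-dict DICTIONARY_FILE` to provide a dictionary, or `-dict DICTIONARY_FILE@LEVEL` to also load tokens whose level is at most `LEVEL` (the default level is 0). The dictionary syntax is the same as AFL/libFuzzer.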
+See [AFL Dictionaries](https://github.com/google/AFL/blob/master/dictionaries/README.dictionaries) for more information.
+
 ## libFuzzer support
 
 go-fuzz-build can also generate an archive file

diff --git a/go-fuzz/hub.go b/go-fuzz/hub.go
@@ -4,10 +4,14 @@
 package main
 
 import (
+	"bytes"
+	"encoding/hex"
 	"fmt"
+	"io/ioutil"
 	"log"
 	"net/rpc"
 	"path/filepath"
+	"strconv"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -73,6 +77,102 @@ type Stats struct {
 	restarts uint64
 }
 
+// parseDictTokenLine extracts the quoted token from one line of an AFL/libFuzzer
+// dictionary, e.g. `kw@2="ab\x00"`. Only the quoted part is kept; inside it the
+// escapes \\, \", \n, \t and \xNN are decoded and any other escaped byte is dropped.
+// An "@<digits>" outside the quotes sets the token level (0 when absent).
+//
+// tokenLineNo is used only in log messages. The result is nil when the line is
+// malformed (this is logged), when it yields no token bytes, or when the token
+// level is greater than the global dictLevel.
+func parseDictTokenLine(tokenLine *[]byte, tokenLineNo int) *[]byte {
+	line := *tokenLine
+	token := make([]byte, 0, len(line))
+	tokenLevel := 0
+	inToken := false // false while reading metadata (name, level, '='), true inside the quotes
+	for i := 0; i < len(line); i++ {
+		c := line[i]
+		if !inToken {
+			switch c {
+			case '"':
+				// A quote opens the token when nothing but whitespace precedes it, or
+				// when it follows '='. AFL allows whitespace around '=', so trailing
+				// blanks are skipped before looking at the previous byte.
+				prefix := bytes.TrimRight(line[:i], " \t")
+				if len(prefix) == 0 || prefix[len(prefix)-1] == '=' {
+					inToken = true
+				}
+			case '@':
+				// "@<digits>" carries the token level; an '@' that ends the line is ignored.
+				if i+1 < len(line) {
+					end := i + 1
+					for end < len(line) && '0' <= line[end] && line[end] <= '9' {
+						end++
+					}
+					level, err := strconv.Atoi(string(line[i+1 : end]))
+					if err != nil {
+						// No digits after '@', or a number that does not fit in an int.
+						log.Printf("token level in dictionary line %d could not be parsed", tokenLineNo)
+						return nil
+					}
+					tokenLevel = level
+				}
+			}
+			continue
+		}
+
+		switch c {
+		case '"':
+			// The first unescaped quote inside the token ends it.
+			inToken = false
+		case '\\':
+			// Escape sequence: the byte after the backslash selects its meaning.
+			i++
+			if i >= len(line) {
+				log.Printf("dictionary token in line %d has incorrect format", tokenLineNo)
+				return nil
+			}
+			switch line[i] {
+			case '"', '\\':
+				// Escaped quote (\") or backslash (\\).
+				token = append(token, line[i])
+			case 'x':
+				// Hexadecimal byte (e.g. \xFF): two more bytes must follow the 'x'.
+				if i+2 >= len(line) {
+					log.Printf("dictionary token in line %d has incorrect format", tokenLineNo)
+					return nil
+				}
+				var decoded [1]byte
+				if _, err := hex.Decode(decoded[:], line[i+1:i+3]); err != nil {
+					log.Printf("dictionary token in line %d has incorrect format", tokenLineNo)
+					return nil
+				}
+				token = append(token, decoded[0])
+				i += 2
+			case 'n':
+				// Newline (\n).
+				token = append(token, '\n')
+			case 't':
+				// Tab (\t).
+				token = append(token, '\t')
+			}
+		default:
+			token = append(token, c)
+		}
+	}
+
+	// A line without a usable quoted token (blank, stray "\r", missing quotes, `""`)
+	// must not add an empty entry to the dictionary.
+	if len(token) == 0 {
+		return nil
+	}
+	// Keep the token only if its level does not exceed the level selected by the user.
+	if tokenLevel > dictLevel {
+		return nil
+	}
+	return &token
+}
+
 func newHub(metadata MetaData) *Hub {
 	procs := *flagProcs
 	hub := &Hub{
@@ -116,6 +216,35 @@ func newHub(metadata MetaData) *Hub {
 			ro.intLits = append(ro.intLits, []byte(lit.Val))
 		}
 	}
+
+	if dictPath != "" {
+		/*
+			Replaces the low-signal token list with a user defined high-signal token list.
+			Existing tokens that were obtained through token capture and which are stored in ro.strLits are discarded.
+			The intLits tokens are not discarded and will be used. However the user can also specify integers as a
+			bytearray in the dictionary to use them as well.
+		*/
+		ro.strLits = nil // Discard existing tokens
+		dictionary, err := ioutil.ReadFile(dictPath)
+		if err != nil {
+			log.Fatalf("could not read tokens from %q: %v", dictPath, err)
+		}
+
+		for tokenLineNo, tokenLine := range bytes.Split(dictionary, []byte("\n")) {
+			// Ignore Comments
+			if bytes.HasPrefix(bytes.TrimSpace(tokenLine), []byte("#")) || len(tokenLine) == 0 {
+				continue
+			}
+			token := parseDictTokenLine(&tokenLine, tokenLineNo)
+			if token != nil {
+				// add token to ro.strLits
+				ro.strLits = append(ro.strLits, *token)
+			}
+
+		}
+
+	}
+
 	hub.ro.Store(ro)
 
 	go hub.loop()

diff --git a/go-fuzz/main.go b/go-fuzz/main.go
@@ -13,6 +13,8 @@ import (
 	"path/filepath"
 	"runtime"
 	"runtime/debug"
+	"strconv"
+	"strings"
 	"sync/atomic"
 	"syscall"
 	"time"
@@ -42,10 +44,14 @@ var (
 	flagSonar             = flag.Bool("sonar", true, "use sonar hints")
 	flagV                 = flag.Int("v", 0, "verbosity level")
 	flagHTTP              = flag.String("http", "", "HTTP server listen address (coordinator mode only)")
+	flagDict              = flag.String("dict", "", "optional fuzzer dictionary (using AFL/Libfuzzer format)")
 
 	shutdown        uint32
 	shutdownC       = make(chan struct{})
 	shutdownCleanup []func()
+
+	dictPath  = ""
+	dictLevel = 0
 )
 
 func main() {
@@ -57,6 +63,32 @@ func main() {
 		log.Fatalf("both -http and -worker are specified")
 	}
 
+	if *flagDict != "" {
+		// Check if the provided path exists
+		_, err := os.Stat(*flagDict)
+		if err != nil {
+			// If not it might be because a dictLevel was provided by appending @<num> to the dict path
+			atIndex := strings.LastIndex(*flagDict, "@")
+			if atIndex != -1 {
+				dictPath = (*flagDict)[:atIndex]
+				_, errStat := os.Stat(dictPath)
+				if errStat != nil {
+					log.Fatalf("cannot read dictionary file %q: %v", dictPath, err)
+				}
+				dictLevel, err = strconv.Atoi((*flagDict)[atIndex+1:])
+				if err != nil {
+					log.Printf("could not convert dict level using dict level 0 instead")
+					dictLevel = 0
+				}
+			} else {
+				// If no dictLevel is provided and the dictionary does not exist log error and exit
+				log.Fatalf("cannot read dictionary file %q: %v", *flagDict, err)
+			}
+		} else {
+			dictPath = *flagDict
+		}
+	}
+
 	go func() {
 		c := make(chan os.Signal, 1)
 		signal.Notify(c, syscall.SIGINT)
@@ -100,8 +132,8 @@ func main() {
 			// Try the default. Best effort only.
 			var bin string
 			cfg := new(packages.Config)
-			// Note that we do not set GO111MODULE here in order to respect any GO111MODULE 
-			// setting by the user as we are finding dependencies. See modules support 
+			// Note that we do not set GO111MODULE here in order to respect any GO111MODULE
+			// setting by the user as we are finding dependencies. See modules support
 			// comments in go-fuzz-build/main.go for more details.
 			cfg.Env = os.Environ()
 			pkgs, err := packages.Load(cfg, ".")