Skip to content

Commit

Permalink
prog: introduce more readable format for data args
Browse files Browse the repository at this point in the history
Fixes #460

File names, crypto algorithm names, etc. in serialized programs are completely unreadable:

bind$alg(r0, &(0x7f0000408000)={0x26, "6861736800000000000000000000",
0x0, 0x0, "6d6435000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000
00000000000"}, 0x58)

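Introduce another, human-readable format for data args that hold printable strings.
Args in the new format are delimited by single quotes (''); args in the old hex format keep double quotes ("").
The new format is used when the data contains only printable chars, \x00,
and control characters such as \t, \r, \n.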

Example:
`serialize(&(0x7f0000408000)={"6861736800000000000000000000", "4849000000"})`,
vs:
`serialize(&(0x7f0000408000)={'hash\x00', 'HI\x00'})`,
  • Loading branch information
dvyukov committed Dec 17, 2017
1 parent 286edfb commit 41799de
Show file tree
Hide file tree
Showing 6 changed files with 353 additions and 152 deletions.
10 changes: 6 additions & 4 deletions executor/syscalls_test.h
Expand Up @@ -2,10 +2,10 @@

#if 0
#define GOARCH "32"
#define SYZ_REVISION "d250d71b6658517f5cde664a9f26c969ddb5c4d1"
#define SYZ_REVISION "ab5b2b746192f26c994482e664534cbf487e3dbe"
#define __NR_syz_test 1000000

unsigned syscall_count = 74;
unsigned syscall_count = 75;
call_t syscalls[] = {
{"mmap", 0, (syscall_t)mmap},
{"mutate0", 0, (syscall_t)mutate0},
Expand All @@ -15,6 +15,7 @@ call_t syscalls[] = {
{"mutate4", 0, (syscall_t)mutate4},
{"mutate5", 0, (syscall_t)mutate5},
{"mutate6", 0, (syscall_t)mutate6},
{"serialize", 0, (syscall_t)serialize},
{"syz_test", 1000000, (syscall_t)syz_test},
{"syz_test$align0", 1000000, (syscall_t)syz_test},
{"syz_test$align1", 1000000, (syscall_t)syz_test},
Expand Down Expand Up @@ -87,10 +88,10 @@ call_t syscalls[] = {

#if 0
#define GOARCH "64"
#define SYZ_REVISION "42c3155912d115b719bd9a905c83c2419b5935f2"
#define SYZ_REVISION "1dd179fe95626e284e24e4e636195924bced24ef"
#define __NR_syz_test 1000000

unsigned syscall_count = 74;
unsigned syscall_count = 75;
call_t syscalls[] = {
{"mmap", 0, (syscall_t)mmap},
{"mutate0", 0, (syscall_t)mutate0},
Expand All @@ -100,6 +101,7 @@ call_t syscalls[] = {
{"mutate4", 0, (syscall_t)mutate4},
{"mutate5", 0, (syscall_t)mutate5},
{"mutate6", 0, (syscall_t)mutate6},
{"serialize", 0, (syscall_t)serialize},
{"syz_test", 1000000, (syscall_t)syz_test},
{"syz_test$align0", 1000000, (syscall_t)syz_test},
{"syz_test$align1", 1000000, (syscall_t)syz_test},
Expand Down
155 changes: 141 additions & 14 deletions prog/encoding.go
Expand Up @@ -8,7 +8,6 @@ import (
"bytes"
"encoding/hex"
"fmt"
"io"
"strconv"
)

Expand Down Expand Up @@ -54,7 +53,7 @@ func (p *Prog) Serialize() []byte {
return buf.Bytes()
}

func serialize(arg Arg, buf io.Writer, vars map[Arg]int, varSeq *int) {
func serialize(arg Arg, buf *bytes.Buffer, vars map[Arg]int, varSeq *int) {
if arg == nil {
fmt.Fprintf(buf, "nil")
return
Expand All @@ -75,7 +74,15 @@ func serialize(arg Arg, buf io.Writer, vars map[Arg]int, varSeq *int) {
fmt.Fprintf(buf, "&%v=", serializeAddr(arg))
serialize(a.Res, buf, vars, varSeq)
case *DataArg:
fmt.Fprintf(buf, "\"%v\"", hex.EncodeToString(a.Data))
data := a.Data
if !arg.Type().Varlen() {
// Statically typed data will be padded with 0s during
// deserialization, so we can strip them here for readability.
for len(data) >= 2 && data[len(data)-1] == 0 && data[len(data)-2] == 0 {
data = data[:len(data)-1]
}
}
serializeData(buf, data)
case *GroupArg:
var delims []byte
switch arg.Type().(type) {
Expand Down Expand Up @@ -125,8 +132,7 @@ func (target *Target) Deserialize(data []byte) (prog *Prog, err error) {
prog = &Prog{
Target: target,
}
p := &parser{r: bufio.NewScanner(bytes.NewReader(data))}
p.r.Buffer(nil, maxLineLen)
p := newParser(data)
vars := make(map[string]Arg)
for p.Scan() {
if p.EOF() || p.Char() == '#' {
Expand Down Expand Up @@ -276,16 +282,10 @@ func (target *Target) parseArg(typ Type, p *parser, vars map[string]Arg) (Arg, e
return nil, err
}
arg = MakeConstArg(typ, pages*target.PageSize)
case '"':
p.Parse('"')
val := ""
if p.Char() != '"' {
val = p.Ident()
}
p.Parse('"')
data, err := hex.DecodeString(val)
case '"', '\'':
data, err := deserializeData(p)
if err != nil {
return nil, fmt.Errorf("data arg has bad value '%v'", val)
return nil, err
}
if !typ.Varlen() {
if diff := int(typ.Size()) - len(data); diff > 0 {
Expand Down Expand Up @@ -475,6 +475,114 @@ func parseAddr(p *parser, base bool) (uint64, int, uint64, error) {
return page, int(off), size, nil
}

// serializeData appends the textual form of data to buf.
//
// Two encodings are produced:
//   - a single-quoted string with backslash escapes, used when every byte is
//     printable ASCII (0x20..0x7e), NUL, or one of \a \b \f \n \r \t \v;
//   - a double-quoted hex string, used for everything else and for empty data.
func serializeData(buf *bytes.Buffer, data []byte) {
	// Empty data always takes the hex form, so start from "not text" there.
	asText := len(data) != 0
	for i := 0; asText && i < len(data); i++ {
		c := data[i]
		switch {
		case c >= 0x20 && c < 0x7f:
			// Printable ASCII.
		case c == 0, c == '\a', c == '\b', c == '\f', c == '\n', c == '\r', c == '\t', c == '\v':
			// Representable with a short escape sequence.
		default:
			asText = false
		}
	}
	if !asText {
		fmt.Fprintf(buf, "\"%s\"", hex.EncodeToString(data))
		return
	}
	buf.WriteByte('\'')
	for _, c := range data {
		escaped := ""
		switch c {
		case 0:
			escaped = `\x00`
		case '\a':
			escaped = `\a`
		case '\b':
			escaped = `\b`
		case '\f':
			escaped = `\f`
		case '\n':
			escaped = `\n`
		case '\r':
			escaped = `\r`
		case '\t':
			escaped = `\t`
		case '\v':
			escaped = `\v`
		case '\'':
			// The quote delimits the string, so it must be escaped.
			escaped = `\'`
		case '\\':
			escaped = `\\`
		}
		if escaped == "" {
			buf.WriteByte(c)
			continue
		}
		buf.WriteString(escaped)
	}
	buf.WriteByte('\'')
}

// deserializeData parses one data argument at the parser's current position
// and returns the decoded bytes.
//
// A leading '"' selects the hex format: the text between the double quotes is
// decoded with hex.DecodeString. Otherwise the argument must start with a
// single quote and is read byte by byte, with backslash escapes (\x00, \a,
// \b, \f, \n, \r, \t, \v, \', \\) translated back to raw bytes. Any other
// escape, including a \x sequence other than \x00, is reported as an error.
func deserializeData(p *parser) ([]byte, error) {
	if p.Char() == '"' {
		p.Parse('"')
		var hexText string
		if p.Char() != '"' {
			hexText = p.Ident()
		}
		p.Parse('"')
		decoded, err := hex.DecodeString(hexText)
		if err != nil {
			return nil, fmt.Errorf("data arg has bad value %q", hexText)
		}
		return decoded, nil
	}

	if p.consume() != '\'' {
		return nil, fmt.Errorf("data arg does not start with \" nor with '")
	}
	var out []byte
	// Stop at the closing quote, or when Char yields 0 (presumably end of
	// input or a parser already in the error state; Char is not shown here).
	for p.Char() != '\'' && p.Char() != 0 {
		c := p.consume()
		if c != '\\' {
			out = append(out, c)
			continue
		}
		esc := p.consume()
		var raw byte
		switch esc {
		case 'x':
			// Only \x00 is accepted; other hex escapes are rejected.
			hi := p.consume()
			lo := p.consume()
			if hi != '0' || lo != '0' {
				return nil, fmt.Errorf(
					"invalid \\x%c%c escape sequence in data arg", hi, lo)
			}
			raw = 0
		case 'a':
			raw = '\a'
		case 'b':
			raw = '\b'
		case 'f':
			raw = '\f'
		case 'n':
			raw = '\n'
		case 'r':
			raw = '\r'
		case 't':
			raw = '\t'
		case 'v':
			raw = '\v'
		case '\'', '\\':
			// These escapes stand for the character itself.
			raw = esc
		default:
			return nil, fmt.Errorf("invalid \\%c escape sequence in data arg", esc)
		}
		out = append(out, raw)
	}
	p.Parse('\'')
	return out, nil
}

type parser struct {
r *bufio.Scanner
s string
Expand All @@ -483,6 +591,12 @@ type parser struct {
e error
}

// newParser returns a parser that scans data line by line. The scanner's
// maximum buffer size is set to maxLineLen.
func newParser(data []byte) *parser {
	scanner := bufio.NewScanner(bytes.NewReader(data))
	scanner.Buffer(nil, maxLineLen)
	return &parser{r: scanner}
}

func (p *parser) Scan() bool {
if p.e != nil {
return false
Expand Down Expand Up @@ -536,6 +650,19 @@ func (p *parser) Parse(ch byte) {
p.SkipWs()
}

// consume returns the byte at the current position and advances past it.
// Unlike Parse, it does not skip whitespace afterwards, so it is suitable for
// reading quoted data verbatim. It returns 0 if the parser already holds an
// error, and records an "unexpected eof" failure (also returning 0) when the
// current line is exhausted.
func (p *parser) consume() byte {
	switch {
	case p.e != nil:
		return 0
	case p.EOF():
		p.failf("unexpected eof")
		return 0
	}
	ch := p.s[p.i]
	p.i++
	return ch
}

func (p *parser) SkipWs() {
for p.i < len(p.s) && (p.s[p.i] == ' ' || p.s[p.i] == '\t') {
p.i++
Expand Down
47 changes: 47 additions & 0 deletions prog/encoding_test.go
Expand Up @@ -4,7 +4,9 @@
package prog

import (
"bytes"
"fmt"
"math/rand"
"reflect"
"regexp"
"sort"
Expand All @@ -20,6 +22,30 @@ func setToArray(s map[string]struct{}) []string {
return a
}

// TestSerializeData checks that serializeData and deserializeData round-trip:
// short pseudo-random byte slices (0 to 3 bytes, fixed seed) are serialized,
// parsed back, and compared with the input.
func TestSerializeData(t *testing.T) {
	t.Parallel()
	rnd := rand.New(rand.NewSource(0))
	for iter := 0; iter < 1e4; iter++ {
		data := make([]byte, rnd.Intn(4))
		for j := range data {
			data[j] = byte(rnd.Intn(256))
		}
		var out bytes.Buffer
		serializeData(&out, data)
		p := newParser(out.Bytes())
		if !p.Scan() {
			t.Fatalf("parser does not scan")
		}
		parsed, err := deserializeData(p)
		if err != nil {
			t.Fatalf("failed to deserialize %q -> %s: %v", data, out.Bytes(), err)
		}
		if !bytes.Equal(data, parsed) {
			t.Fatalf("corrupted data %q -> %s -> %q", data, out.Bytes(), parsed)
		}
	}
}

func TestCallSet(t *testing.T) {
tests := []struct {
prog string
Expand Down Expand Up @@ -137,3 +163,24 @@ func TestDeserialize(t *testing.T) {
}
}
}

// TestSerializeDeserialize deserializes a program written in the old hex data
// format and checks that serializing it again produces the expected readable
// single-quoted form, followed by a newline.
func TestSerializeDeserialize(t *testing.T) {
	target := initTargetTest(t, "test", "64")
	cases := []struct {
		input string
		want  string
	}{
		{
			input: `serialize(&(0x7f0000408000)={"6861736800000000000000000000", "4849000000"})`,
			want:  `serialize(&(0x7f0000408000)={'hash\x00', 'HI\x00'})`,
		},
	}
	for _, tc := range cases {
		p, err := target.Deserialize([]byte(tc.input))
		if err != nil {
			t.Fatal(err)
		}
		got := p.Serialize()
		// Serialize output ends with a newline.
		want := tc.want + "\n"
		if string(got) != want {
			t.Fatalf("\ngot : %s\nwant: %s", got, want)
		}
	}
}

0 comments on commit 41799de

Please sign in to comment.