Skip to content

Commit

Permalink
syz-manager: corpus rotation
Browse files Browse the repository at this point in the history
Use a random subset of syscalls/corpus/coverage for each individual VM run.
The hypothesis is that this should allow the fuzzer to get more coverage
and find more bugs in a saturated state (stuck in a local optimum).
See the issue and comments for details.

Update google#1348
  • Loading branch information
dvyukov committed Dec 30, 2019
1 parent a0e84d6 commit 7c4e06f
Show file tree
Hide file tree
Showing 12 changed files with 562 additions and 63 deletions.
7 changes: 0 additions & 7 deletions pkg/signal/signal.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@
// Package signal provides types for working with feedback signal.
package signal

import (
"sort"
)

type (
elemType uint32
prioType int8
Expand Down Expand Up @@ -163,9 +159,6 @@ type Context struct {
}

func Minimize(corpus []Context) []interface{} {
sort.Slice(corpus, func(i, j int) bool {
return corpus[i].Signal.Len() > corpus[j].Signal.Len()
})
type ContextPrio struct {
prio prioType
idx int
Expand Down
4 changes: 1 addition & 3 deletions prog/prio.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,7 @@ func (target *Target) calcResourceUsage() map[string]map[int]weights {
ForeachType(c, func(t Type) {
switch a := t.(type) {
case *ResourceType:
if a.Desc.Name == "pid" || a.Desc.Name == "uid" || a.Desc.Name == "gid" {
// Pid/uid/gid usually play auxiliary role,
// but massively happen in some structs.
if target.AuxResources[a.Desc.Name] {
noteUsage(uses, c, 0.1, a.Dir(), "res%v", a.Desc.Name)
} else {
str := "res"
Expand Down
56 changes: 33 additions & 23 deletions prog/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,25 @@ import (
"fmt"
)

// We need to support structs as resources,
// but for now we just special-case timespec/timeval.
var timespecRes = &ResourceDesc{
Name: "timespec",
Kind: []string{"timespec"},
}
var (
// We need to support structs as resources,
// but for now we just special-case timespec/timeval.
timespecRes = &ResourceDesc{
Name: "timespec",
Kind: []string{"timespec"},
}
// On one hand these are resources, but they don't have constructors.
// It can make sense to provide generic support for such things,
// but for now we just special-case them.
filenameRes = &ResourceDesc{
Name: "filename",
Kind: []string{"filename"},
}
vmaRes = &ResourceDesc{
Name: "vma",
Kind: []string{"vma"},
}
)

func (target *Target) calcResourceCtors(res *ResourceDesc, precise bool) []*Syscall {
var metas []*Syscall
Expand Down Expand Up @@ -113,7 +126,7 @@ func isCompatibleResourceImpl(dst, src []string, precise bool) bool {
return true
}

func (target *Target) inputResources(c *Syscall) []*ResourceDesc {
func (target *Target) getInputResources(c *Syscall) []*ResourceDesc {
var resources []*ResourceDesc
ForeachType(c, func(typ Type) {
if typ.Dir() == DirOut {
Expand All @@ -133,7 +146,7 @@ func (target *Target) inputResources(c *Syscall) []*ResourceDesc {
return resources
}

func (target *Target) outputResources(c *Syscall) []*ResourceDesc {
func (target *Target) getOutputResources(c *Syscall) []*ResourceDesc {
var resources []*ResourceDesc
ForeachType(c, func(typ Type) {
switch typ1 := typ.(type) {
Expand All @@ -149,34 +162,25 @@ func (target *Target) outputResources(c *Syscall) []*ResourceDesc {
return resources
}

func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*Syscall]bool, map[*Syscall]string) {
supported := make(map[*Syscall]bool)
disabled := make(map[*Syscall]string)
canCreate := make(map[string]bool)
inputResources := make(map[*Syscall][]*ResourceDesc)
for c := range enabled {
inputResources[c] = target.inputResources(c)

if c.Name == "pipe$9p" {
fmt.Printf("%v: input resource: %+v\n", c.Name, inputResources[c])
}
}
func (target *Target) transitivelyEnabled(enabled map[*Syscall]bool) (map[*Syscall]bool, map[string]bool) {
supported := make(map[*Syscall]bool, len(enabled))
canCreate := make(map[string]bool, len(enabled))
for {
n := len(supported)
for c := range enabled {
if supported[c] {
continue
}
ready := true
for _, res := range inputResources[c] {
for _, res := range c.inputResources {
if !canCreate[res.Name] {
ready = false
break
}
}
if ready {
supported[c] = true
for _, res := range target.outputResources(c) {
for _, res := range c.outputResources {
for _, kind := range res.Kind {
canCreate[kind] = true
}
Expand All @@ -187,12 +191,18 @@ func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*
break
}
}
return supported, canCreate
}

func (target *Target) TransitivelyEnabledCalls(enabled map[*Syscall]bool) (map[*Syscall]bool, map[*Syscall]string) {
supported, canCreate := target.transitivelyEnabled(enabled)
disabled := make(map[*Syscall]string)
ctors := make(map[string][]string)
for c := range enabled {
if supported[c] {
continue
}
for _, res := range inputResources[c] {
for _, res := range c.inputResources {
if canCreate[res.Name] {
continue
}
Expand Down
240 changes: 240 additions & 0 deletions prog/rotation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
// Copyright 2019 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package prog

import (
"math/rand"
"sort"
)

// Rotator selects a random subset of syscalls for corpus rotation.
// It is built once by MakeRotator, which indexes the given calls by the
// resources they use; each Select call then produces a fresh subset.
type Rotator struct {
// Target the calls belong to, as passed to MakeRotator.
target *Target
// The full set of calls to choose from, as passed to MakeRotator.
calls map[*Syscall]bool
// Source of randomness for all selection decisions.
rnd *rand.Rand
// Calls that have no output resources and no inputs that count as
// resources for rotation purposes.
resourceless []*Syscall
// For each call: its input resources followed by its output resources.
syscallUses map[*Syscall][]*ResourceDesc
// Per-resource index of calls that create and calls that use the resource.
resources map[*ResourceDesc]rotatorResource
// Number of calls a single selection aims for.
goal int
// Number of resourceless calls we want in a selection (at least 1).
nresourceless int
}

// rotatorResource records, for a single resource, which calls can create it
// (ctors) and which calls consume it (uses), bucketed by how precisely the
// call matches the resource.
type rotatorResource struct {
// 0 - precise ctors that don't require other resources as inputs (e.g. socket).
// 1 - precise ctors that require other resources (e.g. accept).
// 2 - all imprecise ctors.
ctors [3][]*Syscall
// 0 - precise uses of this resource.
// 1 - uses of parent resources (e.g. close for sock).
uses [2][]*Syscall
}

// MakeRotator creates a Rotator that picks random subsets of calls.
//
// It indexes every call in calls by the resources it consumes and produces,
// collects the calls that touch no resources at all, and computes how many
// calls a single selection should contain. All random decisions made later
// by Select are drawn from rnd.
//
// calls must be non-empty: the share of resourceless calls is computed by
// dividing by len(calls).
func MakeRotator(target *Target, calls map[*Syscall]bool, rnd *rand.Rand) *Rotator {
	r := &Rotator{
		target:      target,
		calls:       calls,
		rnd:         rnd,
		syscallUses: make(map[*Syscall][]*ResourceDesc),
		resources:   make(map[*ResourceDesc]rotatorResource),
	}
	// Visit calls in a fixed order. The ctors/uses/resourceless slices built
	// below are later indexed with rnd.Intn, so if they were filled in map
	// iteration order the same rnd seed would yield different selections.
	// NOTE(review): this assumes syscall names are unique; confirm.
	ordered := make([]*Syscall, 0, len(calls))
	for call := range calls {
		ordered = append(ordered, call)
	}
	sort.Slice(ordered, func(i, j int) bool {
		return ordered[i].Name < ordered[j].Name
	})
	for _, call := range ordered {
		r.syscallUses[call] = append(r.syscallUses[call], call.inputResources...)
		r.syscallUses[call] = append(r.syscallUses[call], call.outputResources...)
		var inputs []*ResourceDesc
		for _, res := range call.inputResources {
			// Don't take into account pid/uid/etc, they create too many links.
			if !target.AuxResources[res.Name] {
				inputs = append(inputs, res)
			}
		}
		// VMAs and filenames are effectively resources for our purposes
		// (but they don't have ctors).
		ForeachType(call, func(t Type) {
			switch a := t.(type) {
			case *BufferType:
				switch a.Kind {
				case BufferFilename:
					inputs = append(inputs, filenameRes)
				}
			case *VmaType:
				inputs = append(inputs, vmaRes)
			}
		})

		// Register the call as a user of each distinct input resource,
		// and as a less precise user of every parent kind of that resource.
		inputDedup := make(map[string]bool, len(inputs))
		for _, res := range inputs {
			if inputDedup[res.Name] {
				continue
			}
			inputDedup[res.Name] = true
			info := r.resources[res]
			info.uses[0] = append(info.uses[0], call)
			r.resources[res] = info

			for _, kind := range res.Kind[:len(res.Kind)-1] {
				parent := target.resourceMap[kind]
				info := r.resources[parent]
				info.uses[1] = append(info.uses[1], call)
				r.resources[parent] = info
			}
		}
		// Register the call as a ctor of each distinct output resource.
		// It is a class 0 ctor if it needs no inputs and class 1 otherwise;
		// for every parent kind it is an imprecise (class 2) ctor.
		outputDedup := make(map[string]bool, len(call.outputResources))
		for _, res := range call.outputResources {
			if outputDedup[res.Name] {
				continue
			}
			outputDedup[res.Name] = true
			info := r.resources[res]
			class := 0
			if len(inputs) != 0 {
				class = 1
			}
			info.ctors[class] = append(info.ctors[class], call)
			r.resources[res] = info
			for _, kind := range res.Kind[:len(res.Kind)-1] {
				parent := target.resourceMap[kind]
				info := r.resources[parent]
				info.ctors[2] = append(info.ctors[2], call)
				r.resources[parent] = info
			}
		}
		if len(inputs)+len(call.outputResources) == 0 {
			r.resourceless = append(r.resourceless, call)
		}
	}
	// For smaller syscall sets we drop ~5% of syscalls.
	// However, we assume that 200 syscalls is enough for a fuzzing session,
	// so we cap at that level to make fuzzing more targeted.
	r.goal = len(calls) * 19 / 20
	if r.goal < 1 {
		r.goal = 1
	}
	if max := 200; r.goal > max {
		r.goal = max
	}
	// How many syscalls that don't use any resources we want to add?
	// Keep their share in a selection proportional to their share in calls.
	r.nresourceless = r.goal * len(r.resourceless) / len(calls)
	if r.nresourceless < 1 {
		r.nresourceless = 1
	}
	return r
}

// Select returns a new random subset of the rotator's calls.
// Each invocation starts from fresh per-selection state, so the Rotator
// itself can be reused for any number of selections.
func (r *Rotator) Select() map[*Syscall]bool {
	state := &rotatorState{
		Rotator: r,
		// Selection may overshoot the goal before it is trimmed,
		// so reserve extra room up front.
		calls: make(map[*Syscall]bool, 3*r.goal),
	}
	return state.Select()
}

// rotatorState is the mutable state of a single Rotator.Select run.
type rotatorState struct {
*Rotator
// Calls selected so far.
calls map[*Syscall]bool
// Resources still to be handled as top resources
// (both ctors and uses are added for these).
topQueue []*ResourceDesc
// Resources pulled in by already selected calls
// (only ctors are added for these).
depQueue []*ResourceDesc
// Resources already taken from topQueue.
topHandled map[*ResourceDesc]bool
// Resources already put into depQueue.
depHandled map[*ResourceDesc]bool
}

// Select runs one selection and returns the chosen set of calls.
//
// The returned set has been passed through transitivelyEnabled and contains
// at least rs.goal calls.
func (rs *rotatorState) Select() map[*Syscall]bool {
	// The algorithm is centered around resources.
	// But first we add some syscalls that don't use any resources at all.
	// Otherwise we will never add them in the loop.
	// Then, we select a resource and add some ctors for this resources
	// and some calls that use it. That's handled by topQueue.
	// If any of the calls require other resources as inputs, we also add
	// some ctors for these resources, but don't add calls that use them.
	// That's handled by depQueue.
	// However, a resource can be handled as dependency first, but then
	// handled as top resource again. In such case we will still add calls
	// that use this resource.
	for {
		// Stop once all dependencies are resolved and we have enough calls,
		// or unconditionally once we have overshot the goal twofold.
		if len(rs.depQueue) == 0 && len(rs.calls) >= rs.goal || len(rs.calls) >= 2*rs.goal {
			rs.calls, _ = rs.target.transitivelyEnabled(rs.calls)
			// Filtering may have removed calls; keep going if we fell below the goal.
			if len(rs.calls) >= rs.goal {
				return rs.calls
			}
		}
		if len(rs.depQueue) != 0 {
			// Handle a dependent resource, add only ctors for these.
			// Pick a random one, this gives a mix of DFS and BFS.
			idx := rs.rnd.Intn(len(rs.depQueue))
			res := rs.depQueue[idx]
			rs.depQueue[idx] = rs.depQueue[len(rs.depQueue)-1]
			rs.depQueue = rs.depQueue[:len(rs.depQueue)-1]
			info := rs.resources[res]
			// Use the most precise non-empty class of ctors, falling back
			// to imprecise ctors only if there are no precise ones.
			switch {
			case len(info.ctors[0]) != 0:
				rs.selectCalls(info.ctors[0], 2, true)
			case len(info.ctors[1]) != 0:
				rs.selectCalls(info.ctors[1], 2, true)
			default:
				rs.selectCalls(info.ctors[2], 2, true)
			}
			continue
		}
		if len(rs.topQueue) == 0 {
			// We either just started selection or we handled all resources,
			// but did not gather enough syscalls. In both cases we need
			// to reset all queues.
			rs.topQueue = make([]*ResourceDesc, 0, len(rs.resources))
			rs.depQueue = make([]*ResourceDesc, 0, len(rs.resources))
			rs.topHandled = make(map[*ResourceDesc]bool, len(rs.resources))
			rs.depHandled = make(map[*ResourceDesc]bool, len(rs.resources))
			for res := range rs.resources {
				rs.topQueue = append(rs.topQueue, res)
			}
			// Sort before shuffling so that the resulting order depends
			// only on rnd and not on map iteration order.
			sort.Slice(rs.topQueue, func(i, j int) bool {
				return rs.topQueue[i].Name < rs.topQueue[j].Name
			})
			rs.rnd.Shuffle(len(rs.topQueue), func(i, j int) {
				rs.topQueue[i], rs.topQueue[j] = rs.topQueue[j], rs.topQueue[i]
			})
			rs.selectCalls(rs.resourceless, rs.nresourceless+1, false)
		}
		// Handle a top resource, add more syscalls for these.
		res := rs.topQueue[0]
		rs.topQueue = rs.topQueue[1:]
		if rs.topHandled[res] {
			panic("top queue already handled")
		}
		rs.topHandled[res] = true
		info := rs.resources[res]
		nctors0 := len(info.ctors[0]) != 0
		nctors1 := nctors0 || len(info.ctors[1]) != 0
		// Each less precise class of ctors is forced only when all more
		// precise classes are empty; otherwise it is added opportunistically.
		rs.selectCalls(info.ctors[0], 5, true)
		rs.selectCalls(info.ctors[1], 3, !nctors0)
		rs.selectCalls(info.ctors[2], 2, !nctors1)
		rs.selectCalls(info.uses[0], 20, true)
		rs.selectCalls(info.uses[1], 2, len(info.uses[0]) == 0)
	}
}

// addCall adds call to the current selection. Every resource the call uses
// that has not been handled yet (neither as a top resource nor as a
// dependency) is queued so that ctors for it get selected as well.
// Adding an already selected call is a no-op.
func (rs *rotatorState) addCall(call *Syscall) {
	if selected := rs.calls[call]; selected {
		return
	}
	rs.calls[call] = true
	for _, used := range rs.syscallUses[call] {
		if !rs.topHandled[used] && !rs.depHandled[used] {
			rs.depHandled[used] = true
			rs.depQueue = append(rs.depQueue, used)
		}
	}
}

// selectCalls adds a random number of calls, picked uniformly from set
// (repeats are possible), to the current selection.
//
// If force is set, the first call is added unconditionally. Every further
// call is added only while a 1-in-probability draw does not come up zero,
// so larger probability values select more calls on average.
// An empty set selects nothing.
func (rs *rotatorState) selectCalls(set []*Syscall, probability int, force bool) {
	if probability < 2 && !force {
		panic("will never select anything")
	}
	if len(set) == 0 {
		return
	}
	for {
		// The forced first pick skips the draw; all later picks must win it.
		if !force && rs.rnd.Intn(probability) == 0 {
			return
		}
		force = false
		rs.addCall(set[rs.rnd.Intn(len(set))])
	}
}
Loading

0 comments on commit 7c4e06f

Please sign in to comment.