// Copyright 2017 syzkaller project authors. All rights reserved.
// Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.

package main

import (
"bytes"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/google/syzkaller/dashboard/dashapi"
"github.com/google/syzkaller/pkg/bisect"
"github.com/google/syzkaller/pkg/build"
"github.com/google/syzkaller/pkg/debugtracer"
"github.com/google/syzkaller/pkg/instance"
"github.com/google/syzkaller/pkg/log"
"github.com/google/syzkaller/pkg/mgrconfig"
"github.com/google/syzkaller/pkg/osutil"
"github.com/google/syzkaller/pkg/report"
"github.com/google/syzkaller/pkg/vcs"
"github.com/google/syzkaller/vm"
)
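
// JobManager polls the dashboard for pending jobs (patch testing and
// bisections) and distributes them across one or two JobProcessors.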
type JobManager struct {
cfg *Config
dash *dashapi.Dashboard
managers []*Manager
parallelJobFilter *ManagerJobs
shutdownPending <-chan struct{}
}
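
// JobProcessor runs one job processing slot: it polls the dashboard for
// jobs (optionally narrowed by jobFilter) and executes the jobs it
// receives; the main processor additionally polls for fix commits.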
type JobProcessor struct {
*JobManager
name string
instanceSuffix string
knownCommits map[string]bool
baseDir string
jobFilter *ManagerJobs
jobTicker <-chan time.Time
commitTicker <-chan time.Time
}
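
// newJobManager creates a JobManager connected to the dashboard described by cfg.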
func newJobManager(cfg *Config, managers []*Manager, shutdownPending chan struct{}) (*JobManager, error) {
dash, err := dashapi.New(cfg.DashboardClient, cfg.DashboardAddr, cfg.DashboardKey)
if err != nil {
return nil, err
}
return &JobManager{
cfg: cfg,
dash: dash,
managers: managers,
shutdownPending: shutdownPending,
// For now let's only parallelize patch testing requests.
parallelJobFilter: &ManagerJobs{TestPatches: true},
}, nil
}

// startLoop starts the job loop in the background and returns a blocking
// function that gracefully stops job processing.
func (jm *JobManager) startLoop(wg *sync.WaitGroup) func() {
stop := make(chan struct{})
done := make(chan struct{}, 1)
wg.Add(1)
go func() {
defer wg.Done()
jm.loop(stop)
done <- struct{}{}
}()
return func() {
close(stop)
<-done
}
}
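
// loop resets leftover dashboard jobs and then runs the main JobProcessor
// (plus, if needed, a second one restricted to patch testing) until stop
// is closed.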
func (jm *JobManager) loop(stop chan struct{}) {
if err := jm.resetJobs(); err != nil {
if jm.dash != nil {
jm.dash.LogError("syz-ci", "reset jobs failed: %v", err)
}
return
}
commitTicker := time.NewTicker(time.Duration(jm.cfg.CommitPollPeriod) * time.Second)
defer commitTicker.Stop()
jobTicker := time.NewTicker(time.Duration(jm.cfg.JobPollPeriod) * time.Second)
defer jobTicker.Stop()
var wg sync.WaitGroup
for main := true; ; main = false {
jp := &JobProcessor{
JobManager: jm,
jobTicker: jobTicker.C,
}
if main {
jp.instanceSuffix = "-job"
jp.baseDir = osutil.Abs("jobs")
jp.commitTicker = commitTicker.C
jp.knownCommits = make(map[string]bool)
} else {
jp.instanceSuffix = "-job-parallel"
jp.baseDir = osutil.Abs("jobs-2")
jp.jobFilter = jm.parallelJobFilter
}
jp.name = fmt.Sprintf("%v%v", jm.cfg.Name, jp.instanceSuffix)
wg.Add(1)
go func() {
defer wg.Done()
jp.loop(stop)
}()
if !main || !jm.needParallelProcessor() {
break
}
}
wg.Wait()
}
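
// needParallelProcessor reports whether a second, parallel JobProcessor
// should be started: parallel jobs must be enabled in the config and at
// least one manager must have a job type matching parallelJobFilter.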
func (jm *JobManager) needParallelProcessor() bool {
if !jm.cfg.ParallelJobs {
return false
}
for _, mgr := range jm.managers {
if mgr.mgrcfg.Jobs.Filter(jm.parallelJobFilter).AnyEnabled() {
return true
}
}
return false
}
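
// resetJobs asks the dashboard to reset jobs for all managers that have
// any job types enabled.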
func (jm *JobManager) resetJobs() error {
managerNames := []string{}
for _, mgr := range jm.managers {
if mgr.mgrcfg.Jobs.AnyEnabled() {
managerNames = append(managerNames, mgr.name)
}
}
if len(managerNames) > 0 {
return jm.dash.JobReset(&dashapi.JobResetReq{Managers: managerNames})
}
return nil
}
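
// loop polls for commits and jobs on their respective tickers until stop
// is closed.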
func (jp *JobProcessor) loop(stop chan struct{}) {
jp.Logf(0, "job loop started")
loop:
for {
// Check stop separately first, otherwise if the stop signal arrives during a job execution,
// we could still grab the next job with 50% probability.
select {
case <-stop:
break loop
default:
}
// Similarly for commit polling: if we grabbed 2-3 bisect jobs in a row,
// it could delay commit polling by days.
select {
case <-jp.commitTicker:
jp.pollCommits()
default:
}
select {
case <-jp.jobTicker:
jp.pollJobs()
case <-jp.commitTicker:
jp.pollCommits()
case <-stop:
break loop
}
}
jp.Logf(0, "job loop stopped")
}
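
// pollCommits polls fix commits for every manager that has commit polling enabled.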
func (jp *JobProcessor) pollCommits() {
for _, mgr := range jp.managers {
if !mgr.mgrcfg.Jobs.PollCommits {
continue
}
if err := jp.pollManagerCommits(mgr); err != nil {
jp.Errorf("failed to poll commits on %v: %v", mgr.name, err)
}
}
}
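
// brokenRepo reports whether fix commit extraction is known to be broken for the repo.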
func brokenRepo(url string) bool {
// TODO(dvyukov): mmots contains weird squashed commits titled "linux-next" or "origin",
// which contain hundreds of other commits. This makes fix attribution totally broken.
return strings.Contains(url, "git.cmpxchg.org/linux-mmots")
}
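
// pollManagerCommits asks the dashboard which repos and commit titles it
// is interested in, extracts the matching commits from those repos, and
// uploads the results back to the dashboard.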
func (jp *JobProcessor) pollManagerCommits(mgr *Manager) error {
resp, err := mgr.dash.CommitPoll()
if err != nil {
return err
}
jp.Logf(0, "polling commits for %v: repos %v, commits %v", mgr.name, len(resp.Repos), len(resp.Commits))
if len(resp.Repos) == 0 {
return fmt.Errorf("no repos")
}
commits := make(map[string]*vcs.Commit)
for i, repo := range resp.Repos {
if brokenRepo(repo.URL) {
continue
}
if resp.ReportEmail != "" {
commits1, err := jp.pollRepo(mgr, repo.URL, repo.Branch, resp.ReportEmail)
if err != nil {
jp.Errorf("failed to poll %v %v: %v", repo.URL, repo.Branch, err)
continue
}
jp.Logf(1, "got %v commits from %v/%v repo", len(commits1), repo.URL, repo.Branch)
for _, com := range commits1 {
// Only the first, "main" repo is the source of authoritative hashes.
if i != 0 {
com.Hash = ""
}
// Don't overwrite existing commits, in particular a commit from the main repo that has a hash.
if _, ok := commits[com.Title]; !ok && !jp.knownCommits[com.Title] && len(commits) < 100 {
commits[com.Title] = com
jp.knownCommits[com.Title] = true
}
}
}
if i == 0 && len(resp.Commits) != 0 {
commits1, err := jp.getCommitInfo(mgr, repo.URL, repo.Branch, resp.Commits)
if err != nil {
jp.Errorf("failed to poll %v %v: %v", repo.URL, repo.Branch, err)
continue
}
jp.Logf(1, "got %v commit infos from %v/%v repo", len(commits1), repo.URL, repo.Branch)
for _, com := range commits1 {
// GetCommitsByTitles does not accept ReportEmail and does not return tags,
// so don't replace an existing commit.
if _, ok := commits[com.Title]; !ok {
commits[com.Title] = com
}
}
}
}
results := make([]dashapi.Commit, 0, len(commits))
for _, com := range commits {
results = append(results, dashapi.Commit{
Hash: com.Hash,
Title: com.Title,
Author: com.Author,
BugIDs: com.Tags,
Date: com.Date,
})
}
return mgr.dash.UploadCommits(results)
}
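
// pollRepo checks out the given repo/branch and extracts commits that
// carry fix tags addressed to reportEmail.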
func (jp *JobProcessor) pollRepo(mgr *Manager, URL, branch, reportEmail string) ([]*vcs.Commit, error) {
dir := filepath.Join(jp.baseDir, mgr.managercfg.TargetOS, "kernel")
repo, err := vcs.NewRepo(mgr.managercfg.TargetOS, mgr.managercfg.Type, dir)
if err != nil {
return nil, fmt.Errorf("failed to create kernel repo: %w", err)
}
if _, err = repo.CheckoutBranch(URL, branch); err != nil {
return nil, fmt.Errorf("failed to checkout kernel repo %v/%v: %w", URL, branch, err)
}
return repo.ExtractFixTagsFromCommits("HEAD", reportEmail)
}
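
// getCommitInfo checks out the given repo/branch and resolves commit
// titles to full commit info; titles that cannot be found are logged and
// skipped.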
func (jp *JobProcessor) getCommitInfo(mgr *Manager, URL, branch string, commits []string) ([]*vcs.Commit, error) {
dir := filepath.Join(jp.baseDir, mgr.managercfg.TargetOS, "kernel")
repo, err := vcs.NewRepo(mgr.managercfg.TargetOS, mgr.managercfg.Type, dir)
if err != nil {
return nil, fmt.Errorf("failed to create kernel repo: %w", err)
}
if _, err = repo.CheckoutBranch(URL, branch); err != nil {
return nil, fmt.Errorf("failed to checkout kernel repo %v/%v: %w", URL, branch, err)
}
results, missing, err := repo.GetCommitsByTitles(commits)
if err != nil {
return nil, err
}
for _, title := range missing {
jp.Logf(0, "did not find commit %q in kernel repo %v/%v", title, URL, branch)
}
return results, nil
}
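
// pollJobs asks the dashboard for a job matching the enabled job types of
// our managers (narrowed by jobFilter, if set) and processes the returned
// job, if any.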
func (jp *JobProcessor) pollJobs() {
poll := &dashapi.JobPollReq{
Managers: make(map[string]dashapi.ManagerJobs),
}
for _, mgr := range jp.managers {
jobs := &mgr.mgrcfg.Jobs
if jp.jobFilter != nil {
jobs = jobs.Filter(jp.jobFilter)
}
apiJobs := dashapi.ManagerJobs{
TestPatches: jobs.TestPatches,
BisectCause: jobs.BisectCause,
BisectFix: jobs.BisectFix,
}
if apiJobs.Any() {
poll.Managers[mgr.name] = apiJobs
}
}
if len(poll.Managers) == 0 {
return
}
req, err := jp.dash.JobPoll(poll)
if err != nil {
jp.Errorf("failed to poll jobs: %v", err)
return
}
if req.ID == "" {
return
}
var mgr *Manager
for _, m := range jp.managers {
if m.name == req.Manager {
mgr = m
break
}
}
if mgr == nil {
jp.Errorf("got job for unknown manager: %v", req.Manager)
return
}
job := &Job{
req: req,
mgr: mgr,
}
jp.processJob(job)
}
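
// processJob runs a single job and reports the result back to the dashboard.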
func (jp *JobProcessor) processJob(job *Job) {
req := job.req
jp.Logf(0, "starting job %v type %v for manager %v on %v/%v",
req.ID, req.Type, req.Manager, req.KernelRepo, req.KernelBranch)
resp := jp.process(job)
jp.Logf(0, "done job %v: commit %v, crash %q, error: %s",
resp.ID, resp.Build.KernelCommit, resp.CrashTitle, resp.Error)
select {
case <-jp.shutdownPending:
if len(resp.Error) != 0 {
// Ctrl+C can kill a child process which will cause an error.
jp.Logf(0, "ignoring error: shutdown pending")
return
}
default:
}
if err := jp.dash.JobDone(resp); err != nil {
jp.Errorf("failed to mark job as done: %v", err)
return
}
}
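
// Job couples a dashboard job request with the manager it targets and the
// response being built.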
type Job struct {
req *dashapi.JobPollResp
resp *dashapi.JobDoneReq
mgr *Manager
}
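
// process prepares a per-job manager config (separate workdir, kernel
// source and syzkaller checkout), validates the request, and dispatches
// it to testPatch or bisect.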
func (jp *JobProcessor) process(job *Job) *dashapi.JobDoneReq {
req, mgr := job.req, job.mgr
dir := filepath.Join(jp.baseDir, mgr.managercfg.TargetOS)
mgrcfg := new(mgrconfig.Config)
*mgrcfg = *mgr.managercfg
mgrcfg.Workdir = filepath.Join(dir, "workdir")
mgrcfg.KernelSrc = filepath.Join(dir, "kernel", mgr.mgrcfg.KernelSrcSuffix)
mgrcfg.Syzkaller = filepath.Join(dir, "gopath", "src", "github.com", "google", "syzkaller")
os.RemoveAll(mgrcfg.Workdir)
defer os.RemoveAll(mgrcfg.Workdir)
resp := &dashapi.JobDoneReq{
ID: req.ID,
Build: dashapi.Build{
Manager: mgr.name,
ID: req.ID,
OS: mgr.managercfg.TargetOS,
Arch: mgr.managercfg.TargetArch,
VMArch: mgr.managercfg.TargetVMArch,
SyzkallerCommit: req.SyzkallerCommit,
},
}
job.resp = resp
resp.Build.KernelRepo = req.KernelRepo
resp.Build.KernelBranch = req.KernelBranch
resp.Build.KernelConfig = req.KernelConfig
switch req.Type {
case dashapi.JobTestPatch:
resp.Build.KernelCommit = "[unknown]"
mgrcfg.Name += "-test" + jp.instanceSuffix
case dashapi.JobBisectCause, dashapi.JobBisectFix:
resp.Build.KernelCommit = req.KernelCommit
resp.Build.KernelCommitTitle = req.KernelCommitTitle
mgrcfg.Name += "-bisect" + jp.instanceSuffix
default:
err := fmt.Errorf("bad job type %v", req.Type)
job.resp.Error = []byte(err.Error())
jp.Errorf("%s", err)
return job.resp
}
if req.KernelRepo == "" {
req.KernelRepo = mgr.mgrcfg.Repo
req.KernelBranch = mgr.mgrcfg.Branch
}
required := []struct {
name string
ok bool
}{
{"kernel repository", req.KernelRepo != "" || req.Type != dashapi.JobTestPatch},
{"kernel branch", req.KernelBranch != "" || req.Type != dashapi.JobTestPatch},
{"kernel config", len(req.KernelConfig) != 0},
{"syzkaller commit", req.SyzkallerCommit != ""},
// We either want a normal repro (with options and syz repro text)
// or it's a boot time bug, in which case both are empty.
{"reproducer consistency", (len(req.ReproOpts) != 0 && len(req.ReproSyz) != 0) ||
(len(req.ReproOpts) == 0 && len(req.ReproSyz) == 0)},
}
for _, req := range required {
if !req.ok {
job.resp.Error = []byte(req.name + " is empty")
jp.Errorf("%s", job.resp.Error)
return job.resp
}
}
if typ := mgr.managercfg.Type; !vm.AllowsOvercommit(typ) {
job.resp.Error = []byte(fmt.Sprintf("testing is not yet supported for %v machine type.", typ))
jp.Errorf("%s", job.resp.Error)
return job.resp
}
var err error
switch req.Type {
case dashapi.JobTestPatch:
err = jp.testPatch(job, mgrcfg)
case dashapi.JobBisectCause, dashapi.JobBisectFix:
err = jp.bisect(job, mgrcfg)
}
if err != nil {
job.resp.Error = []byte(err.Error())
}
return job.resp
}
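
// bisect runs cause/fix bisection for the job via pkg/bisect and
// translates the result into the dashboard response.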
func (jp *JobProcessor) bisect(job *Job, mgrcfg *mgrconfig.Config) error {
req, resp, mgr := job.req, job.resp, job.mgr
// Hack: if the manager has only, say, 5 VMs, but bisection wants 10, try to override the number of VMs to 10.
// OverrideVMCount is opportunistic and only does so when it's safe.
if err := instance.OverrideVMCount(mgrcfg, bisect.MaxNumTests); err != nil {
return err
}
var baseline []byte
// Read possible baseline for config minimization.
if mgr.mgrcfg.KernelBaselineConfig != "" {
var err error
baseline, err = os.ReadFile(mgr.mgrcfg.KernelBaselineConfig)
if err != nil {
return fmt.Errorf("failed to read baseline config: %w", err)
}
}
err := jp.prepareBisectionRepo(mgrcfg, req)
if err != nil {
return err
}
trace := new(bytes.Buffer)
cfg := &bisect.Config{
Trace: &debugtracer.GenericTracer{
TraceWriter: io.MultiWriter(trace, log.VerboseWriter(3)),
OutDir: osutil.Abs(filepath.Join("jobs", "debug", strings.Replace(req.ID, "|", "_", -1))),
},
// Out of 1049 cause bisections that we have now:
// - 891 finished under 6h (84.9%)
// - 957 finished under 8h (91.2%)
// - 980 finished under 10h (93.4%)
// - 989 finished under 12h (94.3%)
// - 1011 finished under 18h (96.3%)
// - 1025 finished under 24h (97.7%)
// There is also a significant increase in errors/inconclusive bisections after ~8h.
// Out of 4075 fix bisections:
// - 4015 finished under 6h (98.5%)
// - 4020 finished under 8h (98.7%)
// - 4026 finished under 10h (98.8%)
// - 4032 finished under 12h (98.9%)
// Significant increase in errors starts after ~12h.
// Bisection jobs are now executed in parallel with patch testing, so a long timeout doesn't hurt the user experience.
// Let's set the timeout to 12h.
Timeout: 12 * time.Hour,
Fix: req.Type == dashapi.JobBisectFix,
DefaultCompiler: mgr.mgrcfg.Compiler,
CompilerType: mgr.mgrcfg.CompilerType,
BinDir: jp.cfg.BisectBinDir,
Linker: mgr.mgrcfg.Linker,
Ccache: jp.cfg.Ccache,
Kernel: bisect.KernelConfig{
Repo: req.KernelRepo,
Branch: req.KernelBranch,
Commit: req.KernelCommit,
CommitTitle: req.KernelCommitTitle,
Cmdline: mgr.mgrcfg.KernelCmdline,
Sysctl: mgr.mgrcfg.KernelSysctl,
Config: req.KernelConfig,
BaselineConfig: baseline,
Userspace: mgr.mgrcfg.Userspace,
Backports: mgr.backportCommits(),
},
Syzkaller: bisect.SyzkallerConfig{
Repo: jp.cfg.SyzkallerRepo,
Commit: req.SyzkallerCommit,
},
Repro: bisect.ReproConfig{
Opts: req.ReproOpts,
Syz: req.ReproSyz,
C: req.ReproC,
},
CrossTree: req.MergeBaseRepo != "",
Manager: mgrcfg,
BuildSemaphore: buildSem,
TestSemaphore: testSem,
}
res, err := bisect.Run(cfg)
resp.Log = trace.Bytes()
if err != nil {
var infraErr *bisect.InfraError
if errors.As(err, &infraErr) {
resp.Flags |= dashapi.BisectResultInfraError
}
return err
}
for _, com := range res.Commits {
resp.Commits = append(resp.Commits, dashapi.Commit{
Hash: com.Hash,
Title: com.Title,
Author: com.Author,
AuthorName: com.AuthorName,
Recipients: com.Recipients.ToDash(),
Date: com.Date,
})
}
if len(res.Commits) == 1 {
if len(res.Commits[0].Parents) > 1 {
resp.Flags |= dashapi.BisectResultMerge
}
if res.NoopChange {
resp.Flags |= dashapi.BisectResultNoop
}
if res.IsRelease {
resp.Flags |= dashapi.BisectResultRelease
}
const confidenceCutOff = 0.66
if res.Confidence < confidenceCutOff {
resp.Flags |= dashapi.BisectResultIgnore
}
if jp.ignoreBisectCommit(res.Commits[0]) {
resp.Flags |= dashapi.BisectResultIgnore
}
}
if res.Report != nil {
resp.CrashTitle = res.Report.Title
resp.CrashAltTitles = res.Report.AltTitles
resp.CrashReport = res.Report.Report
resp.CrashLog = res.Report.Output
if len(resp.Commits) != 0 {
resp.Commits[0].Recipients = append(resp.Commits[0].Recipients, res.Report.Recipients.ToDash()...)
} else {
// If there is a report but no guilty commit, the crash occurred on
// HEAD (for BisectFix) or on the oldest tested release (for BisectCause).
resp.Build.KernelCommit = res.Commit.Hash
resp.Build.KernelCommitDate = res.Commit.CommitDate
resp.Build.KernelCommitTitle = res.Commit.Title
}
}
return nil
}
var ignoredCommits = []string{
// Commit "usb: gadget: add raw-gadget interface" adds a kernel interface for
// triggering USB bugs, which ends up being the guilty commit during bisection
// for USB bugs introduced before it.
"f2c2e717642c66f7fe7e5dd69b2e8ff5849f4d10",
// Commit "devlink: bump the instance index directly when iterating" has likely
// fixed some frequent task hung, which skews fix bisection results.
// TODO: consider backporting it during bisection itself.
"d772781964415c63759572b917e21c4f7ec08d9f",
}
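
// ignoreBisectCommit reports whether the bisection result should be
// ignored because it landed on a commit from the hardcoded ignore list or
// from the BisectIgnore config.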
func (jp *JobProcessor) ignoreBisectCommit(commit *vcs.Commit) bool {
// First check the list of always-ignored commits.
for _, hash := range ignoredCommits {
if commit.Hash == hash {
return true
}
}
_, ok := jp.cfg.BisectIgnore[commit.Hash]
return ok
}
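
// testPatch builds syzkaller and the kernel (with the job's patch
// applied, if any), runs the reproducer on test instances, and records
// any resulting crash in the response.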
func (jp *JobProcessor) testPatch(job *Job, mgrcfg *mgrconfig.Config) error {
req, resp, mgr := job.req, job.resp, job.mgr
env, err := instance.NewEnv(mgrcfg, buildSem, testSem)
if err != nil {
return err
}
jp.Logf(0, "building syzkaller on %v...", req.SyzkallerCommit)
syzBuildLog, syzBuildErr := env.BuildSyzkaller(jp.cfg.SyzkallerRepo, req.SyzkallerCommit)
if syzBuildErr != nil {
return syzBuildErr
}
jp.Logf(0, "fetching kernel...")
repo, err := vcs.NewRepo(mgrcfg.TargetOS, mgrcfg.Type, mgrcfg.KernelSrc)
if err != nil {
return fmt.Errorf("failed to create kernel repo: %w", err)
}
kernelCommit, err := jp.checkoutJobCommit(job, repo)
if err != nil {
return err
}
resp.Build.KernelCommit = kernelCommit.Hash
resp.Build.KernelCommitTitle = kernelCommit.Title
resp.Build.KernelCommitDate = kernelCommit.CommitDate
if err := build.Clean(mgrcfg.TargetOS, mgrcfg.TargetVMArch, mgrcfg.Type, mgrcfg.KernelSrc); err != nil {
return fmt.Errorf("kernel clean failed: %w", err)
}
if len(req.Patch) != 0 {
if err := vcs.Patch(mgrcfg.KernelSrc, req.Patch); err != nil {
return err
}
}
// Disable CONFIG_DEBUG_INFO_BTF in the config.
// DEBUG_INFO_BTF requires a very new pahole binary, which we don't have on syzbot instances.
// Currently we don't enable DEBUG_INFO_BTF, but we have some old bugs with DEBUG_INFO_BTF enabled
// (at the time the requirements for the pahole binary were lower, or maybe the config silently disabled itself).
// Testing of patches for these bugs fails now because of the config, so we disable it as a work-around.
// Ideally we'd have a new pahole, and then we could remove this hack. That's issue #2096.
// pkg/vcs/linux.go also disables it for the bisection process.
req.KernelConfig = bytes.Replace(req.KernelConfig,
[]byte("CONFIG_DEBUG_INFO_BTF=y"),
[]byte("# CONFIG_DEBUG_INFO_BTF is not set"), -1)
log.Logf(0, "job: building kernel...")
kernelConfig, details, err := env.BuildKernel(&instance.BuildKernelConfig{
CompilerBin: mgr.mgrcfg.Compiler,
LinkerBin: mgr.mgrcfg.Linker,
CcacheBin: mgr.mgrcfg.Ccache,
UserspaceDir: mgr.mgrcfg.Userspace,
CmdlineFile: mgr.mgrcfg.KernelCmdline,
SysctlFile: mgr.mgrcfg.KernelSysctl,
KernelConfig: req.KernelConfig,
})
resp.Build.CompilerID = details.CompilerID
if err != nil {
return err
}
if kernelConfig != "" {
resp.Build.KernelConfig, err = os.ReadFile(kernelConfig)
if err != nil {
return fmt.Errorf("failed to read config file: %w", err)
}
}
jp.Logf(0, "job: testing...")
results, err := env.Test(3, req.ReproSyz, req.ReproOpts, req.ReproC)
if err != nil {
return fmt.Errorf("%w\n\nsyzkaller build log:\n%s", err, syzBuildLog)
}
ret, err := aggregateTestResults(results)
if err != nil {
return fmt.Errorf("%w\n\nsyzkaller build log:\n%s", err, syzBuildLog)
}
rep := ret.report
if rep != nil {
resp.CrashTitle = rep.Title
resp.CrashAltTitles = rep.AltTitles
resp.CrashReport = rep.Report
}
resp.CrashLog = ret.rawOutput
return nil
}
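
// prepareBisectionRepo checks out the merge base repo needed for
// cross-tree bisections; it is a no-op when the job has no merge base
// repo.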
func (jp *JobProcessor) prepareBisectionRepo(mgrcfg *mgrconfig.Config, req *dashapi.JobPollResp) error {
if req.MergeBaseRepo == "" {
// Nothing to prepare.
return nil
}
repo, err := vcs.NewRepo(mgrcfg.TargetOS, mgrcfg.Type, mgrcfg.KernelSrc)
if err != nil {
return fmt.Errorf("failed to create kernel repo: %w", err)
}
_, err = checkoutKernelOrCommit(repo, req.MergeBaseRepo, req.MergeBaseBranch)
if err != nil {
return fmt.Errorf("failed to checkout the merge base repo %v on %v: %w",
req.MergeBaseRepo, req.MergeBaseBranch, err)
}
return nil
}
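
// checkoutJobCommit checks out the kernel commit to test: for cross-tree
// jobs the merge base of the two repos, otherwise the requested commit
// hash or branch head.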
func (jp *JobProcessor) checkoutJobCommit(job *Job, repo vcs.Repo) (*vcs.Commit, error) {
req, resp := job.req, job.resp
var kernelCommit *vcs.Commit
if req.MergeBaseRepo != "" {
jp.Logf(1, "checking out the base kernel...")
firstCommit, err := checkoutKernelOrCommit(repo, req.KernelRepo, req.KernelBranch)
if err != nil {
return nil, fmt.Errorf("failed to checkout first kernel repo %v on %v: %w",
req.KernelRepo, req.KernelBranch, err)
}
secondCommit, err := checkoutKernelOrCommit(repo, req.MergeBaseRepo, req.MergeBaseBranch)
if err != nil {
return nil, fmt.Errorf("failed to checkout second kernel repo %v on %v: %w",
req.MergeBaseRepo, req.MergeBaseBranch, err)
}
bases, err := repo.MergeBases(firstCommit.Hash, secondCommit.Hash)
if err != nil {
return nil, fmt.Errorf("failed to calculate merge bases between %v and %v: %w",
firstCommit.Hash, secondCommit.Hash, err)
}
if len(bases) != 1 {
return nil, fmt.Errorf("expected one merge base between %v and %v, got %d",
firstCommit.Hash, secondCommit.Hash, len(bases))
}
kernelCommit, err = repo.CheckoutCommit(req.KernelRepo, bases[0].Hash)
if err != nil {
return nil, fmt.Errorf("failed to checkout kernel repo %v on merge base %v: %w",
req.KernelRepo, bases[0].Hash, err)
}
resp.Build.KernelBranch = ""
} else if vcs.CheckCommitHash(req.KernelBranch) {
var err error
kernelCommit, err = repo.CheckoutCommit(req.KernelRepo, req.KernelBranch)
if err != nil {
return nil, fmt.Errorf("failed to checkout kernel repo %v on commit %v: %w",
req.KernelRepo, req.KernelBranch, err)
}
resp.Build.KernelBranch = ""
} else {
var err error
kernelCommit, err = repo.CheckoutBranch(req.KernelRepo, req.KernelBranch)
if err != nil {
return nil, fmt.Errorf("failed to checkout kernel repo %v/%v: %w",
req.KernelRepo, req.KernelBranch, err)
}
}
return kernelCommit, nil
}
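
// checkoutKernelOrCommit checks out a specific commit if branch is a
// commit hash, and the branch head otherwise.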
func checkoutKernelOrCommit(repo vcs.Repo, url, branch string) (*vcs.Commit, error) {
if vcs.CheckCommitHash(branch) {
return repo.CheckoutCommit(url, branch)
}
return repo.CheckoutBranch(url, branch)
}
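
// patchTestResult is the aggregated outcome of the patch test runs.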
type patchTestResult struct {
report *report.Report
rawOutput []byte
}
func aggregateTestResults(results []instance.EnvTestResult) (*patchTestResult, error) {
// We can have transient errors and other errors of different types.
// We need to avoid reporting transient "failed to boot" or "failed to copy binary" errors.
// If any of the instances crash during testing, we report this with the highest priority.
// Then if any of the runs succeed, we report that (to avoid transient errors).
// If all instances failed to boot, then we report one of these errors.
var anyErr, testErr error
var resReport, resSuccess *patchTestResult
anyErr = fmt.Errorf("no env test runs")
for _, res := range results {
if res.Error == nil {
resSuccess = &patchTestResult{rawOutput: res.RawOutput}
continue
}
anyErr = res.Error
var testError *instance.TestError
var crashError *instance.CrashError
switch {
case errors.As(res.Error, &testError):
// We should not put rep into resp.CrashTitle/CrashReport,
// because that would be treated as the patch not fixing the bug.
if rep := testError.Report; rep != nil {
testErr = fmt.Errorf("%v\n\n%s\n\n%s", rep.Title, rep.Report, rep.Output)
} else {
testErr = fmt.Errorf("%v\n\n%s", testError.Title, testError.Output)
}
case errors.As(res.Error, &crashError):
if resReport == nil || (len(resReport.report.Report) == 0 && len(crashError.Report.Report) != 0) {
resReport = &patchTestResult{report: crashError.Report, rawOutput: res.RawOutput}
}
}
}
if resReport != nil {
return resReport, nil
}
if resSuccess != nil {
return resSuccess, nil
}
if testErr != nil {
return nil, testErr
}
return nil, anyErr
}
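
// Logf logs a message prefixed with the job processor name.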
func (jp *JobProcessor) Logf(level int, msg string, args ...interface{}) {
log.Logf(level, "%s: "+msg, append([]interface{}{jp.name}, args...)...)
}

// Errorf logs a non-fatal error and sends it to the dashboard.
func (jp *JobProcessor) Errorf(msg string, args ...interface{}) {
log.Errorf("job: "+msg, args...)
if jp.dash != nil {
jp.dash.LogError(jp.name, msg, args...)
}
}