Skip to content

Commit

Permalink
feat: use gojob@6452a5cc22452391a67552bbc99515fb27c817d5
Browse files Browse the repository at this point in the history
  • Loading branch information
WangYihang committed Feb 27, 2024
1 parent 73fa8b5 commit 449f328
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 321 deletions.
6 changes: 2 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
go.work

# Data files
input.txt
output.txt
*.status
data/

# Goreleaser
dist/
dist/
7 changes: 5 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@ module github.com/WangYihang/http-grab

go 1.21.6

require github.com/jessevdk/go-flags v1.5.0
require (
github.com/WangYihang/gojob v0.0.8-0.20240227110649-6452a5cc2245
github.com/jessevdk/go-flags v1.5.0
)

require golang.org/x/sys v0.1.0 // indirect
require golang.org/x/sys v0.17.0 // indirect
6 changes: 4 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
github.com/WangYihang/gojob v0.0.8-0.20240227110649-6452a5cc2245 h1:PdxyljUyTYMm+Z8/ip8LjtTFYRekf/aSLeS8vKM49KE=
github.com/WangYihang/gojob v0.0.8-0.20240227110649-6452a5cc2245/go.mod h1:JWIJDgu3Zln9JfzQVXlEegL8kwU+laLkuSDg6fnp4tA=
github.com/jessevdk/go-flags v1.5.0 h1:1jKYvbxEjfUl0fmqTCOfonvskHHXMjBySTLW4y9LFvc=
github.com/jessevdk/go-flags v1.5.0/go.mod h1:Fw0T6WPc1dYxT4mKEZRfG5kJhaTDP9pj1c2EWnYs/m4=
golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
68 changes: 26 additions & 42 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,27 +1,28 @@
package main

import (
"log/slog"
"os"

"github.com/WangYihang/gojob"
"github.com/WangYihang/gojob/pkg/util"
"github.com/WangYihang/http-grab/pkg/model"
"github.com/jessevdk/go-flags"
)

type Options struct {
InputFilePath string `short:"i" long:"input" description:"input file path" required:"true"`
OutputFilePath string `short:"o" long:"output" description:"output file path" required:"true"`
StatusUpdatesFilePath string `short:"s" long:"status-updates" description:"status updates file path"`

NumWorkers int `short:"n" long:"num-workers" description:"number of workers" default:"32"`
NumShards int64 `long:"num-shards" description:"number of shards" default:"1"`
Shard int64 `long:"shard" description:"shard" default:"0"`

Port int `short:"p" long:"port" description:"port" default:"80"`
Path string `long:"path" description:"path" default:"index.html"`
Host string `long:"host" description:"http host header" default:""`
MaxTries int `short:"m" long:"max-tries" description:"max tries" default:"4"`
Timeout int `short:"t" long:"timeout" description:"timeout" default:"8"`
InputFilePath string `short:"i" long:"input" description:"input file path" required:"true"`
OutputFilePath string `short:"o" long:"output" description:"output file path" required:"true"`
StatusFilePath string `short:"s" long:"status" description:"status file path" required:"true" default:"-"`

NumWorkers int `short:"n" long:"num-workers" description:"number of workers" default:"32"`
NumShards int64 `long:"num-shards" description:"number of shards" default:"1"`
Shard int64 `long:"shard" description:"shard" default:"0"`
MaxTries int `short:"m" long:"max-tries" description:"max tries" default:"4"`
MaxRuntimePerTaskSeconds int `short:"t" long:"max-runtime-per-task-seconds" description:"max runtime per task seconds" default:"60"`

Port int `short:"p" long:"port" description:"port" default:"80"`
Path string `long:"path" description:"path" default:"index.html"`
Host string `long:"host" description:"http host header, leave it blank to use the IP address" default:""`
}

var opts Options
Expand All @@ -31,36 +32,19 @@ func init() {
if err != nil {
os.Exit(1)
}
if opts.StatusUpdatesFilePath == "" {
opts.StatusUpdatesFilePath = opts.OutputFilePath + ".status"
}
if opts.Shard >= opts.NumShards {
slog.Error("shard must be less than num-shards", slog.Int64("shard", opts.Shard), slog.Int64("num_shards", opts.NumShards))
os.Exit(1)
}
}

func load() chan *model.Task {
return model.LoadTasks(
model.NewTask,
opts.InputFilePath,
opts.Port,
opts.Path,
opts.Host,
opts.Timeout,
opts.MaxTries,
opts.NumShards,
opts.Shard,
)
}

func main() {
numWorkers := opts.NumWorkers
numTasks := model.CountLines(opts.InputFilePath, opts.NumShards, opts.Shard)
slog.Info("all tasks loaded", slog.Int64("num_tasks", numTasks))
resultChans := make([]chan *model.Task, 0, numWorkers)
for _, taskChan := range model.FanOut(load(), numWorkers) {
resultChans = append(resultChans, model.Worker(taskChan))
scheduler := gojob.NewScheduler().
SetNumWorkers(opts.NumWorkers).
SetMaxRetries(opts.MaxTries).
SetMaxRuntimePerTaskSeconds(opts.MaxRuntimePerTaskSeconds).
SetNumShards(int64(opts.NumShards)).
SetShard(int64(opts.Shard)).
SetOutputFilePath(opts.OutputFilePath).
Start()
for line := range util.Cat(opts.InputFilePath) {
scheduler.Submit(model.NewTask(line, opts.Port, opts.Path, opts.Host))
}
model.StoreTasks(model.FanIn(resultChans), opts.OutputFilePath, opts.StatusUpdatesFilePath, numTasks)
scheduler.Wait()
}
192 changes: 0 additions & 192 deletions pkg/model/scheduler.go

This file was deleted.

0 comments on commit 449f328

Please sign in to comment.