Skip to content

Commit

Permalink
feat: support task sharding
Browse files Browse the repository at this point in the history
  • Loading branch information
WangYihang committed Feb 1, 2024
1 parent b8d2a6e commit 12db1d3
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 18 deletions.
21 changes: 13 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,21 @@ Usage:
http-grab [OPTIONS]

Application Options:
-i, --input= input file path
-o, --output= output file path
-n, --num-workers= number of workers (default: 32)
-t, --timeout= timeout (default: 8)
-p, --port= port (default: 80)
-P, --path= path (default: index.html)
-H, --host= host
-i, --input= input file path
-o, --output= output file path
-s, --status-updates= status updates file path
-n, --num-workers= number of workers (default: 32)
--seed= seed (default: 0)
--num-shards= number of shards (default: 1)
--shard= shard (default: 0)
-p, --port= port (default: 80)
--path= path (default: index.html)
--host= http host header
-m, --max-tries= max tries (default: 4)
-t, --timeout= timeout (default: 8)

Help Options:
-h, --help Show this help message
-h, --help Show this help message
```

```bash
Expand Down
20 changes: 14 additions & 6 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,17 @@ type Options struct {
InputFilePath string `short:"i" long:"input" description:"input file path" required:"true"`
OutputFilePath string `short:"o" long:"output" description:"output file path" required:"true"`
StatusUpdatesFilePath string `short:"s" long:"status-updates" description:"status updates file path"`
NumWorkers int `short:"n" long:"num-workers" description:"number of workers" default:"32"`
Timeout int `short:"t" long:"timeout" description:"timeout" default:"8"`
Port int `short:"p" long:"port" description:"port" default:"80"`
Path string `short:"P" long:"path" description:"path" default:"index.html"`
Host string `short:"H" long:"host" description:"host" default:""`
MaxTries int `short:"m" long:"max-tries" description:"max tries" default:"4"`

NumWorkers int `short:"n" long:"num-workers" description:"number of workers" default:"32"`
Seed int64 `long:"seed" description:"seed" default:"0"`
NumShards int64 `long:"num-shards" description:"number of shards" default:"1"`
Shard int64 `long:"shard" description:"shard" default:"0"`

Port int `short:"p" long:"port" description:"port" default:"80"`
Path string `long:"path" description:"path" default:"index.html"`
Host string `long:"host" description:"http host header" default:""`
MaxTries int `short:"m" long:"max-tries" description:"max tries" default:"4"`
Timeout int `short:"t" long:"timeout" description:"timeout" default:"8"`
}

var opts Options
Expand All @@ -41,6 +46,9 @@ func load() chan *model.Task {
opts.Host,
opts.Timeout,
opts.MaxTries,
opts.Seed,
opts.NumShards,
opts.Shard,
)
}

Expand Down
12 changes: 8 additions & 4 deletions pkg/model/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bufio"
"fmt"
"log/slog"
"math/rand"
"os"
"sync"
"time"
Expand Down Expand Up @@ -68,15 +69,18 @@ type ITask interface {

type TaskFactory[T ITask] func(index int, ip string, port int, path string, host string, timeout int, numRetries int) T

func LoadTasks[T ITask](factory TaskFactory[T], inputFilePath string, port int, path string, host string, timeout int, numRetries int) chan T {
func LoadTasks[T ITask](factory TaskFactory[T], inputFilePath string, port int, path string, host string, timeout int, numRetries int, seed int64, numShards int64, shard int64) chan T {
out := make(chan T)
go func() {
defer close(out)
index := 0
rng := rand.NewSource(seed)
for line := range ReadFile(inputFilePath) {
task := factory(index, line, port, path, host, timeout, numRetries)
out <- task
index++
if rng.Int63()%numShards == shard {
task := factory(index, line, port, path, host, timeout, numRetries)
out <- task
index++
}
}
}()
return out
Expand Down

0 comments on commit 12db1d3

Please sign in to comment.