Skip to content

Commit

Permalink
feat: add keywords, skip-error and retry options
Browse files Browse the repository at this point in the history
  • Loading branch information
jinyuliu authored and syhily committed Dec 28, 2023
1 parent 92aa5c9 commit 565e308
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 14 deletions.
11 changes: 11 additions & 0 deletions cmd/flags/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ var (
UserAgent = client.DefaultUserAgent
Proxy = ""
ConfigRoot = ""
Keywords = ""
Retry = 3
SkipError = true

// Common download flags.

Expand Down Expand Up @@ -68,17 +71,25 @@ func NewFetcher(category fetcher.Category, properties map[string]string) (fetche
return nil, err
}

keywords := strings.Split(Keywords, ",")
for i, keyword := range keywords {
keywords[i] = strings.TrimSpace(keyword)
}

return fetcher.New(&fetcher.Config{
Config: cc,
Category: category,
Formats: fs,
Keywords: keywords,
Extract: Extract,
DownloadPath: DownloadPath,
InitialBookID: InitialBookID,
Rename: Rename,
Thread: Thread,
RateLimit: RateLimit,
Properties: properties,
Retry: Retry,
SkipError: SkipError,
})
}

Expand Down
1 change: 1 addition & 0 deletions cmd/k12.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ var k12Cmd = &cobra.Command{
Row("Download Path", flags.DownloadPath).
Row("Thread", flags.Thread).
Row("Thread Limit (req/min)", flags.RateLimit).
Row("Keywords", flags.Keywords).
Print()

flags.Website = k12Website
Expand Down
3 changes: 3 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,8 @@ func init() {
persistentFlags.StringVarP(&flags.ConfigRoot, "config", "c", flags.ConfigRoot, "The config path for bookhunter")
persistentFlags.StringVar(&flags.Proxy, "proxy", flags.Proxy, "The request proxy")
persistentFlags.StringVarP(&flags.UserAgent, "user-agent", "a", flags.UserAgent, "The request user-agent")
persistentFlags.IntVarP(&flags.Retry, "retry", "r", flags.Retry, "The retry times for a failed download")
persistentFlags.BoolVarP(&flags.SkipError, "skip-error", "s", flags.SkipError, "Continue to download the next book if the current book download failed")
persistentFlags.StringVarP(&flags.Keywords, "keywords", "k", flags.Keywords, "The keywords for books")
persistentFlags.BoolVar(&log.EnableDebug, "verbose", false, "Print all the logs for debugging")
}
5 changes: 4 additions & 1 deletion internal/fetcher/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,16 @@ const (
type Config struct {
Category Category // The identity of the fetcher service.
Formats []file.Format // The formats that the user wants.
Keywords []string // The keywords that the user wants.
Extract bool // Extract the archives after download.
DownloadPath string // The path for storing the file.
InitialBookID int64 // The book id start to download.
Rename bool // Rename the file by using book ID.
Thread int // The number of download threads.
RateLimit int // Request per minute for a thread.
precessFile string // Define the download process.
Retry int // The retry times for a failed download.
SkipError bool // Continue to download the next book if the current book download failed.
processFile string // Define the download process.

// The extra configuration for a custom fetcher services.
Properties map[string]string
Expand Down
28 changes: 20 additions & 8 deletions internal/fetcher/fetcher.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package fetcher

import (
"errors"
"fmt"
"os"
"path/filepath"
"sync"
Expand Down Expand Up @@ -45,12 +47,12 @@ func (f *fetcher) Download() error {
}
log.Infof("Successfully query the download content counts: %d", size)

// Create download progress with ratelimit.
if f.precessFile == "" {
f.precessFile = defaultProgressFile
// Create download progress with rate limit.
if f.processFile == "" {
f.processFile = defaultProgressFile
}
rate := f.RateLimit * f.Thread
f.progress, err = progress.NewProgress(f.InitialBookID, size, rate, filepath.Join(configPath, f.precessFile))
f.progress, err = progress.NewProgress(f.InitialBookID, size, rate, filepath.Join(configPath, f.processFile))
if err != nil {
return err
}
Expand Down Expand Up @@ -120,9 +122,17 @@ thread:
// Download the file by formats one by one.
for format, share := range formats {
err := f.downloadFile(bookID, format, share)
if err != nil && err != ErrFileNotExist {
f.errs <- err
break thread
for retry := 0; err != nil && retry < f.Retry; retry++ {
fmt.Printf("Download book id %d failed: %v, retry (%d/%d)\n", bookID, err, retry, f.Retry)
err = f.downloadFile(bookID, format, share)
}

if err != nil && !errors.Is(err, ErrFileNotExist) {
fmt.Printf("Download book id %d failed: %v\n", bookID, err)
if !f.SkipError {
f.errs <- err
break thread
}
}
}

Expand All @@ -137,6 +147,8 @@ thread:

// downloadFile in a thread.
func (f *fetcher) downloadFile(bookID int64, format file.Format, share driver.Share) error {
f.progress.TakeRateLimit()
log.Debugf("Start download book id %d, format %s, share %v.", bookID, format, share)
// Create the file writer.
writer, err := f.creator.NewWriter(bookID, f.progress.Size(), share.FileName, share.SubPath, format, share.Size)
if err != nil {
Expand All @@ -153,7 +165,7 @@ func (f *fetcher) filterFormats(formats map[file.Format]driver.Share) map[file.F
fs := make(map[file.Format]driver.Share)
for format, share := range formats {
for _, vf := range f.Formats {
if format == vf {
if format == vf && matchKeywords(share.FileName, f.Keywords) {
fs[format] = share
break
}
Expand Down
15 changes: 15 additions & 0 deletions internal/fetcher/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package fetcher

import (
"fmt"
"strings"

"github.com/bookstairs/bookhunter/internal/driver"
"github.com/bookstairs/bookhunter/internal/file"
Expand All @@ -19,6 +20,20 @@ type service interface {
fetch(int64, file.Format, driver.Share, file.Writer) error
}

// matchKeywords reports whether title passes the keyword filter.
//
// The title matches when it contains at least one non-empty keyword.
// Empty keywords are ignored rather than treated as a match: a
// comma-separated list that is unset or has a trailing comma produces
// empty entries once split, and strings.Contains(title, "") is always
// true, which would otherwise silently disable the filter.
//
// If keywords is nil, empty, or contains only empty strings, no filter is
// in effect and every title matches.
func matchKeywords(title string, keywords []string) bool {
	// filtered records whether at least one real (non-empty) keyword was seen.
	filtered := false

	for _, keyword := range keywords {
		if keyword == "" {
			continue
		}
		if strings.Contains(title, keyword) {
			return true
		}
		filtered = true
	}

	// No usable keyword means "no filter"; otherwise nothing matched.
	return !filtered
}

// newService is the endpoint for creating all the supported download service.
func newService(c *Config) (service, error) {
switch c.Category {
Expand Down
2 changes: 1 addition & 1 deletion internal/fetcher/telegram.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func newTelegramService(config *Config) (service, error) {
appHash := config.Property("appHash")

// Change the process file name.
config.precessFile = strings.ReplaceAll(channelID, "/", "_") + ".db"
config.processFile = strings.ReplaceAll(channelID, "/", "_") + ".db"

tel, err := telegram.New(channelID, mobile, appID, appHash, sessionPath, config.Proxy)
if err != nil {
Expand Down
13 changes: 9 additions & 4 deletions internal/progress/progress.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ var (
)

type Progress interface {
// TakeRateLimit would wait until the rate limit is available.
TakeRateLimit()

// AcquireBookID would find the book id from the assign array.
AcquireBookID() int64

Expand All @@ -42,7 +45,7 @@ type bitProgress struct {
file *os.File // The Progress file path for download progress.
}

// NewProgress Create a storge for save the download progress.
// NewProgress creates a storage for saving the download progress.
func NewProgress(start, size int64, rate int, path string) (Progress, error) {
if start < 1 {
return nil, ErrStartBookID
Expand Down Expand Up @@ -127,14 +130,16 @@ func loadStorage(file *os.File) (*bitset.BitSet, error) {
return set, nil
}

// TakeRateLimit delegates to the limiter's Take method. It is expected to
// block until the configured request rate permits another operation; confirm
// against the limiter implementation, which is not visible here.
func (storage *bitProgress) TakeRateLimit() {
storage.limit.Take()
}

// AcquireBookID would find the book id from the assign array.
func (storage *bitProgress) AcquireBookID() int64 {
storage.lock.Lock()
defer storage.lock.Unlock()

// Block until the rate meets the given config.
storage.limit.Take()

for i := uint(0); i < storage.assigned.Len(); i++ {
if !storage.assigned.Test(i) {
storage.assigned.Set(i)
Expand Down

0 comments on commit 565e308

Please sign in to comment.