Skip to content

Commit

Permalink
resty client cache
Browse files Browse the repository at this point in the history
  • Loading branch information
digger committed Sep 8, 2020
1 parent 8fdb981 commit 03d0546
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 16 deletions.
17 changes: 1 addition & 16 deletions core/crawler/crawler_core.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,8 @@ package crawler

import (
"bytes"
"digger/common"
"digger/models"
"digger/plugins"
"errors"
"fmt"
"github.com/PuerkitoBio/goquery"
"github.com/antchfx/htmlquery"
Expand All @@ -20,8 +18,6 @@ import (
"github.com/json-iterator/go"
"golang.org/x/net/html"
"io"
"net/http"
"time"
)

var (
Expand Down Expand Up @@ -113,18 +109,7 @@ func Play(
}

func request(queue *models.Queue, project *models.Project) (*resty.Response, error) {
return resty.New().
SetTimeout(time.Second * time.Duration(project.GetIntSetting(common.SETTINGS_REQUEST_TIMEOUT, 60))).
SetRedirectPolicy(resty.RedirectPolicyFunc(func(req *http.Request, via []*http.Request) error {
// return nil for continue redirect otherwise return error to stop/prevent redirect
f := project.GetBoolSetting(common.SETTINGS_FOLLOW_REDIRECT)
if f {
return nil
}
return errors.New("follow redirect is disabled")
})).
SetRetryCount(project.GetIntSetting(common.SETTINGS_RETRY_COUNT, 0)).
SetRetryWaitTime(time.Second * time.Duration(project.GetIntSetting(common.SETTINGS_RETRY_WAIT, 3))).
return getClient(queue, project).
R().
SetHeaders(project.Headers).
Get(queue.Url)
Expand Down
67 changes: 67 additions & 0 deletions core/crawler/resty_client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package crawler

import (
"digger/common"
"digger/models"
"errors"
"github.com/go-resty/resty/v2"
"github.com/hetianyi/gox/logger"
"github.com/hetianyi/gox/timer"
"net/http"
"sync"
"time"
)

type restyClient struct {
client *resty.Client
lastUse time.Time
}

var (
restyClientCache = make(map[int]*restyClient)
restyClientLock = new(sync.Mutex)
)

func init() {
expireClientDetect()
}

func expireClientDetect() {
timer.Start(0, time.Second*10, 0, func(t *timer.Timer) {
restyClientLock.Lock()
defer restyClientLock.Unlock()

for taskId, c := range restyClientCache {
if c.lastUse.Before(time.Now()) {
delete(restyClientCache, taskId)
logger.Debug("resty client expired: ", taskId)
break
}
}
})
}

func getClient(queue *models.Queue, project *models.Project) *resty.Client {
restyClientLock.Lock()
defer restyClientLock.Unlock()

client := restyClientCache[queue.TaskId]
if client == nil {
restyClientCache[queue.TaskId] = &restyClient{
client: resty.New().
SetTimeout(time.Second * time.Duration(project.GetIntSetting(common.SETTINGS_REQUEST_TIMEOUT, 60))).
SetRedirectPolicy(resty.RedirectPolicyFunc(func(req *http.Request, via []*http.Request) error {
// return nil for continue redirect otherwise return error to stop/prevent redirect
f := project.GetBoolSetting(common.SETTINGS_FOLLOW_REDIRECT)
if f {
return nil
}
return errors.New("follow redirect is disabled")
})).
SetRetryCount(project.GetIntSetting(common.SETTINGS_RETRY_COUNT, 0)).
SetRetryWaitTime(time.Second * time.Duration(project.GetIntSetting(common.SETTINGS_RETRY_WAIT, 3))),
lastUse: time.Now(),
}
}
return restyClientCache[queue.TaskId].client
}

0 comments on commit 03d0546

Please sign in to comment.