-
Notifications
You must be signed in to change notification settings - Fork 28
/
sitedossier.go
86 lines (69 loc) · 2 KB
/
sitedossier.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
// Package sitedossier implements a passive subdomain source that scrapes sitedossier.com.
package sitedossier
import (
"context"
"fmt"
"io"
"math/rand"
"net/http"
"regexp"
"time"
"github.com/ZhuriLab/Starmap/pkg/subscraping"
)
// SleepRandIntn is the exclusive upper bound passed to rand.Intn when
// choosing the random part of the pause between page requests: enumerate
// waits 3 + rand.Intn(SleepRandIntn) seconds between consecutive requests.
const SleepRandIntn = 5
// reNext matches an anchor tag immediately followed by <b> and captures its
// href. enumerate appends the captured path to the site root and fetches it
// as the next page of results.
var reNext = regexp.MustCompile(`<a href="([A-Za-z0-9/.]+)"><b>`)
// agent bundles the state shared by one enumeration run.
type agent struct {
// results receives every subdomain and error produced by enumerate.
results chan subscraping.Result
// session supplies the HTTP helpers (SimpleGet, DiscardHTTPResponse) and
// the Extractor used to find subdomains in a page body.
session *subscraping.Session
}
// enumerate fetches baseURL, sends every subdomain the session Extractor finds
// in the page body on a.results, and then follows the next-page link matched
// by reNext until no further link is found.
//
// An error from SimpleGet is reported on a.results and ends the crawl, unless
// the response carries a 404 status, in which case the body is still scanned.
// A failure to read the body is reported the same way and also ends the crawl.
//
// Between consecutive page requests the method waits
// 3 + rand.Intn(SleepRandIntn) seconds. The wait is skipped when there is no
// next page and is abandoned as soon as ctx is cancelled.
func (a *agent) enumerate(ctx context.Context, baseURL string) {
	pageURL := baseURL
	for {
		// Do not issue a request if the caller has already given up.
		if ctx.Err() != nil {
			return
		}

		resp, err := a.session.SimpleGet(ctx, pageURL)
		isNotFound := resp != nil && resp.StatusCode == http.StatusNotFound
		if err != nil && !isNotFound {
			a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
			a.session.DiscardHTTPResponse(resp)
			return
		}

		body, err := io.ReadAll(resp.Body)
		// The body is closed on both the success and the failure path.
		resp.Body.Close()
		if err != nil {
			a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Error, Error: err}
			return
		}

		src := string(body)
		for _, match := range a.session.Extractor.FindAllString(src, -1) {
			a.results <- subscraping.Result{Source: "sitedossier", Type: subscraping.Subdomain, Value: match}
		}

		next := reNext.FindStringSubmatch(src)
		if len(next) == 0 {
			// Last page: nothing left to fetch, so there is no reason to wait.
			return
		}

		// Pause before the next request, but wake up early on cancellation
		// instead of blocking for the full delay.
		timer := time.NewTimer(time.Duration(3+rand.Intn(SleepRandIntn)) * time.Second)
		select {
		case <-ctx.Done():
			timer.Stop()
			return
		case <-timer.C:
		}

		pageURL = "http://www.sitedossier.com" + next[1]
	}
}
// Source is the passive scraping agent for sitedossier. It holds no state;
// each call to Run builds its own agent.
type Source struct{}
// Run starts enumerating the subdomains of domain in a background goroutine
// and returns the channel on which results are delivered. The channel is
// closed once enumeration has finished.
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
	out := make(chan subscraping.Result)
	worker := &agent{results: out, session: session}
	startURL := fmt.Sprintf("http://www.sitedossier.com/parentdomain/%s", domain)

	go func() {
		worker.enumerate(ctx, startURL)
		close(out)
	}()

	return out
}
// Name reports the identifier of this source.
func (s *Source) Name() string {
	const sourceName = "sitedossier"
	return sourceName
}