Skip to content

Commit

Permalink
*: A bulk commit (not recommended)
Browse files Browse the repository at this point in the history
feat: Handle bulk domains
refactor: Github source
  • Loading branch information
enenumxela committed Jul 26, 2023
1 parent df1f177 commit 3d93948
Show file tree
Hide file tree
Showing 17 changed files with 483 additions and 375 deletions.
228 changes: 164 additions & 64 deletions cmd/xurlfind3r/main.go
Expand Up @@ -8,50 +8,59 @@ import (
"reflect"
"strconv"
"strings"
"sync"

"github.com/hueristiq/hqgolog"
"github.com/hueristiq/hqgolog/formatter"
"github.com/hueristiq/hqgolog/levels"
"github.com/hueristiq/xurlfind3r/internal/configuration"
"github.com/hueristiq/xurlfind3r/pkg/xurlfind3r"
"github.com/hueristiq/xurlfind3r/pkg/xurlfind3r/sources"
"github.com/logrusorgru/aurora/v3"
"github.com/spf13/pflag"
)

var (
au aurora.Aurora

domain string
includeSubdomains bool
listSources bool
sourcesToUse []string
sourcesToExclude []string
parseWaybackRobots bool
parseWaybackSource bool
filterPattern string
matchPattern string
monochrome bool
output string
verbosity string
YAMLConfigFile string
domainsSlice []string
domainsListFilePath string
includeSubdomains bool
listSources bool
sourcesToUse []string
sourcesToExclude []string
parseWaybackRobots bool
parseWaybackSource bool
threads int
filterPattern string
matchPattern string
monochrome bool
output string
outputDirectory string
verbosity string
YAMLConfigFile string
)

func init() {
// defaults
defaultThreads := 50
defaultYAMLConfigFile := fmt.Sprintf("~/.hueristiq/%s/config.yaml", configuration.NAME)

// Handle CLI arguments, flags & help message (pflag)
pflag.StringVarP(&domain, "domain", "d", "", "")
pflag.StringSliceVarP(&domainsSlice, "domain", "d", []string{}, "")
pflag.StringVarP(&domainsListFilePath, "list", "l", "", "")
pflag.BoolVar(&includeSubdomains, "include-subdomains", false, "")
pflag.BoolVar(&listSources, "sources", false, "")
pflag.StringSliceVarP(&sourcesToUse, "use-sources", "u", []string{}, "")
pflag.StringSliceVarP(&sourcesToExclude, "exclude-sources", "e", []string{}, "")
pflag.BoolVar(&parseWaybackRobots, "parse-wayback-robots", false, "")
pflag.BoolVar(&parseWaybackSource, "parse-wayback-source", false, "")
pflag.IntVarP(&threads, "threads", "t", defaultThreads, "")
pflag.StringVarP(&filterPattern, "filter", "f", "", "")
pflag.StringVarP(&matchPattern, "match", "m", "", "")
pflag.BoolVar(&monochrome, "no-color", false, "")
pflag.StringVarP(&output, "output", "o", "", "")
pflag.StringVarP(&outputDirectory, "outputDirectory", "O", "", "")
pflag.StringVarP(&verbosity, "verbosity", "v", string(levels.LevelInfo), "")
pflag.StringVarP(&YAMLConfigFile, "configuration", "c", defaultYAMLConfigFile, "")

Expand All @@ -63,7 +72,8 @@ func init() {
h += " xurlfind3r [OPTIONS]\n"

h += "\nINPUT:\n"
h += " -d, --domain string domain to match URLs\n"
h += " -d, --domain string[] target domains\n"
h += " -l, --list string target domains' list file path\n"

h += "\nSCOPE:\n"
h += " --include-subdomains bool match subdomain's URLs\n"
Expand All @@ -75,13 +85,17 @@ func init() {
h += " --parse-wayback-robots bool with wayback, parse robots.txt snapshots\n"
h += " --parse-wayback-source bool with wayback, parse source code snapshots\n"

h += "\nOPTIMIZATION:\n"
h += fmt.Sprintf(" -t, --threads int number of threads (default: %d)\n", defaultThreads)

h += "\nFILTER & MATCH:\n"
h += " -f, --filter string regex to filter URLs\n"
h += " -m, --match string regex to match URLs\n"

h += "\nOUTPUT:\n"
h += " --no-color bool disable colored output\n"
h += " -o, --output string output URLs file path\n"
h += " -O, --output-directory string output URLs directory path\n"
h += fmt.Sprintf(" -v, --verbosity string debug, info, warning, error, fatal or silent (default: %s)\n", string(levels.LevelInfo))

h += "\nCONFIGURATION:\n"
Expand Down Expand Up @@ -153,77 +167,163 @@ func main() {
os.Exit(0)
}

// Find URLs
if verbosity != string(levels.LevelSilent) {
hqgolog.Info().Msgf("finding URLs for %v.", au.Underline(domain).Bold())
domains := make(chan string, threads)

if includeSubdomains {
hqgolog.Info().Msg("`--include-subdomains` used: match subdomain's URLs.")
// Load input domains
go func() {
defer close(domains)

// input domains: slice
for _, domain := range domainsSlice {
domains <- domain
}

hqgolog.Print().Msg("")
}
// input domains: file
if domainsListFilePath != "" {
file, err := os.Open(domainsListFilePath)
if err != nil {
hqgolog.Error().Msg(err.Error())
}

options := &xurlfind3r.Options{
Domain: domain,
IncludeSubdomains: includeSubdomains,
SourcesToUSe: sourcesToUse,
SourcesToExclude: sourcesToExclude,
Keys: config.Keys,
ParseWaybackRobots: parseWaybackRobots,
ParseWaybackSource: parseWaybackSource,
FilterPattern: filterPattern,
Matchattern: matchPattern,
}
scanner := bufio.NewScanner(file)

finder, err := xurlfind3r.New(options)
if err != nil {
hqgolog.Fatal().Msg(err.Error())
}
for scanner.Scan() {
domain := scanner.Text()

URLs := finder.Find()
if domain != "" {
domains <- domain
}
}

if output != "" {
// Create output file path directory
directory := filepath.Dir(output)
if err := scanner.Err(); err != nil {
hqgolog.Error().Msg(err.Error())
}
}

if _, err := os.Stat(directory); os.IsNotExist(err) {
if err = os.MkdirAll(directory, os.ModePerm); err != nil {
hqgolog.Fatal().Msg(err.Error())
// input domains: stdin
if hasStdin() {
scanner := bufio.NewScanner(os.Stdin)

for scanner.Scan() {
domain := scanner.Text()

if domain != "" {
domains <- domain
}
}

if err := scanner.Err(); err != nil {
hqgolog.Error().Msg(err.Error())
}
}
}()

// Find and output URLs.
var consolidatedWriter *bufio.Writer

// Create output file
file, err := os.OpenFile(output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if output != "" {
directory := filepath.Dir(output)

mkdir(directory)

consolidatedFile, err := os.OpenFile(output, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
hqgolog.Fatal().Msg(err.Error())
}

defer file.Close()
defer consolidatedFile.Close()

// Write URLs output file and print on screen
writer := bufio.NewWriter(file)
consolidatedWriter = bufio.NewWriter(consolidatedFile)
}

for URL := range URLs {
if verbosity == string(levels.LevelSilent) {
hqgolog.Print().Msg(URL.Value)
} else {
hqgolog.Print().Msgf("[%s] %s", au.BrightBlue(URL.Source), URL.Value)
if outputDirectory != "" {
mkdir(outputDirectory)
}

wg := &sync.WaitGroup{}

for i := 0; i < threads; i++ {
wg.Add(1)

go func() {
defer wg.Done()

options := &xurlfind3r.Options{
IncludeSubdomains: includeSubdomains,
SourcesToUSe: sourcesToUse,
SourcesToExclude: sourcesToExclude,
Keys: config.Keys,
ParseWaybackRobots: parseWaybackRobots,
ParseWaybackSource: parseWaybackSource,
FilterPattern: filterPattern,
Matchattern: matchPattern,
}

fmt.Fprintln(writer, URL.Value)
}
finder, err := xurlfind3r.New(options)
if err != nil {
hqgolog.Error().Msg(err.Error())

return
}

for domain := range domains {
URLs := finder.Find(domain)

switch {
case output != "":
processURLs(consolidatedWriter, URLs, verbosity)
case outputDirectory != "":
domainFile, err := os.OpenFile(filepath.Join(outputDirectory, domain+".txt"), os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
hqgolog.Fatal().Msg(err.Error())
}

if err = writer.Flush(); err != nil {
domainWriter := bufio.NewWriter(domainFile)

processURLs(domainWriter, URLs, verbosity)
default:
processURLs(nil, URLs, verbosity)
}
}
}()
}

wg.Wait()
}

// hasStdin reports whether standard input looks like it carries redirected
// data rather than coming from a character device (such as an interactive
// terminal).
//
// It returns false when os.Stdin cannot be stat'ed. Otherwise it returns true
// if stdin is a named pipe (FIFO), or if stdin is anything other than a
// character device.
func hasStdin() bool {
	info, err := os.Stdin.Stat()
	if err != nil {
		return false
	}

	mode := info.Mode()

	// A FIFO always counts as piped input.
	if mode&os.ModeNamedPipe != 0 {
		return true
	}

	// Otherwise input is considered present only when stdin is not a
	// character device.
	return mode&os.ModeCharDevice == 0
}

func mkdir(path string) {
if _, err := os.Stat(path); os.IsNotExist(err) {
if err = os.MkdirAll(path, os.ModePerm); err != nil {
hqgolog.Fatal().Msg(err.Error())
}
} else {
// Print URLs on screen
for URL := range URLs {
if verbosity == string(levels.LevelSilent) {
hqgolog.Print().Msg(URL.Value)
} else {
hqgolog.Print().Msgf("[%s] %s", au.BrightBlue(URL.Source), URL.Value)
}
}

func processURLs(writer *bufio.Writer, URLs chan sources.URL, verbosity string) {
for URL := range URLs {
if verbosity == string(levels.LevelSilent) {
hqgolog.Print().Msg(URL.Value)
} else {
hqgolog.Print().Msgf("[%s] %s", au.BrightBlue(URL.Source), URL.Value)
}

if writer != nil {
fmt.Fprintln(writer, URL.Value)

if err := writer.Flush(); err != nil {
hqgolog.Fatal().Msg(err.Error())
}
}
}
Expand Down
11 changes: 6 additions & 5 deletions go.mod
Expand Up @@ -5,9 +5,10 @@ go 1.20
require (
dario.cat/mergo v1.0.0
github.com/hueristiq/hqgolimit v0.0.0-20230623113203-3e14552a97f8
github.com/hueristiq/hqgolog v0.0.0-20230623101640-92de7a10a4bb
github.com/hueristiq/hqgourl v0.0.0-20230623095947-4dee5ebb9a96
github.com/hueristiq/hqgolog v0.0.0-20230623113334-a6018965a34f
github.com/hueristiq/hqgourl v0.0.0-20230724201234-90b0b363ac90
github.com/logrusorgru/aurora/v3 v3.0.0
github.com/spf13/cast v1.5.1
github.com/spf13/pflag v1.0.5
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80
github.com/valyala/fasthttp v1.48.0
Expand All @@ -18,7 +19,7 @@ require (
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/klauspost/compress v1.16.3 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect
golang.org/x/net v0.11.0 // indirect
golang.org/x/sys v0.9.0 // indirect
golang.org/x/term v0.9.0 // indirect
golang.org/x/net v0.12.0 // indirect
golang.org/x/sys v0.10.0 // indirect
golang.org/x/term v0.10.0 // indirect
)
27 changes: 17 additions & 10 deletions go.sum
Expand Up @@ -2,16 +2,23 @@ dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk=
dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk=
github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs=
github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/hueristiq/hqgolimit v0.0.0-20230623113203-3e14552a97f8 h1:r4ze6pX8H//X4SJEIcn8wHPgAhaGKEaa44lyHh1epXY=
github.com/hueristiq/hqgolimit v0.0.0-20230623113203-3e14552a97f8/go.mod h1:CzhJzxz2rv/NMKNz5b4eKFh1epdcED05YTHT32NFyrI=
github.com/hueristiq/hqgolog v0.0.0-20230623101640-92de7a10a4bb h1:DQUVIiWnrTDQ4MP6UJw7/fMkySN+PYonDhlgBh31DDI=
github.com/hueristiq/hqgolog v0.0.0-20230623101640-92de7a10a4bb/go.mod h1:S5J3E3Azva5+JKv67uc+Hh3XwLDvkVYDGjEaMTFrIqg=
github.com/hueristiq/hqgourl v0.0.0-20230623095947-4dee5ebb9a96 h1:oQsID2S7L6dhNVbwkStxesXOMbn7LWfDSyohVbuKJe8=
github.com/hueristiq/hqgourl v0.0.0-20230623095947-4dee5ebb9a96/go.mod h1:8NAT2ECb69qzGf2d/ty0PVE3M3HK/+fXLtri2c47wQE=
github.com/hueristiq/hqgolog v0.0.0-20230623113334-a6018965a34f h1:JAgZOIJ+UbkENpRiOTlfg51CW0UNrUkgwLjUGiH+x9g=
github.com/hueristiq/hqgolog v0.0.0-20230623113334-a6018965a34f/go.mod h1:S5J3E3Azva5+JKv67uc+Hh3XwLDvkVYDGjEaMTFrIqg=
github.com/hueristiq/hqgourl v0.0.0-20230724201234-90b0b363ac90 h1:Du3nvvMK/KJLiCqY5batXILrljJs/Up8bVNT8QT/3PA=
github.com/hueristiq/hqgourl v0.0.0-20230724201234-90b0b363ac90/go.mod h1:V+4GiyE0z+oPokCZdV/4oDXPM+ofYQH/Mh0nZDhonfQ=
github.com/klauspost/compress v1.16.3 h1:XuJt9zzcnaz6a16/OU53ZjWp/v7/42WcR5t2a0PcNQY=
github.com/klauspost/compress v1.16.3/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/logrusorgru/aurora/v3 v3.0.0 h1:R6zcoZZbvVcGMvDCKo45A9U/lzYyzl5NfYIvznmDfE4=
github.com/logrusorgru/aurora/v3 v3.0.0/go.mod h1:vsR12bk5grlLvLXAYrBsb5Oc/N+LxAlxggSjiwMnCUc=
github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8=
github.com/spf13/cast v1.5.1 h1:R+kOtfhWQE6TVQzY+4D7wJLBgkdVasCEFxSUBYBYIlA=
github.com/spf13/cast v1.5.1/go.mod h1:b9PdjNptOpzXr7Rq1q9gJML/2cdGQAo69NKzQ10KN48=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/tomnomnom/linkheader v0.0.0-20180905144013-02ca5825eb80 h1:nrZ3ySNYwJbSpD6ce9duiP+QkD3JuLCcWkdaehUS/3Y=
Expand All @@ -20,12 +27,12 @@ github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6Kllzaw
github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
github.com/valyala/fasthttp v1.48.0 h1:oJWvHb9BIZToTQS3MuQ2R3bJZiNSa2KiNdeI8A+79Tc=
github.com/valyala/fasthttp v1.48.0/go.mod h1:k2zXd82h/7UZc3VOdJ2WaUqt1uZ/XpXAfE9i+HBC3lA=
golang.org/x/net v0.11.0 h1:Gi2tvZIJyBtO9SDr1q9h5hEQCp/4L2RQ+ar0qjx2oNU=
golang.org/x/net v0.11.0/go.mod h1:2L/ixqYpgIVXmeoSA/4Lu7BzTG4KIyPIryS4IsOd1oQ=
golang.org/x/sys v0.9.0 h1:KS/R3tvhPqvJvwcKfnBHJwwthS11LRhmM5D59eEXa0s=
golang.org/x/sys v0.9.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.9.0 h1:GRRCnKYhdQrD8kfRAdQ6Zcw1P0OcELxGLKJvtjVMZ28=
golang.org/x/term v0.9.0/go.mod h1:M6DEAAIenWoTxdKrOltXcmDY3rSplQUkrvaDU5FcQyo=
golang.org/x/net v0.12.0 h1:cfawfvKITfUsFCeJIHJrbSxpeu/E81khclypR0GVT50=
golang.org/x/net v0.12.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA=
golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c=
golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
Expand Down

0 comments on commit 3d93948

Please sign in to comment.