Skip to content

Commit

Permalink
Update logger
Browse files: browse the repository at this point in the history
  • Loading branch information
jonesrussell committed Mar 23, 2024
1 parent eeeb63a commit cd5240a
Show file tree
Hide file tree
Showing 15 changed files with 211 additions and 275 deletions.
4 changes: 2 additions & 2 deletions cmd/clearlinks.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ func ClearlinksMain(cmd *cobra.Command, _ []string) error {
}

if Debug {
manager.LoggerField.Debug("Debugging enabled. Clearing Redis set...", map[string]interface{}{})
manager.LoggerField.Debug("Debugging enabled. Clearing Redis set...")
}

manager.Logger().Info("Redis set cleared successfully", map[string]interface{}{})
manager.Logger().Info("Redis set cleared successfully")

return nil
}
Expand Down
12 changes: 6 additions & 6 deletions cmd/matchlinks.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,15 +51,15 @@ func runMatchLinks(cmd *cobra.Command, args []string) error {
}

if Debug {
manager.Logger().Info("\nFlags:", map[string]interface{}{})
manager.Logger().Info("\nFlags:")
cmd.Flags().VisitAll(func(flag *pflag.Flag) {
manager.Logger().Info(" %-12s : %v\n", map[string]interface{}{"flag": flag.Name, "value": flag.Value.String()})
manager.Logger().Info(fmt.Sprintf(" %-12s : %s\n", flag.Name, flag.Value.String()))
})

manager.Logger().Info("\nRedis Environment Variables:", map[string]interface{}{})
manager.Logger().Info("REDIS_HOST", map[string]interface{}{"value": viper.GetString("REDIS_HOST")})
manager.Logger().Info(" %-12s : %s\n", map[string]interface{}{"REDIS_PORT": viper.GetString("REDIS_PORT")})
manager.Logger().Info(" %-12s : %s\n", map[string]interface{}{"REDIS_AUTH": viper.GetString("REDIS_AUTH")})
manager.Logger().Info("\nRedis Environment Variables:")
manager.Logger().Info(fmt.Sprintf(" %-12s : %s\n", "REDIS_HOST", viper.GetString("REDIS_HOST")))
manager.Logger().Info(fmt.Sprintf(" %-12s : %s\n", "REDIS_PORT", viper.GetString("REDIS_PORT")))
manager.Logger().Info(fmt.Sprintf(" %-12s : %s\n", "REDIS_AUTH", viper.GetString("REDIS_AUTH")))
}

err := manager.StartCrawling(ctx, url, searchterms, Siteid, viper.GetInt("maxdepth"), viper.GetBool("debug"))
Expand Down
23 changes: 8 additions & 15 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/jonesrussell/page-prowler/internal/prowlredis"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"go.uber.org/zap/zapcore"
)

var (
Expand All @@ -36,16 +37,7 @@ var RootCmd = &cobra.Command{
// Initialize your dependencies here
ctx := context.Background()

var logLevel logger.LogLevel
if Debug {
logLevel = logger.DebugLevel // Set to debug level if Debug is true
log.Println("Debug mode is enabled")
} else {
logLevel = logger.InfoLevel // Otherwise, use the default log level
log.Println("Debug mode is not enabled")
}

appLogger, err := initializeLogger(logLevel)
appLogger, err := initializeLogger()
if err != nil {
log.Println("Error initializing logger:", err)
return err
Expand Down Expand Up @@ -140,12 +132,13 @@ func init() {
RootCmd.PersistentFlags().StringVarP(&Siteid, "siteid", "s", viper.GetString("siteid"), "Set siteid for redis set key")
}

// initializeLogger constructs the application logger through logger.New.
//
// NOTE(review): this span is a rendered diff hunk, so two versions of the
// function appear interleaved without +/- markers:
//   - the version that takes a level parameter passes it to logger.New and
//     wraps any constructor error with fmt.Errorf;
//   - the parameterless version picks the level itself (zapcore.InfoLevel,
//     or zapcore.DebugLevel when the package-level Debug flag is set) and
//     returns logger.New's results unwrapped.
func initializeLogger(level logger.LogLevel) (logger.Logger, error) {
initlog, err := logger.New(level)
if err != nil {
return nil, fmt.Errorf("failed to initialize logger: %v", err)
func initializeLogger() (logger.Logger, error) {
// NOTE(review): the declaration plus assignment below could be the single
// statement `level := zapcore.InfoLevel`.
var level zapcore.Level
level = zapcore.InfoLevel
if Debug {
level = zapcore.DebugLevel
}
return initlog, nil
return logger.New(level) // Use the new logger constructor
}

func InitializeManager(
Expand Down
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/pelletier/go-toml/v2 v2.1.1 // indirect
github.com/pelletier/go-toml/v2 v2.2.0 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/sagikazarmark/locafero v0.4.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
Expand All @@ -78,7 +78,7 @@ require (
github.com/valyala/bytebufferpool v1.0.0 // indirect
go.mongodb.org/mongo-driver v1.14.0
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect
golang.org/x/exp v0.0.0-20240318143956-a85f2c67cd81 // indirect
golang.org/x/sys v0.18.0 // indirect
google.golang.org/protobuf v1.33.0 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,8 @@ github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8
github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro=
github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg=
github.com/pelletier/go-toml/v2 v2.1.1 h1:LWAJwfNvjQZCFIDKWYQaM62NcYeYViCmWIwmOStowAI=
github.com/pelletier/go-toml/v2 v2.1.1/go.mod h1:tJU2Z3ZkXwnxa4DPO899bsyIoywizdUvyaeZurnPPDc=
github.com/pelletier/go-toml/v2 v2.2.0 h1:QLgLl2yMN7N+ruc31VynXs1vhMZa7CeHHejIeBAsoHo=
github.com/pelletier/go-toml/v2 v2.2.0/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
Expand Down Expand Up @@ -174,8 +174,8 @@ golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPh
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ=
golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc=
golang.org/x/exp v0.0.0-20240318143956-a85f2c67cd81 h1:6R2FC06FonbXQ8pK11/PDFY6N6LWlf9KlzibaCapmqc=
golang.org/x/exp v0.0.0-20240318143956-a85f2c67cd81/go.mod h1:CQ1k9gNrJ50XIzaKCRR2hssIjF07kZFEiieALBM/ARQ=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
Expand Down
41 changes: 9 additions & 32 deletions internal/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ type CrawlManagerInterface interface {
SetupCrawlingLogic(*CrawlOptions) error
CrawlURL(url string) error
HandleVisitError(url string, err error) error
LogError(message string, keysAndValues ...interface{})
Logger() logger.Logger
StartCrawling(ctx context.Context, url string, searchterms string, siteid string, maxdepth int, debug bool) error
ProcessMatchingLinkAndUpdateStats(*CrawlOptions, string, PageData, []string)
Expand Down Expand Up @@ -67,15 +66,15 @@ func (cm *CrawlManager) StartCrawling(ctx context.Context, url, searchTerms, cra

host, err := GetHostFromURL(url, cm.Logger())
if err != nil {
cm.LoggerField.Error("Failed to parse URL", map[string]interface{}{"url": url, "error": err})
cm.LoggerField.Error(fmt.Sprintf("Failed to parse URL: url: %v, error: %v", url, err))
return err
}

cm.LoggerField.Debug("Extracted host from URL", map[string]interface{}{"host": host})
cm.LoggerField.Debug(fmt.Sprintf("Extracted host from URL: %s", host))

err = cm.ConfigureCollector([]string{host}, maxDepth)
if err != nil {
cm.Logger().Fatal("Failed to configure collector", map[string]interface{}{"error": err})
cm.LoggerField.Fatal(fmt.Sprintf("Failed to configure collector: %v", err))
return err
}

Expand All @@ -101,7 +100,7 @@ func (cm *CrawlManager) StartCrawling(ctx context.Context, url, searchTerms, cra

// Crawl starts the crawling process for a given URL with the provided options.
func (cm *CrawlManager) Crawl(url string, options *CrawlOptions) ([]PageData, error) {
cm.LoggerField.Debug("CrawlURL", map[string]interface{}{"url": url})
cm.LoggerField.Debug(fmt.Sprintf("CrawlURL: %s", url))
err := cm.SetupCrawlingLogic(options)
if err != nil {
return nil, err
Expand Down Expand Up @@ -129,18 +128,10 @@ func (cm *CrawlManager) SetupErrorEventHandler(collector *colly.Collector) {

if statusCode == 500 {
// Handle 500 Internal Server Error without printing the stack trace
cm.LoggerField.Debug("[SetupErrorEventHandler] Internal Server Error",
map[string]interface{}{
"request_url": requestURL,
"status_code": fmt.Sprintf("%d", statusCode),
})
cm.LoggerField.Debug(fmt.Sprintf("[SetupErrorEventHandler] Internal Server Error request_url: %s, status_code: %d", requestURL, statusCode))
} else if statusCode != 404 {
// Handle other errors normally
cm.LoggerField.Debug("[SetupErrorEventHandler] Request URL failed",
map[string]interface{}{
"request_url": requestURL,
"status_code": fmt.Sprintf("%d", statusCode),
})
cm.LoggerField.Debug(fmt.Sprintf("[SetupErrorEventHandler] Request URL failed request_url: %s, status_code: %d", requestURL, statusCode))
}
})
}
Expand All @@ -159,34 +150,20 @@ func (cm *CrawlManager) SetupCrawlingLogic(options *CrawlOptions) error {

// CrawlURL visits url through visitWithColly, waits for the collector to
// finish via cm.Collector.Wait, and logs completion at info level.
//
// It returns the result of HandleVisitError when visitWithColly reports an
// error, and nil otherwise.
//
// NOTE(review): this span is a rendered diff hunk; each logging call appears
// twice (map-argument form and single-string form) without +/- markers.
// NOTE(review): the visitWithColly shown elsewhere in this diff returns nil on
// every path, so the error branch below looks unreachable — confirm.
func (cm *CrawlManager) CrawlURL(url string) error {
cm.Logger().Debug("[CrawlURL] Visiting URL", map[string]interface{}{"url": url})
// NOTE(review): %v applied to a map literal prints Go map syntax
// (map[url:...]) rather than the bare URL; formatting url directly would
// give a cleaner message.
cm.LoggerField.Debug(fmt.Sprintf("[CrawlURL] Visiting URL: %v", map[string]interface{}{"url": url}))
err := cm.visitWithColly(url)
if err != nil {
return cm.HandleVisitError(url, err)
}
// cm.trackVisitedPage(url, options)
cm.Collector.Wait()
cm.Logger().Info("[CrawlURL] Crawling completed.", map[string]interface{}{})
cm.Logger().Info("[CrawlURL] Crawling completed.")
return nil
}

// HandleVisitError logs a failed visit of url at error level and returns err
// unchanged so the caller can propagate it.
//
// NOTE(review): this span is a rendered diff hunk; the cm.LogError call and
// the cm.LoggerField.Error call are the two versions of the same log
// statement, shown without +/- markers.
// NOTE(review): the error is both logged here and returned; if callers log it
// again the same failure will be reported twice.
func (cm *CrawlManager) HandleVisitError(url string, err error) error {
cm.LogError("Error visiting URL", "url", url, "error", err)
cm.LoggerField.Error(fmt.Sprintf("Error visiting URL: url: %s, error: %v", url, err))
return err
}

// LogError logs message at error level together with structured fields built
// from keysAndValues, which is read as alternating key, value arguments.
//
// A pair whose key is not a string is skipped. A trailing key that has no
// matching value is ignored; previously an odd number of arguments indexed
// past the end of the slice and panicked.
func (cm *CrawlManager) LogError(message string, keysAndValues ...interface{}) {
	fields := make(map[string]interface{}, len(keysAndValues)/2)
	// Stop at the last complete pair so keysAndValues[i+1] is always in range.
	for i := 0; i+1 < len(keysAndValues); i += 2 {
		key, ok := keysAndValues[i].(string)
		if !ok {
			// A non-string key cannot name a field; skip the whole pair.
			continue
		}
		fields[key] = keysAndValues[i+1]
	}
	cm.LoggerField.Error(message, fields)
}
48 changes: 25 additions & 23 deletions internal/crawler/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package crawler

import (
"errors"
"fmt"
"sync"

"github.com/gocolly/colly"
Expand Down Expand Up @@ -54,12 +55,12 @@ func (cm *CrawlManager) ConfigureCollector(allowedDomains []string, maxDepth int
colly.Debugger(cm.LoggerField),
)

cm.LoggerField.Debug("Allowed Domains", map[string]interface{}{"domains": allowedDomains})
cm.LoggerField.Debug(fmt.Sprintf("Allowed Domains: %v", allowedDomains))
cm.Collector.AllowedDomains = allowedDomains

limitRule := cm.createLimitRule()
if err := cm.Collector.Limit(limitRule); err != nil {
cm.Logger().Error("Failed to set limit rule: %v", map[string]interface{}{"err": err})
cm.LoggerField.Error(fmt.Sprintf("Failed to set limit rule: %v", map[string]interface{}{"err": err}))
return err
}

Expand All @@ -69,7 +70,7 @@ func (cm *CrawlManager) ConfigureCollector(allowedDomains []string, maxDepth int

// Register OnScraped callback
cm.Collector.OnScraped(func(r *colly.Response) {
cm.Logger().Debug("[OnScraped] Page scraped", map[string]interface{}{"url": r.Request.URL.String()})
cm.LoggerField.Debug(fmt.Sprintf("[OnScraped] Page scraped: %s", r.Request.URL.String()))
cm.StatsManager.LinkStatsMu.Lock()
defer cm.StatsManager.LinkStatsMu.Unlock()
cm.StatsManager.LinkStats.IncrementTotalPages()
Expand All @@ -80,30 +81,31 @@ func (cm *CrawlManager) ConfigureCollector(allowedDomains []string, maxDepth int

// logCrawlingStatistics logs the TotalLinks, MatchedLinks, NotMatchedLinks and
// TotalPages entries of the link-stats report at info level.
//
// NOTE(review): this span is a rendered diff hunk; the structured
// cm.Logger().Info call and the fmt.Sprintf-based cm.LoggerField.Info call are
// the two versions of the same log statement, shown without +/- markers.
func (cm *CrawlManager) logCrawlingStatistics() {
report := cm.StatsManager.LinkStats.Report()
cm.Logger().Info("Crawling statistics", map[string]interface{}{
"TotalLinks": report["TotalLinks"],
"MatchedLinks": report["MatchedLinks"],
"NotMatchedLinks": report["NotMatchedLinks"],
"TotalPages": report["TotalPages"],
})
infoMessage := fmt.Sprintf("Crawling statistics: TotalLinks=%v, MatchedLinks=%v, NotMatchedLinks=%v, TotalPages=%v",
report["TotalLinks"], report["MatchedLinks"], report["NotMatchedLinks"], report["TotalPages"])
cm.LoggerField.Info(infoMessage)
}

// visitWithColly asks the Colly collector to visit url and logs the outcome,
// distinguishing colly.ErrAlreadyVisited and colly.ErrForbiddenDomain from
// other errors.
//
// NOTE(review): this span is a rendered diff hunk; the error-handling block
// and the success log each appear twice without +/- markers.
// NOTE(review): every path shown ends in `return nil`, so a failed Visit is
// only logged and never reported to the caller — confirm this is intended.
func (cm *CrawlManager) visitWithColly(url string) error {
cm.LoggerField.Debug("[visitWithColly] Visiting URL with Colly", map[string]interface{}{"url": url})
// NOTE(review): %v applied to a map literal prints Go map syntax
// (map[url:...]) rather than the bare URL.
cm.LoggerField.Debug(fmt.Sprintf("[visitWithColly] Visiting URL with Colly: %v", map[string]interface{}{"url": url}))

err := cm.Collector.Visit(url)
// First rendering of the error handling: all three cases log at Debug level.
if err != nil {
switch {
case errors.Is(err, colly.ErrAlreadyVisited):
cm.LoggerField.Debug("[visitWithColly] URL already visited", map[string]interface{}{"url": url})
case errors.Is(err, colly.ErrForbiddenDomain):
cm.LoggerField.Debug("[visitWithColly] Forbidden domain - Skipping visit", map[string]interface{}{"url": url})
default:
cm.LoggerField.Debug("[visitWithColly] Error visiting URL", map[string]interface{}{"url": url, "error": err})
}
return nil
}

cm.LoggerField.Debug("[visitWithColly] Successfully visited URL with Colly", map[string]interface{}{"url": url})
// Second rendering of the error handling: same cases, but the default case
// logs at Error level instead of Debug.
if err != nil {
switch {
case errors.Is(err, colly.ErrAlreadyVisited):
errorMessage := fmt.Sprintf("[visitWithColly] URL already visited: %v", url)
cm.LoggerField.Debug(errorMessage)
case errors.Is(err, colly.ErrForbiddenDomain):
errorMessage := fmt.Sprintf("[visitWithColly] Forbidden domain - Skipping visit: %v", url)
cm.LoggerField.Debug(errorMessage)
default:
errorMessage := fmt.Sprintf("[visitWithColly] Error visiting URL: url=%v, error=%v", url, err)
cm.LoggerField.Error(errorMessage)
}
return nil
}

successMessage := fmt.Sprintf("[visitWithColly] Successfully visited URL: %v", url)
cm.LoggerField.Debug(successMessage)
return nil
}
26 changes: 13 additions & 13 deletions internal/crawler/results.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,54 +44,54 @@ func (p *PageData) UnmarshalBinary(data []byte) error {
}

// logResults logs the crawl results as a single JSON document at info level.
// For an empty slice it logs "No results to print" instead; if JSON encoding
// fails it logs the error and returns without logging results.
//
// NOTE(review): this span is a rendered diff hunk; the two func lines differ
// only in the receiver-like parameter name (crawlerService vs cm), and each
// logging call appears in both versions without +/- markers.
func logResults(crawlerService *CrawlManager, results []PageData) {
func logResults(cm *CrawlManager, results []PageData) {
if len(results) == 0 {
crawlerService.LoggerField.Info("No results to print", map[string]interface{}{})
cm.LoggerField.Info("No results to print")
return
}

jsonData, err := json.Marshal(results)
if err != nil {
crawlerService.LoggerField.Error("Error occurred during marshaling", map[string]interface{}{"error": err})
// NOTE(review): %v applied to a map literal prints Go map syntax
// (map[error:...]); formatting err directly would give a cleaner message.
cm.LoggerField.Error(fmt.Sprintf("Error occurred during marshaling: %v", map[string]interface{}{"error": err}))
return
}

crawlerService.LoggerField.Info(string(jsonData), map[string]interface{}{})
cm.LoggerField.Info(string(jsonData))
}

// SaveResultsToRedis JSON-encodes each entry of results, adds the encoded
// string to the Redis set stored at key with SAdd, and then uses SIsMember to
// check that the member is present.
//
// It returns the first error from json.Marshal, SAdd or SIsMember and stops
// processing at that point. A membership check that comes back false is only
// logged at error level; it does not stop the loop or produce an error. It
// returns nil once every result has been processed.
//
// NOTE(review): this span is a rendered diff hunk; each logging call appears
// twice (map-argument form and fmt.Sprintf form) without +/- markers.
// NOTE(review): several of the fmt.Sprintf calls format a map literal with %v,
// which prints Go map syntax (e.g. map[error:...]) rather than the bare value.
func (cm *CrawlManager) SaveResultsToRedis(ctx context.Context, results []PageData, key string) error {
cm.LoggerField.Debug("SaveResultsToRedis: Number of results before processing", map[string]interface{}{"count": len(results)})
cm.LoggerField.Debug(fmt.Sprintf("SaveResultsToRedis: Number of results before processing: %d", len(results)))

for _, result := range results {
cm.LoggerField.Debug("SaveResultsToRedis: Processing result", map[string]interface{}{"result": result})
cm.LoggerField.Debug(fmt.Sprintf("SaveResultsToRedis: Processing result %v", map[string]interface{}{"result": result}))

data, err := json.Marshal(result)
if err != nil {
cm.LoggerField.Error("SaveResultsToRedis: Error occurred during marshalling to JSON", map[string]interface{}{"error": err})
cm.LoggerField.Error(fmt.Sprintf("SaveResultsToRedis: Error occurred during marshalling to JSON: %v", map[string]interface{}{"error": err}))
return err
}
// The JSON text itself is the set member, so the same string is reused for
// the membership check below.
str := string(data)
err = cm.Client.SAdd(ctx, key, str)
if err != nil {
cm.LoggerField.Error("SaveResultsToRedis: Error occurred during saving to Redis", map[string]interface{}{"error": err})
cm.LoggerField.Error(fmt.Sprintf("SaveResultsToRedis: Error occurred during saving to Redis: %v", map[string]interface{}{"error": err}))
return err
}
cm.LoggerField.Debug("SaveResultsToRedis: Added elements to the set", nil)
cm.LoggerField.Debug("SaveResultsToRedis: Added elements to the set")

// Debugging: Verify that the result was saved correctly
isMember, err := cm.Client.SIsMember(ctx, key, str)
if err != nil {
cm.LoggerField.Error("SaveResultsToRedis: Error occurred during checking membership in Redis set", map[string]interface{}{"error": err})
cm.LoggerField.Error(fmt.Sprintf("SaveResultsToRedis: Error occurred during checking membership in Redis set: %v", map[string]interface{}{"error": err}))
return err
}
if !isMember {
cm.LoggerField.Error("SaveResultsToRedis: Result was not saved correctly in Redis set", map[string]interface{}{"result": str})
cm.LoggerField.Error(fmt.Sprintf("SaveResultsToRedis: Result was not saved correctly in Redis set: %v", map[string]interface{}{"result": str}))
} else {
cm.LoggerField.Debug("SaveResultsToRedis: Result was saved correctly in Redis set", map[string]interface{}{"key": key, "result": str})
cm.LoggerField.Debug(fmt.Sprintf("SaveResultsToRedis: Result was saved correctly in Redis set, key: %s, result: %s", key, str))
}
}

cm.LoggerField.Debug("SaveResultsToRedis: Number of results after processing", map[string]interface{}{"count": len(results)})
cm.LoggerField.Debug(fmt.Sprintf("SaveResultsToRedis: Number of results after processing: %d", len(results)))

return nil
}
Expand Down
Loading

0 comments on commit cd5240a

Please sign in to comment.