From b891b662dd3c264df4930f12e5ac55a8bf81ddf7 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Sun, 16 Feb 2025 15:28:34 -0500 Subject: [PATCH 01/17] use TwitterResult rather than interface --- internal/jobs/twitter.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index e6505020..6ec07d5e 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -59,7 +59,7 @@ func (ts *TwitterScraper) getAuthenticatedScraper(baseDir string) (*twitter.Scra var scraper *twitter.Scraper if account != nil { - + authConfig := twitter.AuthConfig{ Account: account, BaseDir: baseDir, @@ -139,7 +139,7 @@ func (ts *TwitterScraper) ScrapeTweetsProfile(baseDir string, username string) ( return profile, nil } -func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, count int) (interface{}, error) { +func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, count int) ([]*TweetResult, error) { scraper, account, apiKey, err := ts.getAuthenticatedScraper(baseDir) if err != nil { return nil, err @@ -156,10 +156,20 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun if err != nil { return nil, err } + + var tweets []*TweetResult + for _, tweet := range result.Data { + var newTweet twitterscraper.Tweet + newTweet.ID = tweet.ID + newTweet.Text = tweet.Text + tweets = append(tweets, &TweetResult{Tweet: &newTweet}) + } + ts.statsCollector.Add(stats.TwitterTweets, uint(len(result.Data))) - return result, nil - } + return tweets, nil + } + // Use the default scraper if no TwitterX API key is available var tweets []*TweetResult ctx := context.Background() scraper.SetSearchMode(twitterscraper.SearchLatest) From c7d0c094427e6af7f6ea74d0cc995398bcbceae4 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Sun, 16 Feb 2025 22:31:08 -0500 Subject: [PATCH 02/17] move tweets above logic --- internal/jobs/twitter.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 6ec07d5e..26e774f7 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -146,6 +146,7 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun } ts.statsCollector.Add(stats.TwitterScrapes, 1) + var tweets []*TweetResult // Check if we have a TwitterX API key if apiKey != nil { @@ -169,8 +170,8 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun return tweets, nil } + // Use the default scraper if no TwitterX API key is available - var tweets []*TweetResult ctx := context.Background() scraper.SetSearchMode(twitterscraper.SearchLatest) From 921222e7dc0813771648eef8fb075614d56b65a5 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 10:26:41 -0500 Subject: [PATCH 03/17] add logging on the twitter x client scraper --- internal/api/routes.go | 18 ++++++++++++++++++ internal/jobs/twitter.go | 3 ++- internal/jobs/twitterx/scraper.go | 6 +++++- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/internal/api/routes.go b/internal/api/routes.go index 5950b341..849c08cc 100644 --- a/internal/api/routes.go +++ b/internal/api/routes.go @@ -22,6 +22,14 @@ func generate(c echo.Context) error { return c.String(http.StatusOK, encryptedSignature) } +// add adds a job to the job server. +// +// The request body should contain a JobRequest, which will be decrypted and +// passed to the job server. 
The response body will contain a JobResponse with +// the UUID of the added job. +// +// If there is an error, the response body will contain a JobError with an +// appropriate error message. func add(jobServer *jobserver.JobServer) func(c echo.Context) error { return func(c echo.Context) error { jobRequest := types.JobRequest{} @@ -36,10 +44,20 @@ func add(jobServer *jobserver.JobServer) func(c echo.Context) error { uuid := jobServer.AddJob(*job) + // check if uuid is empty + if uuid == "" { + return c.JSON(http.StatusInternalServerError, types.JobError{Error: "Failed to add job"}) + } + return c.JSON(http.StatusOK, types.JobResponse{UID: uuid}) } } +// status returns the result of a job. If the job is not found, it returns an +// error with a status code of 404. If there is an error with the job, it +// returns an error with a status code of 500. If the job has not finished, it +// returns an empty string with a status code of 200. Otherwise, it returns the +// sealed result of the job with a status code of 200. func status(jobServer *jobserver.JobServer) func(c echo.Context) error { return func(c echo.Context) error { res, exists := jobServer.GetJobResult(c.Param("job_id")) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 26e774f7..47f33010 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -158,7 +158,6 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun return nil, err } - var tweets []*TweetResult for _, tweet := range result.Data { var newTweet twitterscraper.Tweet newTweet.ID = tweet.ID @@ -167,6 +166,7 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun } ts.statsCollector.Add(stats.TwitterTweets, uint(len(result.Data))) + logrus.Info("Scraped tweets: ", len(tweets)) return tweets, nil } @@ -184,6 +184,7 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun } ts.statsCollector.Add(stats.TwitterTweets, uint(len(tweets))) + logrus.Info("Scraped tweets: ", len(tweets)) return tweets, nil } diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index 8388a110..e630984e 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "github.com/masa-finance/tee-worker/pkg/client" + "github.com/sirupsen/logrus" "io" "net/http" "net/url" @@ -60,6 +61,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer // run the search response, err := client.Get(endpoint) if err != nil { + logrus.Error("failed to execute search query: %w", err) return nil, fmt.Errorf("failed to execute search query: %w", err) } defer response.Body.Close() @@ -67,6 +69,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer // check response status if response.StatusCode != http.StatusOK { body, _ := io.ReadAll(response.Body) + logrus.Error("unexpected status code %d: %s", response.StatusCode, string(body)) return nil, fmt.Errorf("unexpected status code %d: %s", response.StatusCode, string(body)) } @@ -74,9 +77,10 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer var result TwitterXSearchQueryResult err = json.NewDecoder(response.Body).Decode(&result) if err != nil { + logrus.Error("failed to decode response: %w", err) return nil, fmt.Errorf("failed to decode response: %w", err) } - + logrus.Info("Successfully scraped tweets by query, result count: ", result.Meta.ResultCount) return &result, 
nil } From 73989b5932114e97eb98452dba450e7dcd5efb4d Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 10:27:41 -0500 Subject: [PATCH 04/17] add logs and search param move --- internal/jobs/twitterx/scraper.go | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index e630984e..2ca4be43 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -34,6 +34,16 @@ type TwitterXSearchQueryResult struct { } `json:"meta"` } +// SearchParams holds all possible search parameters +type SearchParams struct { + Query string // The search query + MaxResults int // Maximum number of results to return + NextToken string // Token for getting the next page of results + SinceID string // Returns results with a Tweet ID greater than this ID + UntilID string // Returns results with a Tweet ID less than this ID + TweetFields []string // Additional tweet fields to include +} + func NewTwitterXScraper(client *client.TwitterXClient) *TwitterXScraper { return &TwitterXScraper{ twitterXClient: client, @@ -121,6 +131,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQueryExtended(params SearchParams) (*Twi // run the search response, err := client.Get(endpoint) if err != nil { + logrus.Error("failed to execute search query: %w", err) return nil, fmt.Errorf("failed to execute search query: %w", err) } defer response.Body.Close() @@ -128,6 +139,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQueryExtended(params SearchParams) (*Twi // check response status if response.StatusCode != http.StatusOK { body, _ := io.ReadAll(response.Body) + logrus.Error("unexpected status code %d: %s", response.StatusCode, string(body)) return nil, fmt.Errorf("unexpected status code %d: %s", response.StatusCode, string(body)) } @@ -135,18 +147,10 @@ func (s *TwitterXScraper) ScrapeTweetsByQueryExtended(params SearchParams) (*Twi var result TwitterXSearchQueryResult err = json.NewDecoder(response.Body).Decode(&result) if err != nil { + logrus.Error("failed to decode response: %w", err) return nil, fmt.Errorf("failed to decode response: %w", err) } + logrus.Info("Successfully scraped tweets by query, result count: ", result.Meta.ResultCount) return &result, nil } - -// SearchParams holds all possible search parameters -type SearchParams struct { - Query string // The search query - MaxResults int // Maximum number of results to return - NextToken string // Token for getting the next page of results - SinceID string // Returns results with a Tweet ID greater than this ID - UntilID string // Returns results with a Tweet ID less than this ID - TweetFields []string // Additional tweet fields to include -} From eae17717f70ce5eb6bb9ffba630bb85c476b5118 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:20:39 -0500 Subject: [PATCH 05/17] log response body before decoding --- internal/jobs/twitterx/scraper.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index 2ca4be43..cb358eaf 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -85,6 +85,8 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer // unmarshal the response var result TwitterXSearchQueryResult + logrus.Info("Successfully scraped tweets by query, result count: ", result.Meta.ResultCount) + logrus.Info("Response body before decoding: ", response.Body) err = 
json.NewDecoder(response.Body).Decode(&result) if err != nil { logrus.Error("failed to decode response: %w", err) From e1fa01d3ef8e961b29995c0e26374935d1bde5df Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:24:09 -0500 Subject: [PATCH 06/17] log response body before decoding --- internal/jobs/twitterx/scraper.go | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index cb358eaf..d58f4125 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -77,8 +77,13 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer defer response.Body.Close() // check response status + var body []byte if response.StatusCode != http.StatusOK { - body, _ := io.ReadAll(response.Body) + body, err = io.ReadAll(response.Body) + if err != nil { + logrus.Error("failed to read response body: %w", err) + return nil, fmt.Errorf("failed to read response body: %w", err) + } logrus.Error("unexpected status code %d: %s", response.StatusCode, string(body)) return nil, fmt.Errorf("unexpected status code %d: %s", response.StatusCode, string(body)) } @@ -86,8 +91,8 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer // unmarshal the response var result TwitterXSearchQueryResult logrus.Info("Successfully scraped tweets by query, result count: ", result.Meta.ResultCount) - logrus.Info("Response body before decoding: ", response.Body) - err = json.NewDecoder(response.Body).Decode(&result) + logrus.Info("Response body before decoding: ", body) + err = json.Unmarshal(body, &result) if err != nil { logrus.Error("failed to decode response: %w", err) return nil, fmt.Errorf("failed to decode response: %w", err) From 0a8b75b8a59569153e69dc3e0a0e4cdde5106765 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:26:08 -0500 Subject: [PATCH 07/17] pass body to appropriate scrape --- pkg/client/twitter_x_client.go | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/pkg/client/twitter_x_client.go b/pkg/client/twitter_x_client.go index d783d4aa..d799f047 100644 --- a/pkg/client/twitter_x_client.go +++ b/pkg/client/twitter_x_client.go @@ -76,22 +76,6 @@ func (c *TwitterXClient) Get(endpointUrl string) (*http.Response, error) { logrus.Errorf("error making GET request: %v", err) return nil, fmt.Errorf("error making GET request: %w", err) } - defer resp.Body.Close() - - // Read the response body - body, err := io.ReadAll(resp.Body) - if err != nil { - logrus.Errorf("error reading response: %v", err) - return nil, fmt.Errorf("error reading response: %w", err) - } - - logrus.Info("Response body: ", string(body)) - - // if the response is not 200, return an error - if resp.StatusCode != http.StatusOK { - logrus.Errorf("API request failed with status: %d", resp.StatusCode) - return nil, fmt.Errorf("API request failed with status: %d", resp.StatusCode) - } return resp, nil } From 1ba00889850ed22feac59b45a622da264d04d5c9 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:29:46 -0500 Subject: [PATCH 08/17] pass body to appropriate scrape --- internal/jobs/twitterx/scraper.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index d58f4125..42c0df71 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -77,17 +77,20 @@ func (s *TwitterXScraper) 
ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer defer response.Body.Close() // check response status - var body []byte + fmt.Println("Response status code: ", response.StatusCode) if response.StatusCode != http.StatusOK { - body, err = io.ReadAll(response.Body) - if err != nil { - logrus.Error("failed to read response body: %w", err) - return nil, fmt.Errorf("failed to read response body: %w", err) - } logrus.Error("unexpected status code %d: %s", response.StatusCode, string(body)) return nil, fmt.Errorf("unexpected status code %d: %s", response.StatusCode, string(body)) } + // read the response body + var body []byte + body, err = io.ReadAll(response.Body) + if err != nil { + logrus.Error("failed to read response body: %w", err) + return nil, fmt.Errorf("failed to read response body: %w", err) + } + // unmarshal the response var result TwitterXSearchQueryResult logrus.Info("Successfully scraped tweets by query, result count: ", result.Meta.ResultCount) From 160a579561fc1ec2f569955cd80d8aac4a0c04b2 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:30:35 -0500 Subject: [PATCH 09/17] pass body to appropriate scrape --- internal/jobs/twitterx/scraper.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index 42c0df71..9f8663e6 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -79,8 +79,8 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer // check response status fmt.Println("Response status code: ", response.StatusCode) if response.StatusCode != http.StatusOK { - logrus.Error("unexpected status code %d: %s", response.StatusCode, string(body)) - return nil, fmt.Errorf("unexpected status code %d: %s", response.StatusCode, string(body)) + logrus.Error("unexpected status code %d", response.StatusCode) + return nil, fmt.Errorf("unexpected status code %d", response.StatusCode) } // read the response body @@ -90,7 +90,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer logrus.Error("failed to read response body: %w", err) return nil, fmt.Errorf("failed to read response body: %w", err) } - + // unmarshal the response var result TwitterXSearchQueryResult logrus.Info("Successfully scraped tweets by query, result count: ", result.Meta.ResultCount) From dbed96d221dca7f7a2ef5c46c3956381d0265a39 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:36:07 -0500 Subject: [PATCH 10/17] properly decode response --- internal/jobs/twitterx/scraper.go | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index 9f8663e6..2732a737 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -91,16 +91,22 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer return nil, fmt.Errorf("failed to read response body: %w", err) } + logrus.WithField("response", string(body)).Debug("raw response body") + // unmarshal the response var result TwitterXSearchQueryResult logrus.Info("Successfully scraped tweets by query, result count: ", result.Meta.ResultCount) - logrus.Info("Response body before decoding: ", body) - err = json.Unmarshal(body, &result) - if err != nil { - logrus.Error("failed to decode response: %w", err) - return nil, fmt.Errorf("failed to decode response: %w", err) + if err := json.Unmarshal(body, &result); err != 
nil { + logrus.WithError(err).Error("failed to unmarshal response") + return nil, fmt.Errorf("failed to unmarshal response: %w", err) } - logrus.Info("Successfully scraped tweets by query, result count: ", result.Meta.ResultCount) + + logrus.WithFields(logrus.Fields{ + "result_count": result.Meta.ResultCount, + "newest_id": result.Meta.NewestID, + "oldest_id": result.Meta.OldestID, + }).Info("Successfully scraped tweets by query") + return &result, nil } From d373b41bd10e9dd0a9f008d4c864f3ceee6e6a6f Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:39:44 -0500 Subject: [PATCH 11/17] log the body --- internal/jobs/twitterx/scraper.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index 2732a737..1f9cf843 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -90,8 +90,8 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer logrus.Error("failed to read response body: %w", err) return nil, fmt.Errorf("failed to read response body: %w", err) } - - logrus.WithField("response", string(body)).Debug("raw response body") + + logrus.WithField("response", string(body)).Info("raw response body") // unmarshal the response var result TwitterXSearchQueryResult @@ -106,7 +106,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer "newest_id": result.Meta.NewestID, "oldest_id": result.Meta.OldestID, }).Info("Successfully scraped tweets by query") - + return &result, nil } From 1e2584acfff934f1b1a8db0dd3be78913515caba Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:45:49 -0500 Subject: [PATCH 12/17] escape characters --- internal/jobs/twitterx/scraper.go | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index 1f9cf843..6e9e2d9d 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -22,9 +22,9 @@ type TwitterXScraper struct { type TwitterXSearchQueryResult struct { Data []struct { - ID string `json:"id"` - EditHistoryTweetIds []string `json:"edit_history_tweet_ids"` Text string `json:"text"` + EditHistoryTweetIds []string `json:"edit_history_tweet_ids"` + ID string `json:"id"` } `json:"data"` Meta struct { NewestID string `json:"newest_id"` @@ -32,6 +32,8 @@ type TwitterXSearchQueryResult struct { ResultCount int `json:"result_count"` NextToken string `json:"next_token"` } `json:"meta"` + Status string + Message string } // SearchParams holds all possible search parameters @@ -76,13 +78,6 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer } defer response.Body.Close() - // check response status - fmt.Println("Response status code: ", response.StatusCode) - if response.StatusCode != http.StatusOK { - logrus.Error("unexpected status code %d", response.StatusCode) - return nil, fmt.Errorf("unexpected status code %d", response.StatusCode) - } - // read the response body var body []byte body, err = io.ReadAll(response.Body) @@ -90,7 +85,14 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer logrus.Error("failed to read response body: %w", err) return nil, fmt.Errorf("failed to read response body: %w", err) } - + + // check response status + fmt.Println("Response status code: ", response.StatusCode) + if response.StatusCode != http.StatusOK { + logrus.Error("unexpected status 
code %d", response.StatusCode) + return nil, fmt.Errorf("unexpected status code %d", response.StatusCode) + } + fmt.Println("Response body: ", string(body)) logrus.WithField("response", string(body)).Info("raw response body") // unmarshal the response From 88f8246d2bb5bf66f909157907c9a2cfb71cdffc Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:51:14 -0500 Subject: [PATCH 13/17] log tweets on scrap --- internal/jobs/twitter.go | 6 +++++- internal/jobs/twitterx/scraper.go | 2 -- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 47f33010..113bbda2 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -162,11 +162,15 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun var newTweet twitterscraper.Tweet newTweet.ID = tweet.ID newTweet.Text = tweet.Text + + fmt.Println(newTweet) + fmt.Println(newTweet.Text) + tweets = append(tweets, &TweetResult{Tweet: &newTweet}) } ts.statsCollector.Add(stats.TwitterTweets, uint(len(result.Data))) - logrus.Info("Scraped tweets: ", len(tweets)) + return tweets, nil } diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index 6e9e2d9d..4dbcdbff 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -93,11 +93,9 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer return nil, fmt.Errorf("unexpected status code %d", response.StatusCode) } fmt.Println("Response body: ", string(body)) - logrus.WithField("response", string(body)).Info("raw response body") // unmarshal the response var result TwitterXSearchQueryResult - logrus.Info("Successfully scraped tweets by query, result count: ", result.Meta.ResultCount) if err := json.Unmarshal(body, &result); err != nil { logrus.WithError(err).Error("failed to unmarshal response") return nil, fmt.Errorf("failed to unmarshal response: %w", err) From e359709ffd65359482d92a452933c5c392491549 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:53:53 -0500 Subject: [PATCH 14/17] clean up --- internal/jobs/twitter.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 113bbda2..3d050d0e 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -162,10 +162,6 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun var newTweet twitterscraper.Tweet newTweet.ID = tweet.ID newTweet.Text = tweet.Text - - fmt.Println(newTweet) - fmt.Println(newTweet.Text) - tweets = append(tweets, &TweetResult{Tweet: &newTweet}) } From c0f8c76bad409f0fff57396ebdb13c867099e7cb Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 14:57:39 -0500 Subject: [PATCH 15/17] clean up the fmts --- internal/jobs/twitterx/scraper.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index 4dbcdbff..f68b27ee 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -87,12 +87,10 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer } // check response status - fmt.Println("Response status code: ", response.StatusCode) if response.StatusCode != http.StatusOK { logrus.Error("unexpected status code %d", response.StatusCode) return nil, fmt.Errorf("unexpected status code %d", response.StatusCode) } - fmt.Println("Response body: ", string(body)) // unmarshal the response var result 
TwitterXSearchQueryResult From 5792dad17760806d6ddefc4cda5fbd7ca246776f Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 17:54:17 -0500 Subject: [PATCH 16/17] lint/fmt error issues --- internal/jobs/twitterx/scraper.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index f68b27ee..c2722efb 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -88,7 +88,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer // check response status if response.StatusCode != http.StatusOK { - logrus.Error("unexpected status code %d", response.StatusCode) + logrus.Errorf("unexpected status code %d", response.StatusCode) return nil, fmt.Errorf("unexpected status code %d", response.StatusCode) } @@ -145,7 +145,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQueryExtended(params SearchParams) (*Twi // run the search response, err := client.Get(endpoint) if err != nil { - logrus.Error("failed to execute search query: %w", err) + logrus.Errorf("failed to execute search query: %w", err) return nil, fmt.Errorf("failed to execute search query: %w", err) } defer response.Body.Close() @@ -153,7 +153,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQueryExtended(params SearchParams) (*Twi // check response status if response.StatusCode != http.StatusOK { body, _ := io.ReadAll(response.Body) - logrus.Error("unexpected status code %d: %s", response.StatusCode, string(body)) + logrus.Errorf("unexpected status code %d: %s", response.StatusCode, string(body)) return nil, fmt.Errorf("unexpected status code %d: %s", response.StatusCode, string(body)) } From 83e8be0d5849c1c2501fbc5f324e4327051a1ca5 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Mon, 17 Feb 2025 17:57:05 -0500 Subject: [PATCH 17/17] lint/fmt error issues does not support error-wrapping directive --- internal/jobs/twitterx/scraper.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index c2722efb..f2aa0309 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -145,7 +145,7 @@ func (s *TwitterXScraper) ScrapeTweetsByQueryExtended(params SearchParams) (*Twi // run the search response, err := client.Get(endpoint) if err != nil { - logrus.Errorf("failed to execute search query: %w", err) + logrus.Errorf("failed to execute search query: %s", err) return nil, fmt.Errorf("failed to execute search query: %w", err) } defer response.Body.Close()
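
Editor's note on the last two commits: the %w error-wrapping verb is only interpreted by fmt.Errorf; logrus's Printf-style methods (Errorf and friends) hand the format string to fmt.Sprintf, which does not understand %w, which is why the printf vet/lint check complains and why the final patch switches to %s while keeping %w only inside the returned fmt.Errorf. A minimal sketch of the distinction follows; the sentinel error and the package main harness are illustrative assumptions, not code from this repository.

    package main

    import (
    	"errors"
    	"fmt"

    	"github.com/sirupsen/logrus"
    )

    // errRateLimited is a hypothetical sentinel error used only for illustration.
    var errRateLimited = errors.New("rate limited")

    func main() {
    	// fmt.Errorf understands %w and wraps the error, so errors.Is/As still work
    	// on the returned value.
    	wrapped := fmt.Errorf("failed to execute search query: %w", errRateLimited)
    	fmt.Println(errors.Is(wrapped, errRateLimited)) // true

    	// logrus.Errorf formats via fmt.Sprintf, which has no %w support, so the
    	// log line should use %s/%v (as the final patch does)...
    	logrus.Errorf("failed to execute search query: %s", errRateLimited)

    	// ...or attach the error as a structured field instead of formatting it.
    	logrus.WithError(errRateLimited).Error("failed to execute search query")
    }

In short, keep %w for the error value that is returned up the stack (so callers can unwrap it) and use %s/%v or WithError for the logging side.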