From 714ba3501f02bc01eef8535b9328f293f7d5c4ec Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 11:28:49 -0500 Subject: [PATCH 01/12] fix/result-chars --- internal/jobs/twitter.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 3d050d0e..618fa114 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -696,6 +696,7 @@ func (ws *TwitterScraper) ExecuteJob(j types.Job) (types.JobResult, error) { return types.JobResult{Error: err.Error()}, err } dat, err := json.Marshal(tweets) + fmt.Println(dat) // check if this has `/n` characters return types.JobResult{ Data: dat, }, err From c7c12812dbc2ec53d5565c8224e1d4debb62e1b7 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 16:53:53 -0500 Subject: [PATCH 02/12] get extended search data from query --- internal/jobs/twitter.go | 36 +++++++++++++++ internal/jobs/twitterx/scraper.go | 76 ++++++++++++++++++++++++++++--- 2 files changed, 106 insertions(+), 6 deletions(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 618fa114..68663ec1 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -159,9 +159,45 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun } for _, tweet := range result.Data { + var newTweet twitterscraper.Tweet + newTweet.ID = tweet.ID newTweet.Text = tweet.Text + newTweet.Username = tweet.Username + newTweet.UserID = tweet.AuthorID + + for _, place := range result.Includes.Places { + newTweet.Place = &twitterscraper.Place{ + ID: place.ID, + PlaceType: place.PlaceType, + Name: place.Name, + FullName: place.FullName, + CountryCode: place.CountryCode, + Country: place.Country, + BoundingBox: struct { + Type string `json:"type"` + Coordinates [][][]float64 `json:"coordinates"` + }{ + Type: place.Geo.Type, + }, + } + } + + for _, media := range result.Includes.Media { + if media.Type == "photo" { + photo := twitterscraper.Photo{ + ID: media.MediaKey, + } + newTweet.Photos = append(newTweet.Photos, photo) + } else if media.Type == "video" { + video := twitterscraper.Video{ + ID: media.MediaKey, + } + newTweet.Videos = append(newTweet.Videos, video) + } + } + tweets = append(tweets, &TweetResult{Tweet: &newTweet}) } diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index f2aa0309..a1f7d16a 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -10,6 +10,7 @@ import ( "net/url" "strconv" "strings" + "time" ) const ( @@ -22,18 +23,81 @@ type TwitterXScraper struct { type TwitterXSearchQueryResult struct { Data []struct { - Text string `json:"text"` - EditHistoryTweetIds []string `json:"edit_history_tweet_ids"` - ID string `json:"id"` + AuthorID string `json:"author_id"` + CreatedAt string `json:"created_at"` + ID string `json:"id"` + Text string `json:"text"` + Username string `json:"username"` } `json:"data"` + Errors []struct { + Detail string `json:"detail"` + Status int `json:"status"` + Title string `json:"title"` + Type string `json:"type"` + } `json:"errors"` + Includes struct { + Media []struct { + Height int `json:"height"` + MediaKey string `json:"media_key"` + Type string `json:"type"` + Width int `json:"width"` + } `json:"media"` + Places []struct { + ContainedWithin []string `json:"contained_within"` + Country string `json:"country"` + CountryCode string `json:"country_code"` + FullName string `json:"full_name"` + Geo struct { + Bbox []float64 `json:"bbox"` + Geometry struct { + Coordinates []float64 `json:"coordinates"` + Type string `json:"type"` + } `json:"geometry"` + Properties struct { + } `json:"properties"` + Type string `json:"type"` + } `json:"geo"` + ID string `json:"id"` + Name string `json:"name"` + PlaceType string `json:"place_type"` + } `json:"places"` + Polls []struct { + DurationMinutes int `json:"duration_minutes"` + EndDatetime time.Time `json:"end_datetime"` + ID string `json:"id"` + Options []struct { + Label string `json:"label"` + Position int `json:"position"` + Votes int `json:"votes"` + } `json:"options"` + VotingStatus string `json:"voting_status"` + } `json:"polls"` + Topics []struct { + Description string `json:"description"` + ID string `json:"id"` + Name string `json:"name"` + } `json:"topics"` + Tweets []struct { + AuthorID string `json:"author_id"` + CreatedAt string `json:"created_at"` + ID string `json:"id"` + Text string `json:"text"` + Username string `json:"username"` + } `json:"tweets"` + Users []struct { + CreatedAt time.Time `json:"created_at"` + ID string `json:"id"` + Name string `json:"name"` + Protected bool `json:"protected"` + Username string `json:"username"` + } `json:"users"` + } `json:"includes"` Meta struct { NewestID string `json:"newest_id"` + NextToken string `json:"next_token"` OldestID string `json:"oldest_id"` ResultCount int `json:"result_count"` - NextToken string `json:"next_token"` } `json:"meta"` - Status string - Message string } // SearchParams holds all possible search parameters From b60eef639d2764298dde9a31f3eeeb741e150095 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 16:58:21 -0500 Subject: [PATCH 03/12] get extended search data from query --- internal/jobs/twitter.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 68663ec1..902aeb07 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -732,7 +732,6 @@ func (ws *TwitterScraper) ExecuteJob(j types.Job) (types.JobResult, error) { return types.JobResult{Error: err.Error()}, err } dat, err := json.Marshal(tweets) - fmt.Println(dat) // check if this has `/n` characters return types.JobResult{ Data: dat, }, err From 32492f192bbf9f2dd0eedbd5369d0a463505b873 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 19:15:45 -0500 Subject: [PATCH 04/12] introduce twitter x data from x api --- internal/jobs/twitter.go | 45 +-------- internal/jobs/twitterx/scraper.go | 148 ++++++++++++++++-------------- 2 files changed, 81 insertions(+), 112 deletions(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 902aeb07..960100cd 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -18,6 +18,7 @@ import ( type TweetResult struct { Tweet *twitterscraper.Tweet + TweetXData *twitterx.TwitterXData ThreadCursor *twitterscraper.ThreadCursor Error error } @@ -153,52 +154,14 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun client := client.NewTwitterXClient(apiKey.Key) twitterXScraper := twitterx.NewTwitterXScraper(client) - result, err := twitterXScraper.ScrapeTweetsByQuery(query) + result, err := twitterXScraper.ScrapeTweetsByQuery(query, count) if err != nil { return nil, err } for _, tweet := range result.Data { - - var newTweet twitterscraper.Tweet - - newTweet.ID = tweet.ID - newTweet.Text = tweet.Text - newTweet.Username = tweet.Username - newTweet.UserID = tweet.AuthorID - - for _, place := range result.Includes.Places { - newTweet.Place = &twitterscraper.Place{ - ID: place.ID, - PlaceType: place.PlaceType, - Name: place.Name, - FullName: place.FullName, - CountryCode: place.CountryCode, - Country: place.Country, - BoundingBox: struct { - Type string `json:"type"` - Coordinates [][][]float64 `json:"coordinates"` - }{ - Type: place.Geo.Type, - }, - } - } - - for _, media := range result.Includes.Media { - if media.Type == "photo" { - photo := twitterscraper.Photo{ - ID: media.MediaKey, - } - newTweet.Photos = append(newTweet.Photos, photo) - } else if media.Type == "video" { - video := twitterscraper.Video{ - ID: media.MediaKey, - } - newTweet.Videos = append(newTweet.Videos, video) - } - } - - tweets = append(tweets, &TweetResult{Tweet: &newTweet}) + // Append the tweet to the list of tweet result + tweets = append(tweets, &TweetResult{TweetXData: &tweet}) } ts.statsCollector.Add(stats.TwitterTweets, uint(len(result.Data))) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index a1f7d16a..edb8156d 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -21,80 +21,67 @@ type TwitterXScraper struct { twitterXClient *client.TwitterXClient } -type TwitterXSearchQueryResult struct { - Data []struct { - AuthorID string `json:"author_id"` - CreatedAt string `json:"created_at"` - ID string `json:"id"` - Text string `json:"text"` - Username string `json:"username"` - } `json:"data"` - Errors []struct { - Detail string `json:"detail"` - Status int `json:"status"` - Title string `json:"title"` - Type string `json:"type"` - } `json:"errors"` - Includes struct { - Media []struct { - Height int `json:"height"` - MediaKey string `json:"media_key"` - Type string `json:"type"` - Width int `json:"width"` - } `json:"media"` - Places []struct { - ContainedWithin []string `json:"contained_within"` - Country string `json:"country"` - CountryCode string `json:"country_code"` - FullName string `json:"full_name"` - Geo struct { - Bbox []float64 `json:"bbox"` - Geometry struct { - Coordinates []float64 `json:"coordinates"` - Type string `json:"type"` - } `json:"geometry"` - Properties struct { - } `json:"properties"` - Type string `json:"type"` - } `json:"geo"` - ID string `json:"id"` - Name string `json:"name"` - PlaceType string `json:"place_type"` - } `json:"places"` - Polls []struct { - DurationMinutes int `json:"duration_minutes"` - EndDatetime time.Time `json:"end_datetime"` - ID string `json:"id"` - Options []struct { - Label string `json:"label"` - Position int `json:"position"` - Votes int `json:"votes"` - } `json:"options"` - VotingStatus string `json:"voting_status"` - } `json:"polls"` - Topics []struct { - Description string `json:"description"` +type TwitterXData struct { + AuthorID string `json:"author_id"` + Entities struct { + Urls []struct { + Start int `json:"start"` + End int `json:"end"` + URL string `json:"url"` + ExpandedURL string `json:"expanded_url"` + DisplayURL string `json:"display_url"` + } `json:"urls"` + Annotations []struct { + Start int `json:"start"` + End int `json:"end"` + Probability float64 `json:"probability"` + Type string `json:"type"` + NormalizedText string `json:"normalized_text"` + } `json:"annotations"` + } `json:"entities"` + ID string `json:"id"` + PossiblySensitive bool `json:"possibly_sensitive"` + ReplySettings string `json:"reply_settings"` + ConversationID string `json:"conversation_id"` + PublicMetrics struct { + RetweetCount int `json:"retweet_count"` + ReplyCount int `json:"reply_count"` + LikeCount int `json:"like_count"` + QuoteCount int `json:"quote_count"` + BookmarkCount int `json:"bookmark_count"` + ImpressionCount int `json:"impression_count"` + } `json:"public_metrics"` + EditControls struct { + EditsRemaining int `json:"edits_remaining"` + IsEditEligible bool `json:"is_edit_eligible"` + EditableUntil time.Time `json:"editable_until"` + } `json:"edit_controls"` + Text string `json:"text"` + ContextAnnotations []struct { + Domain struct { ID string `json:"id"` Name string `json:"name"` - } `json:"topics"` - Tweets []struct { - AuthorID string `json:"author_id"` - CreatedAt string `json:"created_at"` - ID string `json:"id"` - Text string `json:"text"` - Username string `json:"username"` - } `json:"tweets"` - Users []struct { - CreatedAt time.Time `json:"created_at"` - ID string `json:"id"` - Name string `json:"name"` - Protected bool `json:"protected"` - Username string `json:"username"` - } `json:"users"` - } `json:"includes"` + Description string `json:"description"` + } `json:"domain"` + Entity struct { + ID string `json:"id"` + Name string `json:"name"` + } `json:"entity"` + } `json:"context_annotations"` + CreatedAt time.Time `json:"created_at"` + DisplayTextRange []int `json:"display_text_range"` + Lang string `json:"lang"` + EditHistoryTweetIds []string `json:"edit_history_tweet_ids"` + InReplyToUserID string `json:"in_reply_to_user_id,omitempty"` + ReferencedTweets []struct { + Type string `json:"type"` + ID string `json:"id"` + } `json:"referenced_tweets,omitempty"` +} +type TwitterXSearchQueryResult struct { + Data []TwitterXData `json:"data"` Meta struct { NewestID string `json:"newest_id"` - NextToken string `json:"next_token"` OldestID string `json:"oldest_id"` ResultCount int `json:"result_count"` } `json:"meta"` @@ -117,7 +104,7 @@ func NewTwitterXScraper(client *client.TwitterXClient) *TwitterXScraper { } // ScrapeTweetsByQuery Alternative version using url.Values for more parameters -func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQueryResult, error) { +func (s *TwitterXScraper) ScrapeTweetsByQuery(query string, count int) (*TwitterXSearchQueryResult, error) { // initialize the client client := s.twitterXClient @@ -134,12 +121,31 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string) (*TwitterXSearchQuer // construct the final URL with encoded parameters endpoint := baseURL + "?" + params.Encode() + // max_results + //if count = 0, just return the first 10 results.query parameter value [2] is not between 10 and 100 + if count == 0 || count < 10 || count > 100 { + logrus.Error("Invalid count value. Must be between 10 and 100") + return nil, fmt.Errorf("invalid count value. Must be between 10 and 100") + } + + endpoint = endpoint + "&max_results=" + strconv.Itoa(count) + + // include all possible fields - but note that the twitter api does not return all fields. + // TODO: check the response and adjust the fields as needed + endpoint = endpoint + "&tweet.fields=tweet.fields=created_at,author_id,public_metrics,context_annotations,geo,lang,possibly_sensitive,source,withheld,attachments,entities,conversation_id,in_reply_to_user_id,referenced_tweets,reply_settings,media_metadata,note_tweet,display_text_range,edit_controls,edit_history_tweet_ids,article,card_uri,community_id" + endpoint = endpoint + "&user.fields=username,affiliation,connection_status,created_at,description,entities,id,is_identity_verified,location,most_recent_tweet_id,name,parody,pinned_tweet_id,profile_banner_url,profile_image_url,protected,public_metrics,receives_your_dm,subscription,subscription_type,url,verified,verified_followers_count,verified_type,withheld" + endpoint = endpoint + "&place.fields=contained_within,country,country_code,full_name,geo,id,name,place_type" + + // sample + //https://api.x.com/2/tweets/search/recent?query=Learn+how+to+use+the+user+Tweet+timeline&tweet.fields=created_at,author_id,public_metrics,context_annotations,geo,lang,possibly_sensitive,source,withheld,attachments,entities,conversation_id,in_reply_to_user_id,referenced_tweets,reply_settings,media_metadata,note_tweet,display_text_range,edit_controls,edit_history_tweet_ids,article,card_uri,community_id&user.fields=username,affiliation,connection_status,created_at,description,entities,id,is_identity_verified,location,most_recent_tweet_id,name,parody,pinned_tweet_id,profile_banner_url,profile_image_url,protected,public_metrics,receives_your_dm,subscription,subscription_type,url,verified,verified_followers_count,verified_type,withheld&place.fields=contained_within,country,country_code,full_name,geo,id,name,place_type + // run the search response, err := client.Get(endpoint) if err != nil { logrus.Error("failed to execute search query: %w", err) return nil, fmt.Errorf("failed to execute search query: %w", err) } + defer response.Body.Close() // read the response body From c69d7541705507d3c041751afaf17a26b2ee84d3 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 19:23:27 -0500 Subject: [PATCH 05/12] add the count log --- internal/jobs/twitterx/scraper.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index edb8156d..e406c1c1 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -123,7 +123,12 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string, count int) (*Twitter // max_results //if count = 0, just return the first 10 results.query parameter value [2] is not between 10 and 100 - if count == 0 || count < 10 || count > 100 { + fmt.Println("count", count) + if count == 0 { + count = 10 + } + + if count < 10 || count > 100 { logrus.Error("Invalid count value. Must be between 10 and 100") return nil, fmt.Errorf("invalid count value. Must be between 10 and 100") } From a8dc65a80895801b55fca0bc00df15b3e047f705 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 19:26:01 -0500 Subject: [PATCH 06/12] remove duplicate field --- internal/jobs/twitterx/scraper.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index e406c1c1..4640ced9 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -137,8 +137,8 @@ func (s *TwitterXScraper) ScrapeTweetsByQuery(query string, count int) (*Twitter // include all possible fields - but note that the twitter api does not return all fields. // TODO: check the response and adjust the fields as needed - endpoint = endpoint + "&tweet.fields=tweet.fields=created_at,author_id,public_metrics,context_annotations,geo,lang,possibly_sensitive,source,withheld,attachments,entities,conversation_id,in_reply_to_user_id,referenced_tweets,reply_settings,media_metadata,note_tweet,display_text_range,edit_controls,edit_history_tweet_ids,article,card_uri,community_id" - endpoint = endpoint + "&user.fields=username,affiliation,connection_status,created_at,description,entities,id,is_identity_verified,location,most_recent_tweet_id,name,parody,pinned_tweet_id,profile_banner_url,profile_image_url,protected,public_metrics,receives_your_dm,subscription,subscription_type,url,verified,verified_followers_count,verified_type,withheld" + endpoint = endpoint + "&tweet.fields=created_at,author_id,public_metrics,context_annotations,geo,lang,possibly_sensitive,source,withheld,attachments,entities,conversation_id,in_reply_to_user_id,referenced_tweets,reply_settings,media_metadata,note_tweet,display_text_range,edit_controls,edit_history_tweet_ids,article,card_uri,community_id" + endpoint = endpoint + "&user.fields=username,affiliation,connection_status,description,entities,id,is_identity_verified,location,most_recent_tweet_id,name,parody,pinned_tweet_id,profile_banner_url,profile_image_url,protected,public_metrics,receives_your_dm,subscription,subscription_type,url,verified,verified_followers_count,verified_type,withheld" endpoint = endpoint + "&place.fields=contained_within,country,country_code,full_name,geo,id,name,place_type" // sample From 5c78750bc368119b427ea39b06161f2cb383e378 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 19:47:37 -0500 Subject: [PATCH 07/12] add meta --- internal/jobs/twitter.go | 3 ++- internal/jobs/twitterx/scraper.go | 12 +++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 960100cd..e69cee0d 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -19,6 +19,7 @@ import ( type TweetResult struct { Tweet *twitterscraper.Tweet TweetXData *twitterx.TwitterXData + TweetXMeta *twitterx.TwitterXMeta ThreadCursor *twitterscraper.ThreadCursor Error error } @@ -161,7 +162,7 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun for _, tweet := range result.Data { // Append the tweet to the list of tweet result - tweets = append(tweets, &TweetResult{TweetXData: &tweet}) + tweets = append(tweets, &TweetResult{TweetXData: &tweet, TweetXMeta: &result.Meta}) } ts.statsCollector.Add(stats.TwitterTweets, uint(len(result.Data))) diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index 4640ced9..aee3fb07 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -78,13 +78,15 @@ type TwitterXData struct { ID string `json:"id"` } `json:"referenced_tweets,omitempty"` } + +type TwitterXMeta struct { + NewestID string `json:"newest_id"` + OldestID string `json:"oldest_id"` + ResultCount int `json:"result_count"` +} type TwitterXSearchQueryResult struct { Data []TwitterXData `json:"data"` - Meta struct { - NewestID string `json:"newest_id"` - OldestID string `json:"oldest_id"` - ResultCount int `json:"result_count"` - } `json:"meta"` + Meta TwitterXMeta `json:"meta"` } // SearchParams holds all possible search parameters From 99922998f63099e9ec8ed0a65307583b7e1fa980 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 22:27:26 -0500 Subject: [PATCH 08/12] add meta --- internal/jobs/twitter.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index e69cee0d..5a608892 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -162,6 +162,7 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun for _, tweet := range result.Data { // Append the tweet to the list of tweet result + fmt.Println("Tweet: ", tweet) tweets = append(tweets, &TweetResult{TweetXData: &tweet, TweetXMeta: &result.Meta}) } From 3248140d33e30f730e66499c7edbd1277a4821ad Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 22:32:48 -0500 Subject: [PATCH 09/12] add meta --- internal/jobs/twitter.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 5a608892..36996bd1 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -162,11 +162,15 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun for _, tweet := range result.Data { // Append the tweet to the list of tweet result - fmt.Println("Tweet: ", tweet) tweets = append(tweets, &TweetResult{TweetXData: &tweet, TweetXMeta: &result.Meta}) } - ts.statsCollector.Add(stats.TwitterTweets, uint(len(result.Data))) + for _, tweet := range tweets { + fmt.Println(tweet.TweetXData.ID) + fmt.Println(tweet.TweetXData.Text) + } + + ts.statsCollector.Add(stats.TwitterTweets, uint(len(tweets))) return tweets, nil From 907b0157523b73a6ebd44c36f47b20fbaa6cdd1a Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 22:35:31 -0500 Subject: [PATCH 10/12] add meta --- internal/jobs/twitter.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 36996bd1..9622bce2 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -162,7 +162,11 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun for _, tweet := range result.Data { // Append the tweet to the list of tweet result - tweets = append(tweets, &TweetResult{TweetXData: &tweet, TweetXMeta: &result.Meta}) + tweetResult := &TweetResult{ + TweetXData: &tweet, + TweetXMeta: &result.Meta, + } + tweets = append(tweets, tweetResult) } for _, tweet := range tweets { From e7e82cbd7a2490282edc19fc99f323cc9c1bdb00 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 22:37:26 -0500 Subject: [PATCH 11/12] add meta --- internal/jobs/twitter.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 9622bce2..5b9a764d 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -18,8 +18,8 @@ import ( type TweetResult struct { Tweet *twitterscraper.Tweet - TweetXData *twitterx.TwitterXData - TweetXMeta *twitterx.TwitterXMeta + TweetXData twitterx.TwitterXData + TweetXMeta twitterx.TwitterXMeta ThreadCursor *twitterscraper.ThreadCursor Error error } @@ -163,8 +163,8 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun for _, tweet := range result.Data { // Append the tweet to the list of tweet result tweetResult := &TweetResult{ - TweetXData: &tweet, - TweetXMeta: &result.Meta, + TweetXData: tweet, + TweetXMeta: result.Meta, } tweets = append(tweets, tweetResult) } From 80fbd79035c8fc19a5c150c2224a6bb414b7ae98 Mon Sep 17 00:00:00 2001 From: Alvin Reyes Date: Tue, 18 Feb 2025 22:53:52 -0500 Subject: [PATCH 12/12] clean up --- internal/jobs/twitter.go | 11 ++++------- internal/jobs/twitterx/scraper.go | 10 ++++++++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/internal/jobs/twitter.go b/internal/jobs/twitter.go index 5b9a764d..b4d97f7f 100644 --- a/internal/jobs/twitter.go +++ b/internal/jobs/twitter.go @@ -18,10 +18,11 @@ import ( type TweetResult struct { Tweet *twitterscraper.Tweet - TweetXData twitterx.TwitterXData - TweetXMeta twitterx.TwitterXMeta ThreadCursor *twitterscraper.ThreadCursor Error error + + TweetXData twitterx.TwitterXData + TweetXMeta twitterx.TwitterXMeta } func parseAccounts(accountPairs []string) []*twitter.TwitterAccount { @@ -165,15 +166,11 @@ func (ts *TwitterScraper) ScrapeTweetsByQuery(baseDir string, query string, coun tweetResult := &TweetResult{ TweetXData: tweet, TweetXMeta: result.Meta, + Error: err, } tweets = append(tweets, tweetResult) } - for _, tweet := range tweets { - fmt.Println(tweet.TweetXData.ID) - fmt.Println(tweet.TweetXData.Text) - } - ts.statsCollector.Add(stats.TwitterTweets, uint(len(tweets))) return tweets, nil diff --git a/internal/jobs/twitterx/scraper.go b/internal/jobs/twitterx/scraper.go index aee3fb07..edc8dde4 100644 --- a/internal/jobs/twitterx/scraper.go +++ b/internal/jobs/twitterx/scraper.go @@ -85,8 +85,14 @@ type TwitterXMeta struct { ResultCount int `json:"result_count"` } type TwitterXSearchQueryResult struct { - Data []TwitterXData `json:"data"` - Meta TwitterXMeta `json:"meta"` + Data []TwitterXData `json:"data"` + Meta TwitterXMeta `json:"meta"` + Errors []struct { + Detail string `json:"detail"` + Status int `json:"status"` + Title string `json:"title"` + Type string `json:"type"` + } } // SearchParams holds all possible search parameters